Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions core/api/core.api
Original file line number Diff line number Diff line change
Expand Up @@ -3547,8 +3547,12 @@ public final class org/jetbrains/kotlinx/dataframe/api/ParseKt {
public static synthetic fun parse$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun parseAnyFrameNullable (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static synthetic fun parseAnyFrameNullable$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static final fun parseChar (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static synthetic fun parseChar$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static final fun tryParse (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static synthetic fun tryParse$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static final fun tryParseChar (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static synthetic fun tryParseChar$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
}

public final class org/jetbrains/kotlinx/dataframe/api/ParserOptions {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import java.time.format.DateTimeFormatter
import java.util.Locale
import kotlin.reflect.KProperty
import kotlin.reflect.KType
import kotlin.reflect.typeOf
import kotlin.uuid.ExperimentalUuidApi
import kotlin.uuid.Uuid

Expand Down Expand Up @@ -312,6 +313,28 @@ public class ParserOptions(
* @return a new column with parsed values */
public fun DataColumn<String?>.tryParse(options: ParserOptions? = null): DataColumn<*> = tryParseImpl(options)

/**
 * Attempts to turn this column of chars into a column of another type.
 *
 * Every char is first rendered as a string; the resulting `String?` column is then passed to the
 * string-based [tryParse]. Each parser in [Parsers] is run in order until one is able to parse all
 * values in the column successfully. If a parser fails on any value, the next one is tried, and if
 * all the others fail, the final parser returns strings.
 *
 * The `Char` type is always added to the skipped types, since the goal is to parse away from `Char`.
 *
 * Parsers that are [covered by][StringParser.coveredBy] other parsers are skipped.
 *
 * @param options options for parsing, like providing a locale or a custom date-time formatter.
 *   When `options?.skipTypes` is `null`, the skipped types are taken from `DataFrame.parser`.
 * @throws IllegalStateException if no valid parser is found (unlikely, unless the `String` parser is disabled)
 * @return a new column with parsed values
 */
@JvmName("tryParseChar")
public fun DataColumn<Char?>.tryParse(options: ParserOptions? = null): DataColumn<*> {
    // Skip types given by the caller take precedence over the globally configured ones.
    val inheritedSkipTypes = options?.skipTypes ?: DataFrame.parser.skipTypes
    val baseOptions = options ?: ParserOptions()

    // Exclude the Char parser, as we're trying to parse away from Char.
    val optionsWithoutChar = baseOptions.copy(skipTypes = inheritedSkipTypes + typeOf<Char>())

    val asStrings = map { it?.toString() }
    return asStrings.tryParse(optionsWithoutChar)
}

public fun <T> DataFrame<T>.parse(options: ParserOptions? = null): DataFrame<T> =
parse(options) {
colsAtAnyDepth().filter { !it.isColumnGroup() }
Expand All @@ -335,6 +358,23 @@ public fun <T> DataFrame<T>.parse(options: ParserOptions? = null): DataFrame<T>
public fun DataColumn<String?>.parse(options: ParserOptions? = null): DataColumn<*> {
    val parsed = tryParse(options)
    // A result whose type class is still String means the column type could not be guessed.
    if (parsed.typeClass == String::class) error("Can't guess column type")
    return parsed
}

/**
 * Parses a column of chars, treated as strings, into a column of a different type.
 *
 * Each parser in [Parsers] is run in order until a valid parser is found, i.e. one that was able
 * to parse all values in the column successfully. If a parser fails to parse any value, the next
 * parser is tried.
 *
 * If all of them fail, the column is returned as `String`; this function therefore never throws
 * because of an unguessable type.
 *
 * Parsers that are [covered by][StringParser.coveredBy] other parsers are skipped.
 *
 * @param options options for parsing, like providing a locale or a custom date-time formatter
 * @return a new column with parsed values
 */
@JvmName("parseChar")
public fun DataColumn<Char?>.parse(options: ParserOptions? = null): DataColumn<*> {
    // No exception needs to be thrown here, as Char can always be parsed as String.
    return tryParse(options)
}

/**
 * Parses every data frame stored in this column with the given [options].
 * `null` entries stay `null`.
 *
 * @param options options passed unchanged to the `parse` call on each frame
 * @return a column holding the parsed frames
 */
@JvmName("parseAnyFrameNullable")
public fun DataColumn<AnyFrame?>.parse(options: ParserOptions? = null): DataColumn<AnyFrame?> =
    map { frame -> frame?.parse(options) }
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,13 @@ internal fun createConverter(from: KType, to: KType, options: ParserOptions? = n

Char::class -> when (toClass) {
Int::class -> convert<Char> { it.code }
else -> null

else -> // convert char to string and then to target type
getConverter(typeOf<String>(), to, options)?.let { stringConverter ->
convert<Char> {
stringConverter(it.toString())
}
}
}

Int::class -> when (toClass) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -716,29 +716,24 @@ internal fun <T> DataFrame<T>.parseImpl(options: ParserOptions?, columns: Column
when {
// when a frame column is requested to be parsed,
// parse each value/frame column at any depth inside each DataFrame in the frame column
col.isFrameColumn() -> {
col.isFrameColumn() ->
col.map {
it.parseImpl(options) {
colsAtAnyDepth().filter { !it.isColumnGroup() }
}
}
}

// when a column group is requested to be parsed,
// parse each column in the group
col.isColumnGroup() -> {
col.isColumnGroup() ->
col.parseImpl(options) { all() }
.asColumnGroup(col.name())
.asDataColumn()
}

// Base case, parse the column if it's a `String?` column
col.isSubtypeOf<String?>() -> {
col.isSubtypeOf<String?>() ->
col.cast<String?>().tryParseImpl(options)
}

else -> {
col
}
else -> col
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package org.jetbrains.kotlinx.dataframe.api
import io.kotest.assertions.throwables.shouldNotThrow
import io.kotest.assertions.throwables.shouldThrow
import io.kotest.matchers.shouldBe
import io.kotest.matchers.shouldNotBe
import kotlinx.datetime.Clock
import kotlinx.datetime.Instant
import kotlinx.datetime.LocalTime
Expand Down Expand Up @@ -69,6 +70,20 @@ class ConvertTests {
@Test
fun `convert string to enum`() {
    val expected = columnOf(EnumClass.A, EnumClass.B)

    // direct conversion of a standalone column
    val convertedColumn = columnOf("A", "B").convertTo<EnumClass>()
    convertedColumn shouldBe expected

    // the same conversion through the DataFrame `convert` DSL
    val source = dataFrameOf(columnOf("A", "B") named "colA")
    val convertedFrame = source.convert("colA").to<EnumClass>()
    convertedFrame.getColumn("colA") shouldBe expected.named("colA")
}

@Test
fun `convert char to enum`() {
    // Conversion path under test: Char -> String -> Enum
    val expected = columnOf(EnumClass.A, EnumClass.B)

    // direct conversion of a standalone column
    val convertedColumn = columnOf('A', 'B').convertTo<EnumClass>()
    convertedColumn shouldBe expected

    // the same conversion through the DataFrame `convert` DSL
    val source = dataFrameOf(columnOf('A', 'B') named "colA")
    val convertedFrame = source.convert("colA").to<EnumClass>()
    convertedFrame.getColumn("colA") shouldBe expected.named("colA")
}

@JvmInline
Expand Down Expand Up @@ -199,6 +214,15 @@ class ConvertTests {
val col = columnOf(65, 66)
col.convertTo<Char>() shouldBe columnOf('A', 'B')
col.convertTo<Char>().convertTo<Int>() shouldBe col

// this means
columnOf('1', '2').convertToInt() shouldNotBe columnOf(1, 2)
columnOf('1', '2').convertToInt() shouldBe columnOf(49, 50)

// but
columnOf('1', '2').convertToString().convertToInt() shouldBe columnOf(1, 2)
// or
columnOf('1', '2').parse() shouldBe columnOf(1, 2)
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,22 @@ import kotlin.time.Instant as StdlibInstant
import kotlinx.datetime.Instant as DeprecatedInstant

class ParseTests {

@Test
fun `parse to chars`() {
    val letters = columnOf('a', 'b', 'c')

    // parsing a column of letters is expected to yield the same Char column again
    val viaParse = letters.parse()
    viaParse shouldBe letters

    val viaTryParse = letters.tryParse()
    viaTryParse shouldBe letters

    // going through String first should end up at the same Char column
    val viaString = letters.convertToString().parse()
    viaString shouldBe letters
}

@Test
fun `parse chars to int`() {
    val digits = columnOf('1', '2', '3')

    // digit chars are expected to be parsed as the numbers they spell
    val viaParse = digits.parse()
    viaParse shouldBe columnOf(1, 2, 3)

    val viaTryParse = digits.tryParse()
    viaTryParse shouldBe columnOf(1, 2, 3)
}

@Test
fun parseDate() {
val currentLocale = Locale.getDefault()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ class ParserTests {
DataFrame.parser.resetToDefault()
}

@Test
fun `parse to Char`() {
    // the property delegate also names the column "col"
    val col by columnOf("a", "b")

    // single-character strings are expected to be parsed into a Char column
    val parsedType = col.parse().type()
    parsedType shouldBe typeOf<Char>()
}

@Test(expected = IllegalStateException::class)
fun `parse should throw`() {
val col by columnOf("a", "bc")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public class ConverterScope(public val fromType: KType, public val toSchema: Col
* df.convertTo<SomeSchema> {
* // defines how to convert Int? -> String
* convert<Int?>().with { it?.toString() ?: "No input given" }
* // defines how to convert String -> SomeType
* // defines how to convert String/Char -> SomeType
* parser { SomeType(it) }
* // fill missing column `sum` with expression `a+b`
* fill { sum }.with { a + b }
Expand Down Expand Up @@ -102,6 +102,10 @@ public fun <T, C> ConvertToFill<T, C>.with(expr: RowExpression<T, C>) {

/**
 * Defines how to convert `String` values into the given type [C].
 *
 * This method is a shortcut for `convert<String>().with { }`.
 *
 * If no converter is defined for `Char` values, this converter will be used for them as well.
 *
 * @param C the type the `String` values are converted to
 * @param parser function that turns a `String` value into a [C]
 */
public inline fun <reified C> ConvertSchemaDsl<*>.parser(noinline parser: (String) -> C): Unit =
convert<String>().with(parser)
Expand Down
40 changes: 38 additions & 2 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import org.jetbrains.kotlinx.dataframe.impl.api.StringParser
import org.jetbrains.kotlinx.dataframe.impl.api.parseImpl
import org.jetbrains.kotlinx.dataframe.impl.api.tryParseImpl
import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser
import org.jetbrains.kotlinx.dataframe.typeClass
import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
import org.jetbrains.kotlinx.dataframe.util.PARSER_OPTIONS
import org.jetbrains.kotlinx.dataframe.util.PARSER_OPTIONS_COPY
Expand Down Expand Up @@ -302,6 +301,23 @@ public class ParserOptions(
// Public entry point for `String?` columns: it only forwards to `tryParseImpl`.
// NOTE(review): the KDoc below is presumably expanded from `tryParseImpl` by the project's doc preprocessor — confirm.
/** @include [tryParseImpl] */
public fun DataColumn<String?>.tryParse(options: ParserOptions? = null): DataColumn<*> = tryParseImpl(options)

/**
 * Attempts to turn this column of chars into a column of another type.
 *
 * Every char is first rendered as a string, after which the resulting `String?` column is parsed.
 * Each parser in [Parsers] is run in order until one is able to parse all values in the column
 * successfully. If a parser fails on any value, the next one is tried, and if all the others fail,
 * the final parser returns strings.
 *
 * Parsers that are [covered by][StringParser.coveredBy] other parsers are skipped.
 *
 * @param options options for parsing, like providing a locale or a custom date-time formatter
 * @throws IllegalStateException if no valid parser is found (unlikely, unless the `String` parser is disabled)
 * @return a new column with parsed values
 */
@JvmName("tryParseChar")
public fun DataColumn<Char?>.tryParse(options: ParserOptions? = null): DataColumn<*> {
    // Parsers work on strings, so convert each char (keeping nulls) before parsing.
    val asStrings = map { it?.toString() }
    return asStrings.tryParseImpl(options)
}

public fun <T> DataFrame<T>.parse(options: ParserOptions? = null): DataFrame<T> =
parse(options) {
colsAtAnyDepth().filter { !it.isColumnGroup() }
Expand All @@ -323,7 +339,27 @@ public fun <T> DataFrame<T>.parse(options: ParserOptions? = null): DataFrame<T>
* @return a new column with parsed values
*/
public fun DataColumn<String?>.parse(options: ParserOptions? = null): DataColumn<*> =
tryParse(options).also { if (it.typeClass == String::class) error("Can't guess column type") }
tryParse(options).also { if (it.isSubtypeOf<String?>()) error("Can't guess column type") }

/**
 * Parses a column of chars, treated as strings, into a column of a different type.
 *
 * Each parser in [Parsers] is run in order until a valid parser is found, i.e. one that was able
 * to parse all values in the column successfully. If a parser fails to parse any value, the next
 * parser is tried.
 *
 * If the result is still a `Char` or `String` column, an [IllegalStateException] is thrown.
 * If you don't want this exception to be thrown, use [tryParse] instead.
 *
 * Parsers that are [covered by][StringParser.coveredBy] other parsers are skipped.
 *
 * @param options options for parsing, like providing a locale or a custom date-time formatter
 * @throws IllegalStateException if the parsed column is a `Char?` or `String?` column
 * @return a new column with parsed values
 */
@JvmName("parseChar")
public fun DataColumn<Char?>.parse(options: ParserOptions? = null): DataColumn<*> {
    val parsed = map { it?.toString() }.tryParse(options)

    // Ending up with chars or strings means no more specific type could be found.
    val typeNotGuessed = parsed.isSubtypeOf<Char?>() || parsed.isSubtypeOf<String?>()
    if (typeNotGuessed) error("Can't guess column type")

    return parsed
}

@JvmName("parseAnyFrameNullable")
public fun DataColumn<AnyFrame?>.parse(options: ParserOptions? = null): DataColumn<AnyFrame?> =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,8 @@ internal fun getConverter(from: KType, to: KType, options: ParserOptions? = null

internal typealias TypeConverter = (Any) -> Any?

/** A [TypeConverter] that returns its input unchanged. */
private val TypeConverterIdentity: TypeConverter = { value -> value }

internal fun Any.convertTo(type: KType): Any? {
val clazz = javaClass.kotlin
if (clazz.isSubclassOf(type.jvmErasure)) return this
Expand All @@ -242,6 +244,7 @@ internal inline fun <T> convert(crossinline converter: (T) -> Any?): TypeConvert

private enum class DummyEnum

@Suppress("UNCHECKED_CAST")
internal fun createConverter(from: KType, to: KType, options: ParserOptions? = null): TypeConverter? {
if (from.arguments.isNotEmpty() || to.arguments.isNotEmpty()) return null
if (from.isMarkedNullable) {
Expand All @@ -250,25 +253,24 @@ internal fun createConverter(from: KType, to: KType, options: ParserOptions? = n
}
val fromClass = from.jvmErasure
val toClass = to.jvmErasure
return when {
fromClass == toClass -> TypeConverterIdentity

if (fromClass == toClass) return { it }

if (toClass.isValue) {
val constructor =
toClass.primaryConstructor ?: error("Value type $toClass doesn't have primary constructor")
val underlyingType = constructor.parameters.single().type
val converter = getConverter(from, underlyingType)
?: throw TypeConverterNotFoundException(from, underlyingType, null)
return convert<Any> {
val converted = converter(it)
if (converted == null && !underlyingType.isMarkedNullable) {
throw TypeConversionException(it, from, underlyingType, null)
toClass.isValue -> {
val constructor =
toClass.primaryConstructor ?: error("Value type $toClass doesn't have primary constructor")
val underlyingType = constructor.parameters.single().type
val converter = getConverter(from, underlyingType)
?: throw TypeConverterNotFoundException(from, underlyingType, null)
return convert<Any> {
val converted = converter(it)
if (converted == null && !underlyingType.isMarkedNullable) {
throw TypeConversionException(it, from, underlyingType, null)
}
constructor.call(converted)
}
constructor.call(converted)
}
}

return when {
fromClass == String::class -> {
val parser = Parsers[to.withNullability(false)]
when {
Expand Down Expand Up @@ -369,7 +371,13 @@ internal fun createConverter(from: KType, to: KType, options: ParserOptions? = n

Char::class -> when (toClass) {
Int::class -> convert<Char> { it.code }
else -> null

else -> // convert char to string and then to target type
getConverter(typeOf<String>(), to, options)?.let { stringConverter ->
convert<Char> {
stringConverter(it.toString())
}
}
}

Int::class -> when (toClass) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,10 @@ import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import org.jetbrains.kotlinx.dataframe.size
import kotlin.reflect.KType
import kotlin.reflect.full.isSubtypeOf
import kotlin.reflect.full.withNullability
import kotlin.reflect.jvm.jvmErasure
import kotlin.reflect.typeOf

private val logger = KotlinLogging.logger {}

Expand Down Expand Up @@ -144,6 +146,25 @@ internal fun AnyFrame.convertToImpl(
val from = originalColumn.type()
val to = targetSchema.type
val converter = dsl.getConverter(from, targetSchema)
?: run {
// Special case for Char columns:
// If there is no explicit Char converter,
// check if we have any converters for String -> target
// if so, we can convert Char -> String -> target
// this allows `parser {}` to work both for Strings and Chars :)

if (!from.isSubtypeOf(typeOf<Char?>())) return@run null

val stringConverter = dsl.getConverter(
fromType = typeOf<String>().withNullability(from.isMarkedNullable),
toSchema = targetSchema,
) ?: return@run null

Converter(
transform = { stringConverter.transform(this, (it as Char?)?.toString()) },
skipNulls = stringConverter.skipNulls,
)
}

val convertedColumn = if (converter != null) {
val nullsAllowed = to.isMarkedNullable
Expand Down
Loading
Loading