Skip to content

[SPARK-51419][SQL] Get hours of TIME datatype #50355

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 10 commits into from
Original file line number Diff line number Diff line change
Expand Up @@ -640,7 +640,7 @@ object FunctionRegistry {
expression[DayOfMonth]("dayofmonth"),
expression[FromUnixTime]("from_unixtime"),
expression[FromUTCTimestamp]("from_utc_timestamp"),
expression[Hour]("hour"),
expressionBuilder("hour", HourExpressionBuilder),
expression[LastDay]("last_day"),
expressionBuilder("minute", MinuteExpressionBuilder),
expression[Month]("month"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -400,15 +400,6 @@ trait GetTimeField extends UnaryExpression
}
}

@ExpressionDescription(
usage = "_FUNC_(timestamp) - Returns the hour component of the string/timestamp.",
examples = """
Examples:
> SELECT _FUNC_('2009-07-30 12:58:59');
12
""",
group = "datetime_funcs",
since = "1.5.0")
case class Hour(child: Expression, timeZoneId: Option[String] = None) extends GetTimeField {
def this(child: Expression) = this(child, None)
override def withTimeZone(timeZoneId: String): Hour = copy(timeZoneId = Option(timeZoneId))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,3 +233,60 @@ object MinuteExpressionBuilder extends ExpressionBuilder {
}
}
}

/**
 * Extracts the hour-of-day from a TIME value.
 *
 * The expression is a [[RuntimeReplaceable]]: it is swapped for a [[StaticInvoke]] of
 * `DateTimeUtils.getHoursOfTime`, whose declared result type is `IntegerType`.
 *
 * @param child the TIME-typed expression whose hour is returned
 */
case class HoursOfTime(child: Expression)
  extends RuntimeReplaceable
  with ExpectsInputTypes {

  override def replacement: Expression = StaticInvoke(
    classOf[DateTimeUtils.type],
    IntegerType,
    "getHoursOfTime",
    Seq(child),
    Seq(child.dataType)
  )

  // Accept TIME of every precision, not only the default TIME(6).
  // Review feedback on this change (raised by @MaxGekk, acknowledged by the author):
  // with `Seq(TimeType())` as the only accepted input type, the query
  //   select hour(cast('12:30' as time(0)));
  // fails with
  //   [DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "hour(CAST(12:30 AS TIME(0)))"
  //   due to data type mismatch: The first parameter requires the "TIME(6)" type, however
  //   "CAST(12:30 AS TIME(0))" has the type "TIME(0)". SQLSTATE: 42K09
  // TypeCollection is fully qualified so that no extra import is needed in this file.
  override def inputTypes: Seq[AbstractDataType] = {
    val anyPrecisionTime =
      (0 to TimeType.MICROS_PRECISION).map(precision => TimeType(precision))
    Seq(org.apache.spark.sql.types.TypeCollection(anyPrecisionTime: _*))
  }

  override def children: Seq[Expression] = Seq(child)

  // Shown to users under the SQL function name rather than the class name.
  override def prettyName: String = "hour"

  override protected def withNewChildrenInternal(
      newChildren: IndexedSeq[Expression]): Expression = {
    copy(child = newChildren.head)
  }
}

/**
 * Builder behind the SQL function `hour`. It picks the concrete expression from the data type
 * of its single argument: [[HoursOfTime]] for a TIME value, [[Hour]] for anything else.
 */
@ExpressionDescription(
  usage = """
_FUNC_(expr) - Returns the hour component of the given expression.

If `expr` is a TIMESTAMP or a string that can be cast to timestamp,
it returns the hour of that timestamp.
If `expr` is a TIME type (since 4.1.0), it returns the hour of the time-of-day.
""",
  examples = """
Examples:
> SELECT _FUNC_('2018-02-14 12:58:59');
12
> SELECT _FUNC_(TIME'13:59:59.999999');
13
""",
  since = "1.5.0",
  group = "datetime_funcs")
object HourExpressionBuilder extends ExpressionBuilder {
  /**
   * Builds the expression for `hour`.
   *
   * @param name the function name, used in the error message
   * @param expressions the call arguments; exactly one is accepted
   * @return [[HoursOfTime]] when the argument is TIME-typed, otherwise [[Hour]]
   * @throws AnalysisException (WRONG_NUM_ARGS) when the argument count is not one
   */
  override def build(name: String, expressions: Seq[Expression]): Expression = {
    expressions match {
      case Seq(child) =>
        child.dataType match {
          case _: TimeType => HoursOfTime(child)
          case _ => Hour(child)
        }
      case Seq() =>
        // Error parameters for the zero-argument case are left exactly as they were.
        throw QueryCompilationErrors.wrongNumArgsError(name, Seq("> 0"), expressions.length)
      case _ =>
        // Surplus arguments used to be dropped silently; reject them instead.
        throw QueryCompilationErrors.wrongNumArgsError(name, Seq(1), expressions.length)
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,13 @@ object DateTimeUtils extends SparkDateTimeUtils {
getLocalDateTime(micros, zoneId).getHour
}

/**
 * Extracts the hour field from a TIME (TimeType) value.
 *
 * @param micros the TIME value handed to `microsToLocalTime` (presumably microseconds since
 *               midnight, going by the parameter name; confirm against that helper)
 * @return the result of `getHour` on the converted value
 */
def getHoursOfTime(micros: Long): Int = {
  val timeOfDay = microsToLocalTime(micros)
  timeOfDay.getHour
}

/**
* Returns the minute value of a given timestamp value. The timestamp is expressed in
* microseconds since the epoch.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,61 @@ class TimeExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
parameters = Map("input" -> "'100:50'", "format" -> "'mm:HH'"))
}

test("HourExpressionBuilder") {
  // Building with no arguments at all must be rejected.
  val expectedErrorParams = Map(
    "functionName" -> "`hour`",
    "expectedNum" -> "> 0",
    "actualNum" -> "0",
    "docroot" -> SPARK_DOC_ROOT)
  checkError(
    exception = intercept[AnalysisException] {
      HourExpressionBuilder.build("hour", Seq.empty)
    },
    condition = "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
    parameters = expectedErrorParams)

  // A TIME-typed argument is wrapped, as the very same instance, in HoursOfTime.
  val timeArg = Literal(localTime(12, 58, 59), TimeType())
  val fromTime = HourExpressionBuilder.build("hour", Seq(timeArg))
  assert(fromTime match {
    case HoursOfTime(wrapped) => wrapped eq timeArg
    case _ => false
  })

  // An argument of any other type is wrapped, as the very same instance, in Hour.
  val stringArg = Literal("2007-09-03 10:45:23")
  val fromString = HourExpressionBuilder.build("hour", Seq(stringArg))
  assert(fromString match {
    case Hour(wrapped, _) => wrapped eq stringArg
    case _ => false
  })
}

test("Hour with TIME type") {
  // Each pair maps a value built by the suite's `localTime` helper to the hour that
  // HoursOfTime is expected to return for it.
  val expectedHourByTime = Seq(
    localTime() -> 0,
    localTime(1) -> 1,
    localTime(0, 59) -> 0,
    localTime(14, 30) -> 14,
    localTime(12, 58, 59) -> 12,
    localTime(23, 0, 1) -> 23,
    localTime(23, 59, 59, 999999) -> 23
  )

  // Wrap every value in a TimeType() literal and evaluate HoursOfTime over it.
  for ((timeValue, hour) <- expectedHourByTime) {
    val timeLiteral = Literal(timeValue, TimeType())
    checkEvaluation(HoursOfTime(timeLiteral), hour)
  }

  // A NULL TIME input evaluates to NULL.
  val nullTime = Literal.create(null, TimeType(TimeType.MICROS_PRECISION))
  checkEvaluation(HoursOfTime(nullTime), null)
}

test("MinuteExpressionBuilder") {
// Empty expressions list
checkError(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@
| org.apache.spark.sql.catalyst.expressions.Hex | hex | SELECT hex(17) | struct<hex(17):string> |
| org.apache.spark.sql.catalyst.expressions.HllSketchEstimate | hll_sketch_estimate | SELECT hll_sketch_estimate(hll_sketch_agg(col)) FROM VALUES (1), (1), (2), (2), (3) tab(col) | struct<hll_sketch_estimate(hll_sketch_agg(col, 12)):bigint> |
| org.apache.spark.sql.catalyst.expressions.HllUnion | hll_union | SELECT hll_sketch_estimate(hll_union(hll_sketch_agg(col1), hll_sketch_agg(col2))) FROM VALUES (1, 4), (1, 4), (2, 5), (2, 5), (3, 6) tab(col1, col2) | struct<hll_sketch_estimate(hll_union(hll_sketch_agg(col1, 12), hll_sketch_agg(col2, 12), false)):bigint> |
| org.apache.spark.sql.catalyst.expressions.Hour | hour | SELECT hour('2009-07-30 12:58:59') | struct<hour(2009-07-30 12:58:59):int> |
| org.apache.spark.sql.catalyst.expressions.HourExpressionBuilder | hour | SELECT hour('2018-02-14 12:58:59') | struct<hour(2018-02-14 12:58:59):int> |
| org.apache.spark.sql.catalyst.expressions.Hypot | hypot | SELECT hypot(3, 4) | struct<HYPOT(3, 4):double> |
| org.apache.spark.sql.catalyst.expressions.ILike | ilike | SELECT ilike('Spark', '_Park') | struct<ilike(Spark, _Park):boolean> |
| org.apache.spark.sql.catalyst.expressions.If | if | SELECT if(1 < 2, 'a', 'b') | struct<(IF((1 < 2), a, b)):string> |
Expand Down