
Commit 8a50b0f

senthh authored and MaxGekk committed
[SPARK-51419][SQL] Get hours of TIME datatype
### What changes were proposed in this pull request?

This PR adds support for extracting the hour component from TIME (TimeType) values in Spark SQL.

```
scala> spark.sql("SELECT hour(TIME'07:01:09.12312321231232');").show()
+----------------------------+
|hour(TIME '07:01:09.123123')|
+----------------------------+
|                           7|
+----------------------------+

scala> spark.sql("SELECT hour('2009-07-30 12:58:59')").show()
+-------------------------+
|hour(2009-07-30 12:58:59)|
+-------------------------+
|                       12|
+-------------------------+
```

### Why are the changes needed?

Spark previously supported `hour()` only for TIMESTAMP values. TIME support was missing, so a TIME argument led to an incorrect implicit cast attempt to TIMESTAMP. This PR ensures that `hour(TIME'HH:MM:SS.######')` behaves correctly, without unnecessary type coercion.

### Does this PR introduce _any_ user-facing change?

Yes.
- Before this PR, calling `hour(TIME'HH:MM:SS.######')` resulted in a type mismatch error or an incorrect implicit cast attempt to TIMESTAMP.
- With this PR, `hour(TIME'HH:MM:SS.######')` works correctly for TIME values without implicit casting.
- Users can now extract the hour component from TIME values natively.

### How was this patch tested?

By running the new tests:

```
$ build/sbt "test:testOnly *TimeExpressionsSuite"
```

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #50355 from senthh/getHour.

Authored-by: senthh <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
1 parent 2c492b2 · commit 8a50b0f

6 files changed (+121, -11 lines)

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala

Lines changed: 1 addition & 1 deletion
@@ -640,7 +640,7 @@ object FunctionRegistry {
     expression[DayOfMonth]("dayofmonth"),
     expression[FromUnixTime]("from_unixtime"),
     expression[FromUTCTimestamp]("from_utc_timestamp"),
-    expression[Hour]("hour"),
+    expressionBuilder("hour", HourExpressionBuilder),
     expression[LastDay]("last_day"),
     expressionBuilder("minute", MinuteExpressionBuilder),
     expression[Month]("month"),
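Switching the registry entry from `expression[Hour]` to `expressionBuilder("hour", HourExpressionBuilder)` means the concrete expression is now chosen at analysis time from the argument's data type, mirroring how `minute` is already registered through `MinuteExpressionBuilder`. A minimal sketch of the resulting dispatch, assuming a spark-shell built from a branch that contains this commit (expected outputs taken from the PR description):

```scala
// Sketch only: the single SQL function name `hour` now resolves to different
// expressions depending on the argument type (see HourExpressionBuilder below).
spark.sql("SELECT hour(TIME'07:01:09')").show()        // TIME -> HoursOfTime, prints 7
spark.sql("SELECT hour('2009-07-30 12:58:59')").show() // string -> Hour (cast to TIMESTAMP), prints 12
```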

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala

Lines changed: 0 additions & 9 deletions
@@ -400,15 +400,6 @@ trait GetTimeField extends UnaryExpression
   }
 }
 
-@ExpressionDescription(
-  usage = "_FUNC_(timestamp) - Returns the hour component of the string/timestamp.",
-  examples = """
-    Examples:
-      > SELECT _FUNC_('2009-07-30 12:58:59');
-       12
-  """,
-  group = "datetime_funcs",
-  since = "1.5.0")
 case class Hour(child: Expression, timeZoneId: Option[String] = None) extends GetTimeField {
   def this(child: Expression) = this(child, None)
   override def withTimeZone(timeZoneId: String): Hour = copy(timeZoneId = Option(timeZoneId))

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala

Lines changed: 57 additions & 0 deletions
@@ -233,3 +233,60 @@ object MinuteExpressionBuilder extends ExpressionBuilder {
     }
   }
 }
+
+case class HoursOfTime(child: Expression)
+  extends RuntimeReplaceable
+  with ExpectsInputTypes {
+
+  override def replacement: Expression = StaticInvoke(
+    classOf[DateTimeUtils.type],
+    IntegerType,
+    "getHoursOfTime",
+    Seq(child),
+    Seq(child.dataType)
+  )
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(TimeType())
+
+  override def children: Seq[Expression] = Seq(child)
+
+  override def prettyName: String = "hour"
+
+  override protected def withNewChildrenInternal(
+      newChildren: IndexedSeq[Expression]): Expression = {
+    copy(child = newChildren.head)
+  }
+}
+
+@ExpressionDescription(
+  usage = """
+    _FUNC_(expr) - Returns the hour component of the given expression.
+
+    If `expr` is a TIMESTAMP or a string that can be cast to timestamp,
+    it returns the hour of that timestamp.
+    If `expr` is a TIME type (since 4.1.0), it returns the hour of the time-of-day.
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_('2018-02-14 12:58:59');
+       12
+      > SELECT _FUNC_(TIME'13:59:59.999999');
+       13
+  """,
+  since = "1.5.0",
+  group = "datetime_funcs")
+object HourExpressionBuilder extends ExpressionBuilder {
+  override def build(name: String, expressions: Seq[Expression]): Expression = {
+    if (expressions.isEmpty) {
+      throw QueryCompilationErrors.wrongNumArgsError(name, Seq("> 0"), expressions.length)
+    } else {
+      val child = expressions.head
+      child.dataType match {
+        case _: TimeType =>
+          HoursOfTime(child)
+        case _ =>
+          Hour(child)
+      }
+    }
+  }
+}
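Because `HoursOfTime` is `RuntimeReplaceable`, it never evaluates itself: Catalyst rewrites it into the `replacement` above, a `StaticInvoke` of `DateTimeUtils.getHoursOfTime` on the child value. A rough sketch of what that rewrite boils down to for a non-null input, assuming a TIME value is physically a `Long` counting microseconds since midnight (paste into a Scala REPL with spark-catalyst from a build containing this commit on the classpath):

```scala
import org.apache.spark.sql.catalyst.util.DateTimeUtils

// TIME'13:59:59.999999' encoded as microseconds since midnight (assumed physical layout).
val micros: Long = (13L * 3600 + 59 * 60 + 59) * 1000000L + 999999L

// Effectively what the StaticInvoke replacement executes per row.
val hour: Int = DateTimeUtils.getHoursOfTime(micros) // 13
```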

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala

Lines changed: 7 additions & 0 deletions
@@ -105,6 +105,13 @@ object DateTimeUtils extends SparkDateTimeUtils {
     getLocalDateTime(micros, zoneId).getHour
   }
 
+  /**
+   * Returns the hour value of a given TIME (TimeType) value.
+   */
+  def getHoursOfTime(micros: Long): Int = {
+    microsToLocalTime(micros).getHour
+  }
+
   /**
    * Returns the minute value of a given timestamp value. The timestamp is expressed in
    * microseconds since the epoch.
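`getHoursOfTime` reuses `microsToLocalTime`, which presumably maps the microseconds-since-midnight value onto a `java.time.LocalTime` so the hour can be read off with `getHour`. A plain-JDK sketch of that mapping under the same assumption (not Spark's actual helper):

```scala
import java.time.LocalTime

// Assumption: TIME values are microseconds since midnight; scaling to nanoseconds
// yields a value accepted by LocalTime.ofNanoOfDay, whose getHour returns the hour field.
val micros: Long = (7L * 3600 + 1 * 60 + 9) * 1000000L                           // TIME'07:01:09'
val hour: Int = LocalTime.ofNanoOfDay(Math.multiplyExact(micros, 1000L)).getHour // 7
```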

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeExpressionsSuite.scala

Lines changed: 55 additions & 0 deletions
@@ -53,6 +53,61 @@ class TimeExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
       parameters = Map("input" -> "'100:50'", "format" -> "'mm:HH'"))
   }
 
+  test("HourExpressionBuilder") {
+    // Empty expressions list
+    checkError(
+      exception = intercept[AnalysisException] {
+        HourExpressionBuilder.build("hour", Seq.empty)
+      },
+      condition = "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+      parameters = Map(
+        "functionName" -> "`hour`",
+        "expectedNum" -> "> 0",
+        "actualNum" -> "0",
+        "docroot" -> SPARK_DOC_ROOT)
+    )
+
+    // test TIME-typed child should build HoursOfTime
+    val timeExpr = Literal(localTime(12, 58, 59), TimeType())
+    val builtExprForTime = HourExpressionBuilder.build("hour", Seq(timeExpr))
+    assert(builtExprForTime.isInstanceOf[HoursOfTime])
+    assert(builtExprForTime.asInstanceOf[HoursOfTime].child eq timeExpr)
+
+    // test non TIME-typed child should build hour
+    val tsExpr = Literal("2007-09-03 10:45:23")
+    val builtExprForTs = HourExpressionBuilder.build("hour", Seq(tsExpr))
+    assert(builtExprForTs.isInstanceOf[Hour])
+    assert(builtExprForTs.asInstanceOf[Hour].child eq tsExpr)
+  }
+
+  test("Hour with TIME type") {
+    // A few test times in microseconds since midnight:
+    // time in microseconds -> expected hour
+    val testTimes = Seq(
+      localTime() -> 0,
+      localTime(1) -> 1,
+      localTime(0, 59) -> 0,
+      localTime(14, 30) -> 14,
+      localTime(12, 58, 59) -> 12,
+      localTime(23, 0, 1) -> 23,
+      localTime(23, 59, 59, 999999) -> 23
+    )
+
+    // Create a literal with TimeType() for each test microsecond value
+    // evaluate HoursOfTime(...), and check that the result matches the expected hour.
+    testTimes.foreach { case (micros, expectedHour) =>
+      checkEvaluation(
+        HoursOfTime(Literal(micros, TimeType())),
+        expectedHour)
+    }
+
+    // Verify NULL handling
+    checkEvaluation(
+      HoursOfTime(Literal.create(null, TimeType(TimeType.MICROS_PRECISION))),
+      null
+    )
+  }
+
   test("MinuteExpressionBuilder") {
     // Empty expressions list
     checkError(
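The TIME literals in these tests come from a `localTime(...)` helper; from its usage it appears to take `(hour, minute, second, microsecond)` with all arguments defaulting to 0 and to return microseconds since midnight. A hypothetical stand-in, handy for decoding the expected values in the table above:

```scala
// Hypothetical equivalent of the localTime(...) test helper (not the real utility):
// (hour, minute, second, microsecond) -> microseconds since midnight.
def localTimeMicros(h: Int = 0, m: Int = 0, s: Int = 0, us: Int = 0): Long =
  (h * 3600L + m * 60L + s) * 1000000L + us

assert(localTimeMicros() == 0L)                              // midnight -> hour 0
assert(localTimeMicros(23, 59, 59, 999999) == 86399999999L)  // last microsecond -> hour 23
```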

sql/core/src/test/resources/sql-functions/sql-expression-schema.md

Lines changed: 1 addition & 1 deletion
@@ -159,7 +159,7 @@
 | org.apache.spark.sql.catalyst.expressions.Hex | hex | SELECT hex(17) | struct<hex(17):string> |
 | org.apache.spark.sql.catalyst.expressions.HllSketchEstimate | hll_sketch_estimate | SELECT hll_sketch_estimate(hll_sketch_agg(col)) FROM VALUES (1), (1), (2), (2), (3) tab(col) | struct<hll_sketch_estimate(hll_sketch_agg(col, 12)):bigint> |
 | org.apache.spark.sql.catalyst.expressions.HllUnion | hll_union | SELECT hll_sketch_estimate(hll_union(hll_sketch_agg(col1), hll_sketch_agg(col2))) FROM VALUES (1, 4), (1, 4), (2, 5), (2, 5), (3, 6) tab(col1, col2) | struct<hll_sketch_estimate(hll_union(hll_sketch_agg(col1, 12), hll_sketch_agg(col2, 12), false)):bigint> |
-| org.apache.spark.sql.catalyst.expressions.Hour | hour | SELECT hour('2009-07-30 12:58:59') | struct<hour(2009-07-30 12:58:59):int> |
+| org.apache.spark.sql.catalyst.expressions.HourExpressionBuilder | hour | SELECT hour('2018-02-14 12:58:59') | struct<hour(2018-02-14 12:58:59):int> |
 | org.apache.spark.sql.catalyst.expressions.Hypot | hypot | SELECT hypot(3, 4) | struct<HYPOT(3, 4):double> |
 | org.apache.spark.sql.catalyst.expressions.ILike | ilike | SELECT ilike('Spark', '_Park') | struct<ilike(Spark, _Park):boolean> |
 | org.apache.spark.sql.catalyst.expressions.If | if | SELECT if(1 < 2, 'a', 'b') | struct<(IF((1 < 2), a, b)):string> |
