diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 9db4ec2cbbdb2..3f13bc046037a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -640,7 +640,7 @@ object FunctionRegistry {
     expression[DayOfMonth]("dayofmonth"),
     expression[FromUnixTime]("from_unixtime"),
     expression[FromUTCTimestamp]("from_utc_timestamp"),
-    expression[Hour]("hour"),
+    expressionBuilder("hour", HourExpressionBuilder),
     expression[LastDay]("last_day"),
     expressionBuilder("minute", MinuteExpressionBuilder),
     expression[Month]("month"),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index 4b182607f4da6..6767731fc25bf 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -400,15 +400,6 @@ trait GetTimeField extends UnaryExpression
   }
 }

-@ExpressionDescription(
-  usage = "_FUNC_(timestamp) - Returns the hour component of the string/timestamp.",
-  examples = """
-    Examples:
-      > SELECT _FUNC_('2009-07-30 12:58:59');
-       12
-  """,
-  group = "datetime_funcs",
-  since = "1.5.0")
 case class Hour(child: Expression, timeZoneId: Option[String] = None) extends GetTimeField {
   def this(child: Expression) = this(child, None)
   override def withTimeZone(timeZoneId: String): Hour = copy(timeZoneId = Option(timeZoneId))
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala
index 9fbb8401cdf0e..ede5f10788474 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala
@@ -233,3 +233,60 @@ object MinuteExpressionBuilder extends ExpressionBuilder {
     }
   }
 }
+
+case class HoursOfTime(child: Expression)
+  extends RuntimeReplaceable
+  with ExpectsInputTypes {
+
+  override def replacement: Expression = StaticInvoke(
+    classOf[DateTimeUtils.type],
+    IntegerType,
+    "getHoursOfTime",
+    Seq(child),
+    Seq(child.dataType)
+  )
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(TimeType())
+
+  override def children: Seq[Expression] = Seq(child)
+
+  override def prettyName: String = "hour"
+
+  override protected def withNewChildrenInternal(
+      newChildren: IndexedSeq[Expression]): Expression = {
+    copy(child = newChildren.head)
+  }
+}
+
+@ExpressionDescription(
+  usage = """
+    _FUNC_(expr) - Returns the hour component of the given expression.
+
+    If `expr` is a TIMESTAMP or a string that can be cast to timestamp,
+    it returns the hour of that timestamp.
+    If `expr` is a TIME type (since 4.1.0), it returns the hour of the time-of-day.
+ """, + examples = """ + Examples: + > SELECT _FUNC_('2018-02-14 12:58:59'); + 12 + > SELECT _FUNC_(TIME'13:59:59.999999'); + 13 + """, + since = "1.5.0", + group = "datetime_funcs") +object HourExpressionBuilder extends ExpressionBuilder { + override def build(name: String, expressions: Seq[Expression]): Expression = { + if (expressions.isEmpty) { + throw QueryCompilationErrors.wrongNumArgsError(name, Seq("> 0"), expressions.length) + } else { + val child = expressions.head + child.dataType match { + case _: TimeType => + HoursOfTime(child) + case _ => + Hour(child) + } + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 01c4abdde00c1..50bdce58efc68 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -105,6 +105,13 @@ object DateTimeUtils extends SparkDateTimeUtils { getLocalDateTime(micros, zoneId).getHour } + /** + * Returns the hour value of a given TIME (TimeType) value. + */ + def getHoursOfTime(micros: Long): Int = { + microsToLocalTime(micros).getHour + } + /** * Returns the minute value of a given timestamp value. The timestamp is expressed in * microseconds since the epoch. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeExpressionsSuite.scala index 070da53d963d7..a70ede2ea9a22 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeExpressionsSuite.scala @@ -53,6 +53,61 @@ class TimeExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { parameters = Map("input" -> "'100:50'", "format" -> "'mm:HH'")) } + test("HourExpressionBuilder") { + // Empty expressions list + checkError( + exception = intercept[AnalysisException] { + HourExpressionBuilder.build("hour", Seq.empty) + }, + condition = "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + parameters = Map( + "functionName" -> "`hour`", + "expectedNum" -> "> 0", + "actualNum" -> "0", + "docroot" -> SPARK_DOC_ROOT) + ) + + // test TIME-typed child should build HoursOfTime + val timeExpr = Literal(localTime(12, 58, 59), TimeType()) + val builtExprForTime = HourExpressionBuilder.build("hour", Seq(timeExpr)) + assert(builtExprForTime.isInstanceOf[HoursOfTime]) + assert(builtExprForTime.asInstanceOf[HoursOfTime].child eq timeExpr) + + // test non TIME-typed child should build hour + val tsExpr = Literal("2007-09-03 10:45:23") + val builtExprForTs = HourExpressionBuilder.build("hour", Seq(tsExpr)) + assert(builtExprForTs.isInstanceOf[Hour]) + assert(builtExprForTs.asInstanceOf[Hour].child eq tsExpr) + } + + test("Hour with TIME type") { + // A few test times in microseconds since midnight: + // time in microseconds -> expected hour + val testTimes = Seq( + localTime() -> 0, + localTime(1) -> 1, + localTime(0, 59) -> 0, + localTime(14, 30) -> 14, + localTime(12, 58, 59) -> 12, + localTime(23, 0, 1) -> 23, + localTime(23, 59, 59, 999999) -> 23 + ) + + // Create a literal with TimeType() for each test microsecond value + // evaluate HoursOfTime(...), and check that the result matches the expected hour. 
+    testTimes.foreach { case (micros, expectedHour) =>
+      checkEvaluation(
+        HoursOfTime(Literal(micros, TimeType())),
+        expectedHour)
+    }
+
+    // Verify NULL handling
+    checkEvaluation(
+      HoursOfTime(Literal.create(null, TimeType(TimeType.MICROS_PRECISION))),
+      null
+    )
+  }
+
   test("MinuteExpressionBuilder") {
     // Empty expressions list
     checkError(
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index d40d2a9a2901f..9156a01349cf1 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -159,7 +159,7 @@
 | org.apache.spark.sql.catalyst.expressions.Hex | hex | SELECT hex(17) | struct<hex(17):string> |
 | org.apache.spark.sql.catalyst.expressions.HllSketchEstimate | hll_sketch_estimate | SELECT hll_sketch_estimate(hll_sketch_agg(col)) FROM VALUES (1), (1), (2), (2), (3) tab(col) | struct<hll_sketch_estimate(hll_sketch_agg(col, 12)):bigint> |
 | org.apache.spark.sql.catalyst.expressions.HllUnion | hll_union | SELECT hll_sketch_estimate(hll_union(hll_sketch_agg(col1), hll_sketch_agg(col2))) FROM VALUES (1, 4), (1, 4), (2, 5), (2, 5), (3, 6) tab(col1, col2) | struct<hll_sketch_estimate(hll_union(hll_sketch_agg(col1, 12), hll_sketch_agg(col2, 12))):bigint> |
-| org.apache.spark.sql.catalyst.expressions.Hour | hour | SELECT hour('2009-07-30 12:58:59') | struct<hour(2009-07-30 12:58:59):int> |
+| org.apache.spark.sql.catalyst.expressions.HourExpressionBuilder | hour | SELECT hour('2018-02-14 12:58:59') | struct<hour(2018-02-14 12:58:59):int> |
 | org.apache.spark.sql.catalyst.expressions.Hypot | hypot | SELECT hypot(3, 4) | struct<HYPOT(3, 4):double> |
 | org.apache.spark.sql.catalyst.expressions.ILike | ilike | SELECT ilike('Spark', '_Park') | struct<ilike(Spark, _Park):boolean> |
 | org.apache.spark.sql.catalyst.expressions.If | if | SELECT if(1 < 2, 'a', 'b') | struct<(IF((1 < 2), a, b)):string> |
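Taken together, `hour` is now registered through `HourExpressionBuilder`, which dispatches on the argument's data type: a TIME input is rewritten to `HoursOfTime` (a `StaticInvoke` of `DateTimeUtils.getHoursOfTime`), while any other input keeps the pre-existing `Hour` expression. A minimal sketch of the resulting behavior, reusing the two queries from the expression description above (illustrative only; assumes a spark-sql build that includes this patch):

    -- TIME input: new HoursOfTime path
    SELECT hour(TIME'13:59:59.999999');
    -- 13

    -- non-TIME input: legacy Hour path (the string is cast to timestamp)
    SELECT hour('2018-02-14 12:58:59');
    -- 12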