Skip to content

Commit 1ad893b

Browse files
committed
[SPARK-53762][SQL] Add date and time conversions simplifier rule to optimizer
1 parent 46ac78e commit 1ad893b

File tree

6 files changed

+122
-0
lines changed

6 files changed

+122
-0
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,6 +1014,8 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti
10141014
override protected def withNewChildrenInternal(
10151015
newLeft: Expression, newRight: Expression): DateFormatClass =
10161016
copy(left = newLeft, right = newRight)
1017+
1018+
final override def nodePatternsInternal(): Seq[TreePattern] = Seq(DATETIME)
10171019
}
10181020

10191021
/**
@@ -1147,6 +1149,8 @@ case class GetTimestamp(
11471149
newLeft: Expression,
11481150
newRight: Expression): Expression =
11491151
copy(left = newLeft, right = newRight)
1152+
1153+
final override def nodePatternsInternal(): Seq[TreePattern] = Seq(DATETIME)
11501154
}
11511155

11521156

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ abstract class Optimizer(catalogManager: CatalogManager)
146146
PruneFilters,
147147
SimplifyCasts,
148148
SimplifyCaseConversionExpressions,
149+
SimplifyDateTimeConversions,
149150
RewriteCorrelatedScalarSubquery,
150151
RewriteLateralSubquery,
151152
EliminateSerialization,

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1142,6 +1142,52 @@ object SimplifyCaseConversionExpressions extends Rule[LogicalPlan] {
11421142
}
11431143
}
11441144

1145+
/**
1146+
* Removes date and time related functions that are unnecessary.
1147+
*/
1148+
object SimplifyDateTimeConversions extends Rule[LogicalPlan] {
1149+
def apply(plan: LogicalPlan): LogicalPlan = plan.transformWithPruning(
1150+
_.containsPattern(DATETIME), ruleId) {
1151+
case q: LogicalPlan => q.transformExpressionsUpWithPruning(
1152+
_.containsPattern(DATETIME), ruleId) {
1153+
case DateFormatClass(
1154+
GetTimestamp(
1155+
e @ DateFormatClass(
1156+
_,
1157+
pattern,
1158+
timeZoneId),
1159+
pattern2,
1160+
TimestampType,
1161+
_,
1162+
timeZoneId2,
1163+
_),
1164+
pattern3,
1165+
timeZoneId3)
1166+
if pattern.semanticEquals(pattern2) && pattern.semanticEquals(pattern3)
1167+
&& timeZoneId == timeZoneId2 && timeZoneId == timeZoneId3 =>
1168+
e
1169+
case GetTimestamp(
1170+
DateFormatClass(
1171+
e @ GetTimestamp(
1172+
_,
1173+
pattern,
1174+
TimestampType,
1175+
_,
1176+
timeZoneId,
1177+
_),
1178+
pattern2,
1179+
timeZoneId2),
1180+
pattern3,
1181+
TimestampType,
1182+
_,
1183+
timeZoneId3,
1184+
_)
1185+
if pattern.semanticEquals(pattern2) && pattern.semanticEquals(pattern3)
1186+
&& timeZoneId == timeZoneId2 && timeZoneId == timeZoneId3 =>
1187+
e
1188+
}
1189+
}
1190+
}
11451191

11461192
/**
11471193
* Combine nested [[Concat]] expressions.

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ object RuleIdCollection {
175175
"org.apache.spark.sql.catalyst.optimizer.RewriteAsOfJoin" ::
176176
"org.apache.spark.sql.catalyst.optimizer.SimplifyBinaryComparison" ::
177177
"org.apache.spark.sql.catalyst.optimizer.SimplifyCaseConversionExpressions" ::
178+
"org.apache.spark.sql.catalyst.optimizer.SimplifyDateTimeConversions" ::
178179
"org.apache.spark.sql.catalyst.optimizer.SimplifyCasts" ::
179180
"org.apache.spark.sql.catalyst.optimizer.SimplifyConditionals" ::
180181
"org.apache.spark.sql.catalyst.optimizer.SimplifyExtractValueOps" ::

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ object TreePattern extends Enumeration {
4242
val COUNT: Value = Value
4343
val CREATE_NAMED_STRUCT: Value = Value
4444
val CURRENT_LIKE: Value = Value
45+
val DATETIME: Value = Value
4546
val DYNAMIC_PRUNING_EXPRESSION: Value = Value
4647
val DYNAMIC_PRUNING_SUBQUERY: Value = Value
4748
val EXISTS_SUBQUERY = Value
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.catalyst.optimizer
19+
20+
import org.apache.spark.sql.catalyst.dsl.expressions._
21+
import org.apache.spark.sql.catalyst.dsl.plans._
22+
import org.apache.spark.sql.catalyst.expressions.{DateFormatClass, GetTimestamp}
23+
import org.apache.spark.sql.catalyst.plans.PlanTest
24+
import org.apache.spark.sql.catalyst.plans.logical._
25+
import org.apache.spark.sql.catalyst.rules.RuleExecutor
26+
import org.apache.spark.sql.types._
27+
28+
class SimplifyDateTimeConversionsSuite extends PlanTest {
29+
30+
object Optimize extends RuleExecutor[LogicalPlan] {
31+
val batches =
32+
Batch("SimplifyDateTimeConversions", FixedPoint(50), SimplifyDateTimeConversions) :: Nil
33+
}
34+
35+
val testRelation = LocalRelation($"ts".timestamp, $"s".string)
36+
37+
test("SPARK-53762: Remove DateFormat - GetTimestamp groups") {
38+
val pattern = "yyyy-MM-dd"
39+
40+
val df = DateFormatClass($"ts", pattern)
41+
val gt = GetTimestamp($"s", pattern, TimestampType)
42+
43+
val originalQuery = testRelation
44+
.select(
45+
DateFormatClass(
46+
GetTimestamp(
47+
df,
48+
pattern,
49+
TimestampType),
50+
pattern) as "c1",
51+
GetTimestamp(
52+
DateFormatClass(
53+
gt,
54+
pattern),
55+
pattern,
56+
TimestampType) as "c2")
57+
.analyze
58+
59+
val optimized = Optimize.execute(originalQuery)
60+
61+
val expected = testRelation
62+
.select(
63+
df as "c1",
64+
gt as "c2")
65+
.analyze
66+
67+
comparePlans(optimized, expected)
68+
}
69+
}

0 commit comments

Comments
 (0)