-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Support API for "pre-image" for pruning predicate evaluation #19722
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d94889a
4aa7f4e
2329c12
7ac8325
7a3e8b3
fbd5dcc
d920735
5ffb704
2fdc14c
c2b0cd3
a0b6564
0a24d60
59235de
9ae434e
9f845e7
510b5bc
b9f5c2c
ec8cc7e
47a18dc
01b254b
d8b4f0f
116d6e2
c0ed63c
5856150
c53a9fc
9b32843
53f72ed
ba5be8a
46a941f
798d88f
fb155f6
a070246
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,29 @@ | ||
| // Licensed to the Apache Software Foundation (ASF) under one | ||
| // or more contributor license agreements. See the NOTICE file | ||
| // distributed with this work for additional information | ||
| // regarding copyright ownership. The ASF licenses this file | ||
| // to you under the Apache License, Version 2.0 (the | ||
| // "License"); you may not use this file except in compliance | ||
| // with the License. You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, | ||
| // software distributed under the License is distributed on an | ||
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| // KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations | ||
| // under the License. | ||
|
|
||
| use datafusion_expr_common::interval_arithmetic::Interval; | ||
|
|
||
| use crate::Expr; | ||
|
|
||
| /// Return from [`crate::ScalarUDFImpl::preimage`] | ||
| pub enum PreimageResult { | ||
| /// No preimage exists for the specified value | ||
| None, | ||
| /// The expression always evaluates to the specified constant | ||
| /// given that `expr` is within the interval | ||
| Range { expr: Expr, interval: Box<Interval> }, | ||
alamb marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -39,7 +39,7 @@ use datafusion_common::{ | |
| }; | ||
| use datafusion_expr::{ | ||
| BinaryExpr, Case, ColumnarValue, Expr, Like, Operator, Volatility, and, | ||
| binary::BinaryTypeCoercer, lit, or, | ||
| binary::BinaryTypeCoercer, lit, or, preimage::PreimageResult, | ||
| }; | ||
| use datafusion_expr::{Cast, TryCast, simplify::ExprSimplifyResult}; | ||
| use datafusion_expr::{expr::ScalarFunction, interval_arithmetic::NullableInterval}; | ||
|
|
@@ -51,14 +51,17 @@ use datafusion_physical_expr::{create_physical_expr, execution_props::ExecutionP | |
|
|
||
| use super::inlist_simplifier::ShortenInListSimplifier; | ||
| use super::utils::*; | ||
| use crate::analyzer::type_coercion::TypeCoercionRewriter; | ||
| use crate::simplify_expressions::SimplifyContext; | ||
| use crate::simplify_expressions::regex::simplify_regex_expr; | ||
| use crate::simplify_expressions::unwrap_cast::{ | ||
| is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary, | ||
| is_cast_expr_and_support_unwrap_cast_in_comparison_for_inlist, | ||
| unwrap_cast_in_comparison_for_binary, | ||
| }; | ||
| use crate::{ | ||
| analyzer::type_coercion::TypeCoercionRewriter, | ||
| simplify_expressions::udf_preimage::rewrite_with_preimage, | ||
| }; | ||
| use datafusion_expr::expr_rewriter::rewrite_with_guarantees_map; | ||
| use datafusion_expr_common::casts::try_cast_literal_to_type; | ||
| use indexmap::IndexSet; | ||
|
|
@@ -1969,12 +1972,85 @@ impl TreeNodeRewriter for Simplifier<'_> { | |
| })) | ||
| } | ||
|
|
||
| // ======================================= | ||
sdf-jkl marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| // preimage_in_comparison | ||
| // ======================================= | ||
| // | ||
| // For case: | ||
| // date_part('YEAR', expr) op literal | ||
| // | ||
| // For details see datafusion_expr::ScalarUDFImpl::preimage | ||
| Expr::BinaryExpr(BinaryExpr { left, op, right }) => { | ||
| use datafusion_expr::Operator::*; | ||
| let is_preimage_op = matches!( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it might be nice (as a follow on PR) to mention this list in the docs for preimage -- e.g. that it only applies to predicates |
||
| op, | ||
| Eq | NotEq | ||
| | Lt | ||
| | LtEq | ||
| | Gt | ||
| | GtEq | ||
| | IsDistinctFrom | ||
| | IsNotDistinctFrom | ||
| ); | ||
| if !is_preimage_op || is_null(&right) { | ||
| return Ok(Transformed::no(Expr::BinaryExpr(BinaryExpr { | ||
| left, | ||
| op, | ||
| right, | ||
| }))); | ||
| } | ||
|
|
||
| if let PreimageResult::Range { interval, expr } = | ||
| get_preimage(left.as_ref(), right.as_ref(), info)? | ||
| { | ||
| rewrite_with_preimage(*interval, op, expr)? | ||
| } else if let Some(swapped) = op.swap() { | ||
| if let PreimageResult::Range { interval, expr } = | ||
| get_preimage(right.as_ref(), left.as_ref(), info)? | ||
| { | ||
| rewrite_with_preimage(*interval, swapped, expr)? | ||
| } else { | ||
| Transformed::no(Expr::BinaryExpr(BinaryExpr { left, op, right })) | ||
| } | ||
| } else { | ||
| Transformed::no(Expr::BinaryExpr(BinaryExpr { left, op, right })) | ||
| } | ||
| } | ||
|
|
||
| // no additional rewrites possible | ||
| expr => Transformed::no(expr), | ||
| }) | ||
| } | ||
| } | ||
|
|
||
| fn get_preimage( | ||
| left_expr: &Expr, | ||
| right_expr: &Expr, | ||
| info: &SimplifyContext, | ||
| ) -> Result<PreimageResult> { | ||
| let Expr::ScalarFunction(ScalarFunction { func, args }) = left_expr else { | ||
| return Ok(PreimageResult::None); | ||
| }; | ||
| if !is_literal_or_literal_cast(right_expr) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if there is a reason to limit this to literal ? It seems like the call to
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is still an open question, but it is ok to handle as a follow on PR (aka widen the expressions)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you have an example where we could use a non-literal
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was thinking something like extracting the year from a computed value. For example, if we had a table with a base date and an interval, it seems like we could do something like): WHERE 2025 = date_part(YEAR, t.base_date + t.interval)rewrite to WHERE (t.base_date + t.interval) >= 2025-01-01 && (t.base_date + t.interval) < 2026-01-01However, in this case I agree there is a tradeoff that this actually might be worse to optimize (take longer to evaluate) 🤔
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this should already work. The |
||
| return Ok(PreimageResult::None); | ||
| } | ||
| if func.signature().volatility != Volatility::Immutable { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also for a follow on PR, I think it would be safe to rewrite stable functions (whose values don't change during the statement) |
||
| return Ok(PreimageResult::None); | ||
| } | ||
| func.preimage(args, right_expr, info) | ||
| } | ||
|
|
||
| fn is_literal_or_literal_cast(expr: &Expr) -> bool { | ||
| match expr { | ||
| Expr::Literal(_, _) => true, | ||
| Expr::Cast(Cast { expr, .. }) => matches!(expr.as_ref(), Expr::Literal(_, _)), | ||
| Expr::TryCast(TryCast { expr, .. }) => { | ||
| matches!(expr.as_ref(), Expr::Literal(_, _)) | ||
| } | ||
| _ => false, | ||
| } | ||
| } | ||
|
|
||
| fn as_string_scalar(expr: &Expr) -> Option<(DataType, &Option<String>)> { | ||
| match expr { | ||
| Expr::Literal(ScalarValue::Utf8(s), _) => Some((DataType::Utf8, s)), | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.