Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions datafusion/expr/src/logical_plan/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,9 @@ pub use plan::{
projection_schema, Aggregate, Analyze, ColumnUnnestList, DescribeTable, Distinct,
DistinctOn, EmptyRelation, Explain, ExplainFormat, ExplainOption, Extension,
FetchType, Filter, Join, JoinConstraint, JoinType, Limit, LogicalPlan, Partitioning,
PlanType, Projection, RecursiveQuery, Repartition, SkipType, Sort, StringifiedPlan,
Subquery, SubqueryAlias, TableScan, ToStringifiedPlan, Union, Unnest, Values, Window,
PlanType, Projection, RecursiveQuery, Repartition, ScanOrdering, SkipType, Sort,
StringifiedPlan, Subquery, SubqueryAlias, TableScan, ToStringifiedPlan, Union,
Unnest, Values, Window,
};
pub use statement::{
Deallocate, Execute, Prepare, SetVariable, Statement, TransactionAccessMode,
Expand Down
105 changes: 105 additions & 0 deletions datafusion/expr/src/logical_plan/plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2537,6 +2537,43 @@ impl PartialOrd for Window {
}
}

#[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Default)]
pub struct ScanOrdering {
    /// Optional preferred ordering for the scan, matching the output order of
    /// upstream query nodes.
    ///
    /// Producing this ordering is optional / best effort for the scan. If the
    /// scan produces this exact ordering and sets its properties to reflect
    /// that, upstream sorts may be optimized away. Otherwise the sorts may
    /// remain in place, but partial ordering can still be exploited, e.g. to
    /// stop early or to reduce the complexity of the sort. It is therefore
    /// recommended that the scan also make a best effort to produce partially
    /// sorted data if possible.
    pub preferred_ordering: Option<Vec<SortExpr>>,
}

impl ScanOrdering {
    /// Sets the preferred ordering, returning `self` for method chaining.
    ///
    /// Note this is a builder-style setter, not a constructor: obtain a value
    /// via `ScanOrdering::default()` and then chain this call. The ordering is
    /// stored wrapped in `Some`, so calling this always marks an ordering
    /// preference as present.
    pub fn with_preferred_ordering(mut self, preferred_ordering: Vec<SortExpr>) -> Self {
        self.preferred_ordering = Some(preferred_ordering);
        self
    }
}

impl Debug for ScanOrdering {
    /// Renders the preferred ordering as a comma-separated list of sort
    /// expressions, or the literal string `"None"` when no preference is set.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let rendered = match &self.preferred_ordering {
            Some(exprs) => {
                let parts: Vec<String> = exprs.iter().map(|e| e.to_string()).collect();
                parts.join(", ")
            }
            None => "None".to_string(),
        };
        f.debug_struct("ScanOrdering")
            .field("preferred_ordering", &rendered)
            .finish_non_exhaustive()
    }
}

/// Produces rows from a table provider by reference or from the context
#[derive(Clone)]
pub struct TableScan {
Expand All @@ -2552,6 +2589,8 @@ pub struct TableScan {
pub filters: Vec<Expr>,
/// Optional number of rows to read
pub fetch: Option<usize>,
/// Ordering for the scan
pub ordering: Option<ScanOrdering>,
}

impl Debug for TableScan {
Expand All @@ -2563,6 +2602,7 @@ impl Debug for TableScan {
.field("projected_schema", &self.projected_schema)
.field("filters", &self.filters)
.field("fetch", &self.fetch)
.field("ordering", &self.ordering)
.finish_non_exhaustive()
}
}
Expand All @@ -2574,6 +2614,7 @@ impl PartialEq for TableScan {
&& self.projected_schema == other.projected_schema
&& self.filters == other.filters
&& self.fetch == other.fetch
&& self.ordering == other.ordering
}
}

Expand All @@ -2593,18 +2634,22 @@ impl PartialOrd for TableScan {
pub filters: &'a Vec<Expr>,
/// Optional number of rows to read
pub fetch: &'a Option<usize>,
/// Optional preferred ordering for the scan
pub ordering: &'a Option<ScanOrdering>,
}
let comparable_self = ComparableTableScan {
table_name: &self.table_name,
projection: &self.projection,
filters: &self.filters,
fetch: &self.fetch,
ordering: &self.ordering,
};
let comparable_other = ComparableTableScan {
table_name: &other.table_name,
projection: &other.projection,
filters: &other.filters,
fetch: &other.fetch,
ordering: &other.ordering,
};
comparable_self.partial_cmp(&comparable_other)
}
Expand All @@ -2617,6 +2662,7 @@ impl Hash for TableScan {
self.projected_schema.hash(state);
self.filters.hash(state);
self.fetch.hash(state);
self.ordering.hash(state);
}
}

Expand Down Expand Up @@ -2670,8 +2716,65 @@ impl TableScan {
projected_schema,
filters,
fetch,
ordering: None,
})
}

/// Sets the preferred ordering for this table scan using the builder pattern.
///
/// The preferred ordering serves as a hint to table providers about the desired
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mybe we can move some of this comment to PreferredOrdering if we go with the struct approach

/// sort order for the data. Table providers can use this information to optimize
/// data access patterns, choose appropriate indexes, or leverage existing sort
/// orders in the underlying storage.
///
/// # Parameters
///
/// * `preferred_ordering` - An optional vector of sort expressions representing
/// the desired ordering. `None` indicates no specific ordering preference.
///
/// # Returns
///
/// Returns `self` to enable method chaining in the builder pattern.
///
/// # Examples
///
/// ```rust
/// use datafusion_expr::{col, SortExpr};
/// # use datafusion_expr::logical_plan::{TableScan, builder::table_source};
/// # use std::sync::Arc;
/// # use datafusion_common::{TableReference, DFSchema};
/// # use arrow::datatypes::{Schema, Field, DataType};
///
/// // Create a table scan with preferred ordering by column 'a' ascending
/// # let table_name = TableReference::bare("test");
/// # let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
/// # let source = table_source(&schema);
/// # let projection = None;
/// # let projected_schema = Arc::new(datafusion_common::DFSchema::empty());
/// # let filters = vec![];
/// # let fetch = None;
/// let table_scan = TableScan {
/// table_name,
/// source,
/// projection,
/// projected_schema,
/// filters,
/// fetch,
/// preferred_ordering: None,
/// }.with_preferred_ordering(Some(vec![
/// SortExpr::new(col("a"), true, false) // ASC NULLS LAST
/// ]));
/// ```
///
/// # Notes
///
/// This is purely an optimization hint. The table provider may choose to ignore
/// the preferred ordering if it cannot be efficiently satisfied, and the query
/// execution engine should not rely on the data being returned in this order.
pub fn with_ordering(mut self, ordering: ScanOrdering) -> Self {
self.ordering = Some(ordering);
self
}
}

// Repartition the plan based on a partitioning scheme.
Expand Down Expand Up @@ -4896,6 +4999,7 @@ mod tests {
projected_schema: Arc::clone(&schema),
filters: vec![],
fetch: None,
ordering: None,
}));
let col = schema.field_names()[0].clone();

Expand Down Expand Up @@ -4926,6 +5030,7 @@ mod tests {
projected_schema: Arc::clone(&unique_schema),
filters: vec![],
fetch: None,
ordering: None,
}));
let col = schema.field_names()[0].clone();

Expand Down
2 changes: 2 additions & 0 deletions datafusion/expr/src/logical_plan/tree_node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,7 @@ impl LogicalPlan {
projected_schema,
filters,
fetch,
ordering,
}) => filters.map_elements(f)?.update_data(|filters| {
LogicalPlan::TableScan(TableScan {
table_name,
Expand All @@ -607,6 +608,7 @@ impl LogicalPlan {
projected_schema,
filters,
fetch,
ordering,
})
}),
LogicalPlan::Distinct(Distinct::On(DistinctOn {
Expand Down
1 change: 1 addition & 0 deletions datafusion/optimizer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ pub mod optimizer;
pub mod propagate_empty_relation;
pub mod push_down_filter;
pub mod push_down_limit;
pub mod push_down_sort;
pub mod replace_distinct_aggregate;
pub mod scalar_subquery_to_join;
pub mod simplify_expressions;
Expand Down
5 changes: 5 additions & 0 deletions datafusion/optimizer/src/optimize_projections/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ fn optimize_projections(
filters,
fetch,
projected_schema: _,
ordering,
} = table_scan;

// Get indices referred to in the original (schema with all fields)
Expand All @@ -257,6 +258,10 @@ fn optimize_projections(
filters,
fetch,
)
.map(|s| match ordering {
Some(ordering) => s.with_ordering(ordering),
None => s,
})
.map(LogicalPlan::TableScan)
.map(Transformed::yes);
}
Expand Down
3 changes: 3 additions & 0 deletions datafusion/optimizer/src/optimizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ use crate::plan_signature::LogicalPlanSignature;
use crate::propagate_empty_relation::PropagateEmptyRelation;
use crate::push_down_filter::PushDownFilter;
use crate::push_down_limit::PushDownLimit;
use crate::push_down_sort::PushDownSort;
use crate::replace_distinct_aggregate::ReplaceDistinctWithAggregate;
use crate::scalar_subquery_to_join::ScalarSubqueryToJoin;
use crate::simplify_expressions::SimplifyExpressions;
Expand Down Expand Up @@ -242,6 +243,8 @@ impl Optimizer {
Arc::new(EliminateOuterJoin::new()),
// Filters can't be pushed down past Limits, we should do PushDownFilter after PushDownLimit
Arc::new(PushDownLimit::new()),
// Sort pushdown should happen before filter pushdown to maximize optimization opportunities
Arc::new(PushDownSort::new()),
Arc::new(PushDownFilter::new()),
Arc::new(SingleDistinctToGroupBy::new()),
// The previous optimizations added expressions and projections,
Expand Down
1 change: 1 addition & 0 deletions datafusion/optimizer/src/push_down_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3055,6 +3055,7 @@ mod tests {
projection,
source: Arc::new(test_provider),
fetch: None,
ordering: None,
});

Ok(LogicalPlanBuilder::from(table_scan))
Expand Down
Loading
Loading