1919
2020use std:: collections:: HashMap ;
2121use std:: fmt:: Write ;
22- use std:: sync:: Arc ;
22+ use std:: sync:: atomic:: AtomicI32 ;
23+ use std:: sync:: { Arc , OnceLock } ;
2324
2425use crate :: datasource:: file_format:: arrow:: ArrowFormat ;
2526use crate :: datasource:: file_format:: avro:: AvroFormat ;
@@ -89,8 +90,8 @@ use datafusion_expr::expr::{
8990use datafusion_expr:: expr_rewriter:: unnormalize_cols;
9091use datafusion_expr:: logical_plan:: builder:: wrap_projection_for_join_if_necessary;
9192use datafusion_expr:: {
92- DescribeTable , DmlStatement , ScalarFunctionDefinition , StringifiedPlan , WindowFrame ,
93- WindowFrameBound , WriteOp , NamedRelation , RecursiveQuery ,
93+ DescribeTable , DmlStatement , NamedRelation , RecursiveQuery , ScalarFunctionDefinition ,
94+ StringifiedPlan , WindowFrame , WindowFrameBound , WriteOp ,
9495} ;
9596use datafusion_physical_expr:: expressions:: Literal ;
9697use datafusion_physical_plan:: placeholder_row:: PlaceholderRowExec ;
@@ -452,11 +453,13 @@ impl PhysicalPlanner for DefaultPhysicalPlanner {
452453 logical_plan : & LogicalPlan ,
453454 session_state : & SessionState ,
454455 ) -> Result < Arc < dyn ExecutionPlan > > {
456+ reset_recursive_cte_physical_plan_branch_number ( ) ;
457+
455458 match self . handle_explain ( logical_plan, session_state) . await ? {
456459 Some ( plan) => Ok ( plan) ,
457460 None => {
458461 let plan = self
459- . create_initial_plan ( logical_plan, session_state)
462+ . create_initial_plan ( logical_plan, session_state, None )
460463 . await ?;
461464 self . optimize_internal ( plan, session_state, |_, _| { } )
462465 }
@@ -487,6 +490,23 @@ impl PhysicalPlanner for DefaultPhysicalPlanner {
487490 }
488491}
489492
/// Process-wide counter that hands out the numeric suffix appended to a
/// recursive CTE's name during physical planning, so that each recursive
/// query planned in one run gets a distinct name.
///
/// Atomics are const-constructible, so the counter lives directly in the
/// `static`; no lazy-initialization wrapper is needed.
///
/// NOTE(review): the counter is shared by every planner in the process, so
/// planning runs that overlap in time draw from (and reset) the same
/// sequence — confirm this is acceptable for concurrent sessions.
static RECURSIVE_CTE_PHYSICAL_PLAN_BRANCH: std::sync::atomic::AtomicU32 =
    std::sync::atomic::AtomicU32::new(0);

/// Returns the current branch number and advances the counter by one.
///
/// The first call after a reset yields `0`. The increment wraps around on
/// overflow instead of panicking.
fn new_recursive_cte_physical_plan_branch_number() -> u32 {
    use std::sync::atomic::Ordering;

    // `fetch_add` returns the value held *before* the increment.
    RECURSIVE_CTE_PHYSICAL_PLAN_BRANCH.fetch_add(1, Ordering::SeqCst)
}

/// Rewinds the branch counter to `0` so the next number handed out is `0`.
fn reset_recursive_cte_physical_plan_branch_number() {
    use std::sync::atomic::Ordering;

    RECURSIVE_CTE_PHYSICAL_PLAN_BRANCH.store(0, Ordering::SeqCst);
}
509+
490510impl DefaultPhysicalPlanner {
491511 /// Create a physical planner that uses `extension_planners` to
492512 /// plan user-defined logical nodes [`LogicalPlan::Extension`].
@@ -507,6 +527,7 @@ impl DefaultPhysicalPlanner {
507527 & ' a self ,
508528 logical_plans : impl IntoIterator < Item = & ' a LogicalPlan > + Send + ' a ,
509529 session_state : & ' a SessionState ,
530+ ctx : Option < & ' a String > ,
510531 ) -> BoxFuture < ' a , Result < Vec < Arc < dyn ExecutionPlan > > > > {
511532 async move {
512533 // First build futures with as few references as possible, then perform some stream magic.
@@ -519,7 +540,7 @@ impl DefaultPhysicalPlanner {
519540 . into_iter ( )
520541 . enumerate ( )
521542 . map ( |( idx, lp) | async move {
522- let plan = self . create_initial_plan ( lp, session_state) . await ?;
543+ let plan = self . create_initial_plan ( lp, session_state, ctx ) . await ?;
523544 Ok ( ( idx, plan) ) as Result < _ >
524545 } )
525546 . collect :: < Vec < _ > > ( ) ;
@@ -548,6 +569,7 @@ impl DefaultPhysicalPlanner {
548569 & ' a self ,
549570 logical_plan : & ' a LogicalPlan ,
550571 session_state : & ' a SessionState ,
572+ ctx : Option < & ' a String > ,
551573 ) -> BoxFuture < ' a , Result < Arc < dyn ExecutionPlan > > > {
552574 async move {
553575 let exec_plan: Result < Arc < dyn ExecutionPlan > > = match logical_plan {
@@ -572,7 +594,7 @@ impl DefaultPhysicalPlanner {
572594 single_file_output,
573595 copy_options,
574596 } ) => {
575- let input_exec = self . create_initial_plan ( input, session_state) . await ?;
597+ let input_exec = self . create_initial_plan ( input, session_state, ctx ) . await ?;
576598 let parsed_url = ListingTableUrl :: parse ( output_url) ?;
577599 let object_store_url = parsed_url. object_store ( ) ;
578600
@@ -620,7 +642,7 @@ impl DefaultPhysicalPlanner {
620642 let name = table_name. table ( ) ;
621643 let schema = session_state. schema_for_ref ( table_name) ?;
622644 if let Some ( provider) = schema. table ( name) . await {
623- let input_exec = self . create_initial_plan ( input, session_state) . await ?;
645+ let input_exec = self . create_initial_plan ( input, session_state, ctx ) . await ?;
624646 provider. insert_into ( session_state, input_exec, false ) . await
625647 } else {
626648 return exec_err ! (
@@ -637,7 +659,7 @@ impl DefaultPhysicalPlanner {
637659 let name = table_name. table ( ) ;
638660 let schema = session_state. schema_for_ref ( table_name) ?;
639661 if let Some ( provider) = schema. table ( name) . await {
640- let input_exec = self . create_initial_plan ( input, session_state) . await ?;
662+ let input_exec = self . create_initial_plan ( input, session_state, ctx ) . await ?;
641663 provider. insert_into ( session_state, input_exec, true ) . await
642664 } else {
643665 return exec_err ! (
@@ -678,7 +700,7 @@ impl DefaultPhysicalPlanner {
678700 ) ;
679701 }
680702
681- let input_exec = self . create_initial_plan ( input, session_state) . await ?;
703+ let input_exec = self . create_initial_plan ( input, session_state, ctx ) . await ?;
682704
683705 // at this moment we are guaranteed by the logical planner
684706 // to have all the window_expr to have equal sort key
@@ -774,7 +796,7 @@ impl DefaultPhysicalPlanner {
774796 ..
775797 } ) => {
776798 // Initially need to perform the aggregate and then merge the partitions
777- let input_exec = self . create_initial_plan ( input, session_state) . await ?;
799+ let input_exec = self . create_initial_plan ( input, session_state, ctx ) . await ?;
778800 let physical_input_schema = input_exec. schema ( ) ;
779801 let logical_input_schema = input. as_ref ( ) . schema ( ) ;
780802
@@ -848,7 +870,7 @@ impl DefaultPhysicalPlanner {
848870 ) ?) )
849871 }
850872 LogicalPlan :: Projection ( Projection { input, expr, .. } ) => {
851- let input_exec = self . create_initial_plan ( input, session_state) . await ?;
873+ let input_exec = self . create_initial_plan ( input, session_state, ctx ) . await ?;
852874 let input_schema = input. as_ref ( ) . schema ( ) ;
853875
854876 let physical_exprs = expr
@@ -900,7 +922,7 @@ impl DefaultPhysicalPlanner {
900922 ) ?) )
901923 }
902924 LogicalPlan :: Filter ( filter) => {
903- let physical_input = self . create_initial_plan ( & filter. input , session_state) . await ?;
925+ let physical_input = self . create_initial_plan ( & filter. input , session_state, ctx ) . await ?;
904926 let input_schema = physical_input. as_ref ( ) . schema ( ) ;
905927 let input_dfschema = filter. input . schema ( ) ;
906928
@@ -914,16 +936,16 @@ impl DefaultPhysicalPlanner {
914936 let filter = FilterExec :: try_new ( runtime_expr, physical_input) ?;
915937 Ok ( Arc :: new ( filter. with_default_selectivity ( selectivity) ?) )
916938 }
917- LogicalPlan :: Union ( Union { inputs, .. } ) => {
918- let physical_plans = self . create_initial_plan_multi ( inputs. iter ( ) . map ( |lp| lp. as_ref ( ) ) , session_state) . await ?;
939+ LogicalPlan :: Union ( Union { inputs, schema } ) => {
940+ let physical_plans = self . create_initial_plan_multi ( inputs. iter ( ) . map ( |lp| lp. as_ref ( ) ) , session_state, ctx ) . await ?;
919941
920942 Ok ( Arc :: new ( UnionExec :: new ( physical_plans) ) )
921943 }
922944 LogicalPlan :: Repartition ( Repartition {
923945 input,
924946 partitioning_scheme,
925947 } ) => {
926- let physical_input = self . create_initial_plan ( input, session_state) . await ?;
948+ let physical_input = self . create_initial_plan ( input, session_state, ctx ) . await ?;
927949 let input_schema = physical_input. schema ( ) ;
928950 let input_dfschema = input. as_ref ( ) . schema ( ) ;
929951 let physical_partitioning = match partitioning_scheme {
@@ -954,7 +976,7 @@ impl DefaultPhysicalPlanner {
954976 ) ?) )
955977 }
956978 LogicalPlan :: Sort ( Sort { expr, input, fetch, .. } ) => {
957- let physical_input = self . create_initial_plan ( input, session_state) . await ?;
979+ let physical_input = self . create_initial_plan ( input, session_state, ctx ) . await ?;
958980 let input_schema = physical_input. as_ref ( ) . schema ( ) ;
959981 let input_dfschema = input. as_ref ( ) . schema ( ) ;
960982 let sort_expr = expr
@@ -1045,12 +1067,12 @@ impl DefaultPhysicalPlanner {
10451067 } ;
10461068
10471069 return self
1048- . create_initial_plan ( & join_plan, session_state)
1070+ . create_initial_plan ( & join_plan, session_state, ctx )
10491071 . await ;
10501072 }
10511073
10521074 // All equi-join keys are columns now, create physical join plan
1053- let left_right = self . create_initial_plan_multi ( [ left. as_ref ( ) , right. as_ref ( ) ] , session_state) . await ?;
1075+ let left_right = self . create_initial_plan_multi ( [ left. as_ref ( ) , right. as_ref ( ) ] , session_state, ctx ) . await ?;
10541076 let [ physical_left, physical_right] : [ Arc < dyn ExecutionPlan > ; 2 ] = left_right. try_into ( ) . map_err ( |_| DataFusionError :: Internal ( "`create_initial_plan_multi` is broken" . to_string ( ) ) ) ?;
10551077 let left_df_schema = left. schema ( ) ;
10561078 let right_df_schema = right. schema ( ) ;
@@ -1185,7 +1207,7 @@ impl DefaultPhysicalPlanner {
11851207 }
11861208 }
11871209 LogicalPlan :: CrossJoin ( CrossJoin { left, right, .. } ) => {
1188- let left_right = self . create_initial_plan_multi ( [ left. as_ref ( ) , right. as_ref ( ) ] , session_state) . await ?;
1210+ let left_right = self . create_initial_plan_multi ( [ left. as_ref ( ) , right. as_ref ( ) ] , session_state, ctx ) . await ?;
11891211 let [ left, right] : [ Arc < dyn ExecutionPlan > ; 2 ] = left_right. try_into ( ) . map_err ( |_| DataFusionError :: Internal ( "`create_initial_plan_multi` is broken" . to_string ( ) ) ) ?;
11901212 Ok ( Arc :: new ( CrossJoinExec :: new ( left, right) ) )
11911213 }
@@ -1203,10 +1225,10 @@ impl DefaultPhysicalPlanner {
12031225 SchemaRef :: new ( schema. as_ref ( ) . to_owned ( ) . into ( ) ) ,
12041226 ) ) ) ,
12051227 LogicalPlan :: SubqueryAlias ( SubqueryAlias { input, .. } ) => {
1206- self . create_initial_plan ( input, session_state) . await
1228+ self . create_initial_plan ( input, session_state, ctx ) . await
12071229 }
12081230 LogicalPlan :: Limit ( Limit { input, skip, fetch, .. } ) => {
1209- let input = self . create_initial_plan ( input, session_state) . await ?;
1231+ let input = self . create_initial_plan ( input, session_state, ctx ) . await ?;
12101232
12111233 // GlobalLimitExec requires a single partition for input
12121234 let input = if input. output_partitioning ( ) . partition_count ( ) == 1 {
@@ -1224,7 +1246,7 @@ impl DefaultPhysicalPlanner {
12241246 Ok ( Arc :: new ( GlobalLimitExec :: new ( input, * skip, * fetch) ) )
12251247 }
12261248 LogicalPlan :: Unnest ( Unnest { input, column, schema, options } ) => {
1227- let input = self . create_initial_plan ( input, session_state) . await ?;
1249+ let input = self . create_initial_plan ( input, session_state, ctx ) . await ?;
12281250 let column_exec = schema. index_of_column ( column)
12291251 . map ( |idx| Column :: new ( & column. name , idx) ) ?;
12301252 let schema = SchemaRef :: new ( schema. as_ref ( ) . to_owned ( ) . into ( ) ) ;
@@ -1277,7 +1299,7 @@ impl DefaultPhysicalPlanner {
12771299 "Unsupported logical plan: Analyze must be root of the plan"
12781300 ) ,
12791301 LogicalPlan :: Extension ( e) => {
1280- let physical_inputs = self . create_initial_plan_multi ( e. node . inputs ( ) , session_state) . await ?;
1302+ let physical_inputs = self . create_initial_plan_multi ( e. node . inputs ( ) , session_state, ctx ) . await ?;
12811303
12821304 let mut maybe_plan = None ;
12831305 for planner in & self . extension_planners {
@@ -1313,13 +1335,19 @@ impl DefaultPhysicalPlanner {
13131335 Ok ( plan)
13141336 }
13151337 }
1338+ // LogicalPlan::SubqueryAlias(SubqueryAlias())
13161339 LogicalPlan :: RecursiveQuery ( RecursiveQuery { name, static_term, recursive_term, is_distinct } ) => {
1317- let static_term = self . create_initial_plan ( static_term, session_state) . await ?;
1318- let recursive_term = self . create_initial_plan ( recursive_term, session_state) . await ?;
1340+ let name = format ! ( "{}-{}" , name, new_recursive_cte_physical_plan_branch_number( ) ) ;
1341+
1342+ let ctx = Some ( & name) ;
1343+
1344+ let static_term = self . create_initial_plan ( static_term, session_state, ctx) . await ?;
1345+ let recursive_term = self . create_initial_plan ( recursive_term, session_state, ctx) . await ?;
13191346
13201347 Ok ( Arc :: new ( RecursiveQueryExec :: new ( name. clone ( ) , static_term, recursive_term, * is_distinct) ) )
13211348 }
1322- LogicalPlan :: NamedRelation ( NamedRelation { name, schema} ) => {
1349+ LogicalPlan :: NamedRelation ( NamedRelation { schema, ..} ) => {
1350+ let name = ctx. expect ( "NamedRelation must have a context that contains the recursive query's branch name" ) ;
13231351 // Named relations are how we represent access to any sort of dynamic data provider. They
13241352 // differ from tables in the sense that they can start existing dynamically during the
13251353 // execution of a query and then disappear before it even finishes.
@@ -1866,6 +1894,8 @@ impl DefaultPhysicalPlanner {
18661894 logical_plan : & LogicalPlan ,
18671895 session_state : & SessionState ,
18681896 ) -> Result < Option < Arc < dyn ExecutionPlan > > > {
1897+ reset_recursive_cte_physical_plan_branch_number ( ) ;
1898+
18691899 if let LogicalPlan :: Explain ( e) = logical_plan {
18701900 use PlanType :: * ;
18711901 let mut stringified_plans = vec ! [ ] ;
@@ -1881,7 +1911,7 @@ impl DefaultPhysicalPlanner {
18811911
18821912 if !config. logical_plan_only && e. logical_optimization_succeeded {
18831913 match self
1884- . create_initial_plan ( e. plan . as_ref ( ) , session_state)
1914+ . create_initial_plan ( e. plan . as_ref ( ) , session_state, None )
18851915 . await
18861916 {
18871917 Ok ( input) => {
0 commit comments