@@ -28,7 +28,7 @@ use arrow::array::types::{
2828    ArrowTimestampType ,  TimestampMicrosecondType ,  TimestampMillisecondType , 
2929    TimestampNanosecondType ,  TimestampSecondType , 
3030} ; 
31- use  arrow:: array:: { Array ,  PrimitiveArray } ; 
31+ use  arrow:: array:: { Array ,  ArrayRef ,   Int64Array ,   PrimitiveArray } ; 
3232use  arrow:: datatypes:: DataType :: { self ,  Null ,  Timestamp ,  Utf8 ,  Utf8View } ; 
3333use  arrow:: datatypes:: TimeUnit :: { self ,  Microsecond ,  Millisecond ,  Nanosecond ,  Second } ; 
3434use  datafusion_common:: cast:: as_primitive_array; 
@@ -60,6 +60,8 @@ use chrono::{
6060    - hour / HOUR 
6161    - minute / MINUTE 
6262    - second / SECOND 
63+     - millisecond / MILLISECOND 
64+     - microsecond / MICROSECOND 
6365"# 
6466    ) ,  
6567    argument(  
@@ -185,6 +187,26 @@ impl ScalarUDFImpl for DateTruncFunc {
185187        )  -> Result < ColumnarValue >  { 
186188            let  parsed_tz = parse_tz ( tz_opt) ?; 
187189            let  array = as_primitive_array :: < T > ( array) ?; 
190+ 
191+             // fast path for fine granularities 
192+             if  matches ! ( 
193+                 granularity. as_str( ) , 
194+                 // For modern timezones, it is correct to truncate "minute" in this way. 
195+                 // Both DataFusion and Arrow ignore the sub-minute UTC offsets of historical 
196+                 // timezones (e.g., Asia/Kathmandu before 1919 is UTC+05:41:16). 
197+                 "second"  | "minute"  | "millisecond"  | "microsecond" 
198+             )  ||
199+             // In UTC, "hour" and "day" have uniform durations and can be truncated with simple arithmetic 
200+             ( parsed_tz. is_none ( )  && matches ! ( granularity. as_str( ) ,  "hour"  | "day" ) ) 
201+             { 
202+                 let  result = general_date_trunc_array_fine_granularity ( 
203+                     T :: UNIT , 
204+                     array, 
205+                     granularity. as_str ( ) , 
206+                 ) ?; 
207+                 return  Ok ( ColumnarValue :: Array ( result) ) ; 
208+             } 
209+ 
188210            let  array:  PrimitiveArray < T >  = array
189211                . try_unary ( |x| { 
190212                    general_date_trunc ( T :: UNIT ,  x,  parsed_tz,  granularity. as_str ( ) ) 
@@ -423,6 +445,55 @@ fn date_trunc_coarse(granularity: &str, value: i64, tz: Option<Tz>) -> Result<i6
423445    Ok ( value. unwrap ( ) ) 
424446} 
425447
448+ /// Fast path for fine granularities (hour and smaller) that can be handled 
449+ /// with simple arithmetic operations without calendar complexity. 
450+ /// 
451+ /// This function is timezone-agnostic and should only be used when: 
452+ /// - No timezone is specified in the input, OR 
453+ /// - The granularity is less than hour as hour can be affected by DST transitions in some cases 
454+ fn  general_date_trunc_array_fine_granularity < T :  ArrowTimestampType > ( 
455+     tu :  TimeUnit , 
456+     array :  & PrimitiveArray < T > , 
457+     granularity :  & str , 
458+ )  -> Result < ArrayRef >  { 
459+     let  unit = match  ( tu,  granularity)  { 
460+         ( Second ,  "minute" )  => Some ( Int64Array :: new_scalar ( 60 ) ) , 
461+         ( Second ,  "hour" )  => Some ( Int64Array :: new_scalar ( 3600 ) ) , 
462+         ( Second ,  "day" )  => Some ( Int64Array :: new_scalar ( 86400 ) ) , 
463+ 
464+         ( Millisecond ,  "second" )  => Some ( Int64Array :: new_scalar ( 1_000 ) ) , 
465+         ( Millisecond ,  "minute" )  => Some ( Int64Array :: new_scalar ( 60_000 ) ) , 
466+         ( Millisecond ,  "hour" )  => Some ( Int64Array :: new_scalar ( 3_600_000 ) ) , 
467+         ( Millisecond ,  "day" )  => Some ( Int64Array :: new_scalar ( 86_400_000 ) ) , 
468+ 
469+         ( Microsecond ,  "millisecond" )  => Some ( Int64Array :: new_scalar ( 1_000 ) ) , 
470+         ( Microsecond ,  "second" )  => Some ( Int64Array :: new_scalar ( 1_000_000 ) ) , 
471+         ( Microsecond ,  "minute" )  => Some ( Int64Array :: new_scalar ( 60_000_000 ) ) , 
472+         ( Microsecond ,  "hour" )  => Some ( Int64Array :: new_scalar ( 3_600_000_000 ) ) , 
473+         ( Microsecond ,  "day" )  => Some ( Int64Array :: new_scalar ( 86_400_000_000 ) ) , 
474+ 
475+         ( Nanosecond ,  "microsecond" )  => Some ( Int64Array :: new_scalar ( 1_000 ) ) , 
476+         ( Nanosecond ,  "millisecond" )  => Some ( Int64Array :: new_scalar ( 1_000_000 ) ) , 
477+         ( Nanosecond ,  "second" )  => Some ( Int64Array :: new_scalar ( 1_000_000_000 ) ) , 
478+         ( Nanosecond ,  "minute" )  => Some ( Int64Array :: new_scalar ( 60_000_000_000 ) ) , 
479+         ( Nanosecond ,  "hour" )  => Some ( Int64Array :: new_scalar ( 3_600_000_000_000 ) ) , 
480+         ( Nanosecond ,  "day" )  => Some ( Int64Array :: new_scalar ( 86_400_000_000_000 ) ) , 
481+         _ => None , 
482+     } ; 
483+ 
484+     if  let  Some ( unit)  = unit { 
485+         let  original_type = array. data_type ( ) ; 
486+         let  array = arrow:: compute:: cast ( array,  & DataType :: Int64 ) ?; 
487+         let  array = arrow:: compute:: kernels:: numeric:: div ( & array,  & unit) ?; 
488+         let  array = arrow:: compute:: kernels:: numeric:: mul ( & array,  & unit) ?; 
489+         let  array = arrow:: compute:: cast ( & array,  original_type) ?; 
490+         Ok ( array) 
491+     }  else  { 
492+         // truncate to the same or smaller unit 
493+         Ok ( Arc :: new ( array. clone ( ) ) ) 
494+     } 
495+ } 
496+ 
426497// truncates a single value with the given timeunit to the specified granularity 
427498fn  general_date_trunc ( 
428499    tu :  TimeUnit , 
@@ -884,6 +955,21 @@ mod tests {
884955                    "2018-11-04T02:00:00-02" , 
885956                ] , 
886957            ) , 
958+             ( 
959+                 vec![ 
960+                     "2024-10-26T23:30:00Z" , 
961+                     "2024-10-27T00:30:00Z" , 
962+                     "2024-10-27T01:30:00Z" , 
963+                     "2024-10-27T02:30:00Z" , 
964+                 ] , 
965+                 Some ( "Asia/Kathmandu" . into( ) ) ,  // UTC+5:45 
966+                 vec![ 
967+                     "2024-10-27T05:00:00+05:45" , 
968+                     "2024-10-27T06:00:00+05:45" , 
969+                     "2024-10-27T07:00:00+05:45" , 
970+                     "2024-10-27T08:00:00+05:45" , 
971+                 ] , 
972+             ) , 
887973        ] ; 
888974
889975        cases. iter ( ) . for_each ( |( original,  tz_opt,  expected) | { 
0 commit comments