@@ -3,7 +3,9 @@
 #![allow(clippy::needless_return)]

 use crate::float::Float;
-use crate::int::{CastInto, DInt, HInt, Int};
+use crate::int::{CastInto, DInt, HInt, Int, MinInt};
+
+use super::HalfRep;

 fn div32<F: Float>(a: F, b: F) -> F
 where
@@ -37,6 +39,11 @@
     let quiet_bit = implicit_bit >> 1;
     let qnan_rep = exponent_mask | quiet_bit;

+    // #[inline(always)]
+    // fn negate<T: Int>(a: T) -> T {
+    //     T::wrapping_neg(a.signed())
+    // }
+
     #[inline(always)]
     fn negate_u32(a: u32) -> u32 {
         (<i32>::wrapping_neg(a as i32)) as u32
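For readers skimming the hunk: `negate_u32` is two's-complement negation through a signed cast, which the commented-out generic `negate` sketches a `HalfRep<F>`-friendly replacement for. A minimal standalone check of what it computes (test values here are illustrative, not crate code):

```rust
fn negate_u32(a: u32) -> u32 {
    (i32::wrapping_neg(a as i32)) as u32
}

fn main() {
    assert_eq!(negate_u32(1), u32::MAX); // -1 in two's complement
    assert_eq!(negate_u32(0), 0);
    assert_eq!(negate_u32(0x8000_0000), 0x8000_0000); // i32::MIN is its own negation
    println!("two's-complement negation checks pass");
}
```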
@@ -459,10 +466,14 @@ where
     i32: CastInto<F::Int>,
     F::Int: CastInto<i32>,
     u64: CastInto<F::Int>,
+    u64: CastInto<HalfRep<F>>,
+    F::Int: CastInto<HalfRep<F>>,
+    F::Int: From<HalfRep<F>>,
+    F::Int: From<u8>,
     F::Int: CastInto<u64>,
     i64: CastInto<F::Int>,
     F::Int: CastInto<i64>,
-    F::Int: HInt,
+    F::Int: HInt + DInt,
 {
     const NUMBER_OF_HALF_ITERATIONS: usize = 3;
     const NUMBER_OF_FULL_ITERATIONS: usize = 1;
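These fixed iteration counts work because Newton-Raphson reciprocal refinement converges quadratically: each step roughly squares the relative error. A floating-point sketch of that behaviour, using the same starting constant the code derives `x0` from (illustrative only, not crate code):

```rust
fn main() {
    let b = 1.9_f64;
    // Same starting point as the code's C constant: x0 = 3/4 + 1/sqrt(2) - b/2.
    let mut x = 0.75 + 1.0 / 2.0_f64.sqrt() - b / 2.0;
    for i in 0..4 {
        let err = (x - 1.0 / b).abs();
        println!("e_{i} = {err:.3e}"); // the error roughly squares every step
        x = x * (2.0 - b * x); // Newton-Raphson step for 1/b
    }
}
```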
@@ -471,7 +482,7 @@ where
     let one = F::Int::ONE;
     let zero = F::Int::ZERO;
     let hw = F::BITS / 2;
-    let lo_mask = u64::MAX >> hw;
+    let lo_mask = F::Int::MAX >> hw;

     let significand_bits = F::SIGNIFICAND_BITS;
     let max_exponent = F::EXPONENT_MAX;
@@ -616,21 +627,23 @@ where

     let mut x_uq0 = if NUMBER_OF_HALF_ITERATIONS > 0 {
         // Starting with (n-1) half-width iterations
-        let b_uq1_hw: u32 =
-            (CastInto::<u64>::cast(b_significand) >> (significand_bits + 1 - hw)) as u32;
+        let b_uq1_hw: HalfRep<F> = CastInto::<HalfRep<F>>::cast(
+            CastInto::<u64>::cast(b_significand) >> (significand_bits + 1 - hw),
+        );

         // C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW
         // with W0 being either 16 or 32 and W0 <= HW.
         // That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which
         // b/2 is subtracted to obtain x0) wrapped to [0, 1) range.

         // HW is at least 32. Shifting into the highest bits if needed.
-        let c_hw = (0x7504F333_u64 as u32).wrapping_shl(hw.wrapping_sub(32));
+        let c_hw = (CastInto::<HalfRep<F>>::cast(0x7504F333_u64)).wrapping_shl(hw.wrapping_sub(32));

         // b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572,
         // so x0 fits to UQ0.HW without wrapping.
-        let x_uq0_hw: u32 = {
-            let mut x_uq0_hw: u32 = c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
+        let x_uq0_hw: HalfRep<F> = {
+            let mut x_uq0_hw: HalfRep<F> =
+                c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
             // dbg!(x_uq0_hw);
             // An e_0 error is comprised of errors due to
             // * x0 being an inherently imprecise first approximation of 1/b_hw
@@ -661,8 +674,9 @@ where
             // no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is
             // expected to be strictly positive because b_UQ1_hw has its highest bit set
             // and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1).
-            let corr_uq1_hw: u32 =
-                0.wrapping_sub(((x_uq0_hw as u64).wrapping_mul(b_uq1_hw as u64)) >> hw) as u32;
+            let corr_uq1_hw: HalfRep<F> = CastInto::<HalfRep<F>>::cast(zero.wrapping_sub(
+                ((F::Int::from(x_uq0_hw)).wrapping_mul(F::Int::from(b_uq1_hw))) >> hw,
+            ));
             // dbg!(corr_uq1_hw);

             // Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally
@@ -677,7 +691,9 @@ where
             // The fact corr_UQ1_hw was virtually rounded up (due to result of
             // multiplication being **first** truncated, then negated - to improve
             // error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw.
-            x_uq0_hw = ((x_uq0_hw as u64).wrapping_mul(corr_uq1_hw as u64) >> (hw - 1)) as u32;
+            x_uq0_hw = ((F::Int::from(x_uq0_hw)).wrapping_mul(F::Int::from(corr_uq1_hw))
+                >> (hw - 1))
+            .cast();
             // dbg!(x_uq0_hw);
             // Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t
             // representation. In the latter case, x_UQ0_hw will be either 0 or 1 after
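Taken together, the two updates above (wrapping negation of the truncated product to get `corr`, then multiply and renormalize) perform one fixed-point Newton-Raphson step, x <- x * (2 - b*x). A standalone u32/u64 version of the same arithmetic with illustrative values; this mirrors the pre-diff concrete-type path, not the generic `HalfRep<F>` one:

```rust
fn main() {
    let b = 1.5_f64;
    let b_uq1: u32 = (b * (1u64 << 31) as f64) as u32; // b in [1, 2) as UQ1.31
    let c: u32 = 0x7504_F333; // (3/4 + 1/sqrt(2)) - 1 as UQ0.32
    // x0 = C - b/2; b_uq1 doubles as b/2 when reinterpreted as UQ0.32.
    let mut x_uq0: u32 = c.wrapping_sub(b_uq1);
    for _ in 0..4 {
        // corr = 2 - x*b in UQ1.31: wrapping negation of the truncated product.
        let corr_uq1 = 0u32.wrapping_sub(((x_uq0 as u64 * b_uq1 as u64) >> 32) as u32);
        // x <- x * corr, renormalized from the UQ1.63 product back to UQ0.32.
        x_uq0 = ((x_uq0 as u64 * corr_uq1 as u64) >> 31) as u32;
    }
    let approx = x_uq0 as f64 / (1u64 << 32) as f64;
    println!("1/{b} ~ {approx} (exact {})", 1.0 / b);
}
```

Because the products are truncated, the estimate converges to 1/b from slightly below, which is exactly the bias the surrounding error analysis accounts for.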
@@ -707,7 +723,7 @@ where
             // be not below that value (see g(x) above), so it is safe to decrement just
             // once after the final iteration. On the other hand, an effective value of
             // divisor changes after this point (from b_hw to b), so adjust here.
-            x_uq0_hw.wrapping_sub(1_u32)
+            x_uq0_hw.wrapping_sub(HalfRep::<F>::ONE)
         };

         // Error estimations for full-precision iterations are calculated just
@@ -717,7 +733,7 @@ where
         // Simulating operations on a twice_rep_t to perform a single final full-width
         // iteration. Using ad-hoc multiplication implementations to take advantage
         // of particular structure of operands.
-        let blo: u64 = (CastInto::<u64>::cast(b_uq1)) & lo_mask;
+        let blo: F::Int = b_uq1 & lo_mask;
         // x_UQ0 = x_UQ0_hw * 2^HW - 1
         // x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1
         //
@@ -726,19 +742,20 @@ where
         // + [   x_UQ0_hw * blo  ]
         // - [      b_UQ1        ]
         // = [   result   ][.... discarded ...]
-        let corr_uq1 = negate_u64(
-            (x_uq0_hw as u64) * (b_uq1_hw as u64) + (((x_uq0_hw as u64) * (blo)) >> hw) - 1,
-        ); // account for *possible* carry
-        let lo_corr = corr_uq1 & lo_mask;
-        let hi_corr = corr_uq1 >> hw;
+        let corr_uq1: F::Int = (F::Int::from(x_uq0_hw) * F::Int::from(b_uq1_hw)
+            + ((F::Int::from(x_uq0_hw) * blo) >> hw))
+            .wrapping_sub(one)
+            .wrapping_neg(); // account for *possible* carry
+        let lo_corr: F::Int = corr_uq1 & lo_mask;
+        let hi_corr: F::Int = corr_uq1 >> hw;
         // x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1
-        let mut x_uq0: <F as Float>::Int = ((((x_uq0_hw as u64) * hi_corr) << 1)
-            .wrapping_add(((x_uq0_hw as u64) * lo_corr) >> (hw - 1))
-            .wrapping_sub(2))
-        .cast(); // 1 to account for the highest bit of corr_UQ1 can be 1
-                 // 1 to account for possible carry
-                 // Just like the case of half-width iterations but with possibility
-                 // of overflowing by one extra Ulp of x_UQ0.
+        let mut x_uq0: F::Int = ((F::Int::from(x_uq0_hw) * hi_corr) << 1)
+            .wrapping_add((F::Int::from(x_uq0_hw) * lo_corr) >> (hw - 1))
+            .wrapping_sub(F::Int::from(2u8));
+        // 1 to account for the highest bit of corr_UQ1 can be 1
+        // 1 to account for possible carry
+        // Just like the case of half-width iterations but with possibility
+        // of overflowing by one extra Ulp of x_UQ0.
         x_uq0 -= one;
         // ... and then traditional fixup by 2 should work

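The rewritten block keeps the same "ad-hoc" structure as before: since x_UQ0 is x_UQ0_hw * 2^HW, the high half of the full-width product can be assembled from two half-width products, exactly as the bracket diagram above lays out. A primitive-typed sketch of that identity (names and test values are illustrative):

```rust
/// High 64 bits of (x_hw * 2^32) * b, assembled from half-width products.
fn mul_hi_split(x_hw: u32, b: u64) -> u64 {
    let lo_mask = u64::MAX >> 32;
    let blo = b & lo_mask;
    let b_hw = b >> 32;
    // ((x_hw << 32) * b) >> 64 == x_hw * b_hw + ((x_hw * blo) >> 32)
    (x_hw as u64) * b_hw + ((x_hw as u64 * blo) >> 32)
}

fn main() {
    let (x_hw, b) = (0xB504_F333_u32, 0xC000_0000_1234_5678_u64);
    let exact = ((((x_hw as u128) << 32) * b as u128) >> 64) as u64;
    // Equal: the only bits the split form truncates are the ones the
    // >> 64 discards anyway.
    assert_eq!(exact, mul_hi_split(x_hw, b));
    println!("split high product matches the exact one");
}
```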
@@ -755,8 +772,8 @@ where
         x_uq0
     } else {
         // C is (3/4 + 1/sqrt(2)) - 1 truncated to 64 fractional bits as UQ0.n
-        let c: <F as Float>::Int = (0x7504F333 << (F::BITS - 32)).cast();
-        let x_uq0: <F as Float>::Int = c.wrapping_sub(b_uq1);
+        let c: F::Int = (0x7504F333 << (F::BITS - 32)).cast();
+        let x_uq0: F::Int = c.wrapping_sub(b_uq1);
         // E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-64
         x_uq0
     };
@@ -799,14 +816,27 @@ where

     // Add 2 to U_N due to final decrement.

-    let reciprocal_precision: <F as Float>::Int = 220.cast();
+    let reciprocal_precision: F::Int = if F::BITS == 32
+        && NUMBER_OF_HALF_ITERATIONS == 2
+        && NUMBER_OF_FULL_ITERATIONS == 1
+    {
+        74.cast()
+    } else if F::BITS == 32 && NUMBER_OF_HALF_ITERATIONS == 0 && NUMBER_OF_FULL_ITERATIONS == 3 {
+        10.cast()
+    } else if F::BITS == 64 && NUMBER_OF_HALF_ITERATIONS == 3 && NUMBER_OF_FULL_ITERATIONS == 1 {
+        220.cast()
+    } else if F::BITS == 128 && NUMBER_OF_HALF_ITERATIONS == 4 && NUMBER_OF_FULL_ITERATIONS == 1 {
+        13922.cast()
+    } else {
+        panic!("invalid iterations for the specified bits");
+    };

     // Suppose 1/b - P * 2^-W < x < 1/b + P * 2^-W
     let x_uq0 = x_uq0 - reciprocal_precision;
     // Now 1/b - (2*P) * 2^-W < x < 1/b
     // FIXME Is x_UQ0 still >= 0.5?

-    let mut quotient: <F as Float>::Int = x_uq0.widen_mul(a_significand << 1).hi();
+    let mut quotient: F::Int = x_uq0.widen_mul(a_significand << 1).hi();
     // Now, a/b - 4*P * 2^-W < q < a/b for q=<quotient_UQ1:dummy> in UQ1.(SB+1+W).

     // quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1),
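`widen_mul(...).hi()` above is a full double-width multiply that keeps only the high half; for f64-sized operands it reduces to a u128 intermediate. A sketch of the quotient step in that concrete shape (the u128 helper and the chosen values are assumptions for illustration, not the crate's generic `HInt` API):

```rust
/// High half of a 64 x 64 -> 128 product: the u64 shape of widen_mul().hi().
fn widen_mul_hi(a: u64, b: u64) -> u64 {
    ((a as u128 * b as u128) >> 64) as u64
}

fn main() {
    // f64 has 52 significand bits; a = 1.0 carries just the implicit bit.
    let a_significand: u64 = 1 << 52;
    // x ~ 1/1.5 as UQ0.64, already nudged down as after `- reciprocal_precision`.
    let x_uq0: u64 = 0xAAAA_AAAA_AAAA_A800;
    // quotient = hi(x * (a_sig << 1)), i.e. a/b in UQ1.(52+1).
    let quotient = widen_mul_hi(x_uq0, a_significand << 1);
    assert_eq!(quotient, (2u64 << 53) / 3); // 2/3 in UQ1.53
    println!("quotient = {quotient:#x}");
}
```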
@@ -914,13 +944,8 @@ intrinsics! {
         div64(a, b)
     }

-    // TODO: how should `HInt` be handled?
     pub extern "C" fn __divtf3(a: f128, b: f128) -> f128 {
-        if cfg!(target_pointer_width = "64") {
-            div32(a, b)
-        } else {
-            div64(a, b)
-        }
+        div64(a, b)
     }

     #[cfg(target_arch = "arm")]