diff --git a/numerics/cbrt.cpp b/numerics/cbrt.cpp index 2dcc001021..383f73d8ec 100644 --- a/numerics/cbrt.cpp +++ b/numerics/cbrt.cpp @@ -11,6 +11,7 @@ #include "glog/logging.h" #include "numerics/double_precision.hpp" #include "numerics/fma.hpp" +#include "numerics/osaca.hpp" // 🧙 For OSACA_*. #include "quantities/elementary_functions.hpp" namespace principia { @@ -22,6 +23,24 @@ using namespace principia::numerics::_double_precision; using namespace principia::numerics::_fma; using namespace principia::quantities::_elementary_functions; +#define OSACA_ANALYSED_FUNCTION Cbrt +#define OSACA_ANALYSED_FUNCTION_NAMESPACE method_5²Z4¹FMA:: +#define OSACA_ANALYSED_FUNCTION_TEMPLATE_PARAMETERS +#define UNDER_OSACA_HYPOTHESES(expression) \ + [&] { \ + constexpr double y = 3; \ + constexpr double abs_y = y == 0 ? 0 : y > 0 ? y : -y; \ + /* Non-constexpr values have to be taken by reference (and must not be */ \ + /* used). */ \ + constexpr auto CorrectionPossiblyNeeded = [](double const& r₀, \ + double const& r₁, \ + double const& r̃, \ + double const τ) -> bool { \ + return false; \ + }; \ + return expression; \ + }() + // The computations in this file are described in documentation/cbrt.pdf; the // identifiers match the notation in that document. @@ -133,31 +152,32 @@ static_assert(σ₁⁻³ * y₁ == y₂); static_assert(σ₂⁻³ * y₂ == y₁); template -double Cbrt(double const y) { +double Cbrt(double y) { + OSACA_FUNCTION_BEGIN(y, ); __m128d Y_0 = _mm_set_sd(y); __m128d const sign = _mm_and_pd(masks::sign_bit, Y_0); Y_0 = _mm_andnot_pd(masks::sign_bit, Y_0); double const abs_y = _mm_cvtsd_f64(Y_0); - if (y != y) { + OSACA_IF(y != y) { // The usual logic will produce a qNaN when given a NaN, but will not // preserve the payload and will signal overflows (q will be a nonsensical // large value, and q³ will overflow). Further, the rescaling comparisons // will signal the invalid operation exception for quiet NaNs (although that // would be easy to work around using the unordered compare intrinsics). - return y + y; + OSACA_RETURN(y + y); } - if (abs_y < y₁) { - if (abs_y == 0) { - return y; + OSACA_IF(abs_y < y₁) { + OSACA_IF(abs_y == 0) { + OSACA_RETURN(y); } return Cbrt(y * σ₁⁻³) * σ₁; - } else if (abs_y > y₂) { + } OSACA_ELSE_IF(abs_y > y₂) { if (abs_y == std::numeric_limits::infinity()) { - return y; + OSACA_RETURN(y); } - return Cbrt(y * σ₂⁻³) * σ₂; + OSACA_RETURN(Cbrt(y * σ₂⁻³) * σ₂); } // Step 1. The constant Γʟ²ᴄ is the result of Canon optimization for step 2. @@ -197,12 +217,12 @@ double Cbrt(double const y) { double const r₁ = x_sign_y - r₀ - Δ; double const r̃ = r₀ + 2 * r₁; - if (rounding == Rounding::Correct && - CorrectionPossiblyNeeded(r₀, r₁, r̃, /*τ=*/0x1.7C73DBBD9FA60p-66)) { - return _mm_cvtsd_f64(_mm_or_pd( - _mm_set_sd(CorrectLastBit(abs_y, std::abs(r₀), std::abs(r̃))), sign)); + OSACA_IF(rounding == Rounding::Correct && + CorrectionPossiblyNeeded(r₀, r₁, r̃, /*τ=*/0x1.7C73DBBD9FA60p-66)) { + OSACA_RETURN(_mm_cvtsd_f64(_mm_or_pd( + _mm_set_sd(CorrectLastBit(abs_y, std::abs(r₀), std::abs(r̃))), sign))); } - return r₀; + OSACA_RETURN(r₀); } template double Cbrt(double y); template double Cbrt(double y); @@ -227,31 +247,32 @@ static_assert(σ₁⁻³ * std::numeric_limits::denorm_min() > y₁); static_assert(σ₂⁻³ * std::numeric_limits::max() < y₂); template -double Cbrt(double const y) { +double Cbrt(double y) { + OSACA_FUNCTION_BEGIN(y, ); __m128d Y_0 = _mm_set_sd(y); __m128d const sign = _mm_and_pd(masks::sign_bit, Y_0); Y_0 = _mm_andnot_pd(masks::sign_bit, Y_0); double const abs_y = _mm_cvtsd_f64(Y_0); - if (y != y) { + OSACA_IF(y != y) { // The usual logic will produce a qNaN when given a NaN, but will not // preserve the payload and will signal overflows (q will be a nonsensical // large value, and q³ will overflow). Further, the rescaling comparisons // will signal the invalid operation exception for quiet NaNs (although that // would be easy to work around using the unordered compare intrinsics). - return y + y; + OSACA_RETURN(y + y); } - if (abs_y < y₁) { - if (abs_y == 0) { - return y; + OSACA_IF(abs_y < y₁) { + OSACA_IF(abs_y == 0) { + OSACA_RETURN(y); } - return Cbrt(y * σ₁⁻³) * σ₁; - } else if (abs_y > y₂) { - if (abs_y == std::numeric_limits::infinity()) { - return y; + OSACA_RETURN(Cbrt(y * σ₁⁻³) * σ₁); + } OSACA_ELSE_IF(abs_y > y₂) { + OSACA_IF(abs_y == std::numeric_limits::infinity()) { + OSACA_RETURN(y); } - return Cbrt(y * σ₂⁻³) * σ₂; + OSACA_RETURN(Cbrt(y * σ₂⁻³) * σ₂); } // Step 1. The constant Γᴋ minimizes the error of q, as in [KB01], rather @@ -299,12 +320,12 @@ double Cbrt(double const y) { double const r₁ = FusedNegatedMultiplyAdd(Δ₁, Δ₂, x_sign_y - r₀); double const r̃ = r₀ + 2 * r₁; - if (rounding == Rounding::Correct && - CorrectionPossiblyNeeded(r₀, r₁, r̃, /*τ=*/0x1.E45E16EF5480Fp-76)) { - return _mm_cvtsd_f64(_mm_or_pd( - _mm_set_sd(CorrectLastBit(abs_y, std::abs(r₀), std::abs(r̃))), sign)); + OSACA_IF(rounding == Rounding::Correct && + CorrectionPossiblyNeeded(r₀, r₁, r̃, /*τ=*/0x1.E45E16EF5480Fp-76)) { + OSACA_RETURN(_mm_cvtsd_f64(_mm_or_pd( + _mm_set_sd(CorrectLastBit(abs_y, std::abs(r₀), std::abs(r̃))), sign))); } - return r₀; + OSACA_RETURN(r₀); } template double Cbrt(double y); template double Cbrt(double y); diff --git a/numerics/numerics.vcxproj b/numerics/numerics.vcxproj index 0de586e31a..ec75e0052d 100644 --- a/numerics/numerics.vcxproj +++ b/numerics/numerics.vcxproj @@ -81,6 +81,7 @@ + diff --git a/numerics/osaca.hpp b/numerics/osaca.hpp new file mode 100644 index 0000000000..3d5b032bcf --- /dev/null +++ b/numerics/osaca.hpp @@ -0,0 +1,203 @@ +#pragma once + +#define PRINCIPIA_USE_OSACA 0 + +// The macros OSACA_FUNCTION_BEGIN and OSACA_RETURN are used to analyse the +// latency of a double -> double function, as measured, e.g., by the +// nanobenchmarks; this notionally corresponds to the duration of an iteration +// of a loop `x = f(x)`. +// The latency-critical path of the function is reported as the loop-carried +// dependency by OSACA, and as the critical path by IACA in throughput analysis +// mode. +// OSACA and IACA are only suitable for benchmarking a single path. Any if +// statements, including in inline functions called by the function being +// analysed, should be replaced with OSACA_IF. The path should be determined by +// providing any necessary constexpr expressions in UNDER_OSACA_HYPOTHESES. +// If OSACA_EVALUATE_CONDITIONS is set to 1, code will be generated to evaluate +// the conditions for the branches taken (outside the loop-carried path, as they +// would be with correct branch prediction). +// OSACA_EVALUATE_CONDITIONS can be set to 0 to get a more streamlined +// dependency graph when the conditions are irrelevant. Note that this can +// result in artificially low port pressures. +#if 0 +// Usage: + double f(double const x) { + double const abs_x = std::abs(x); + if (abs_x < 0.1) { + return x; + } else if (x < 0) { + return -f_positive(abs_x); + } else { + return f_positive(abs_x); + } + } + double f_positive(double const abs_x) { + if (abs_x > 3) { + return 1; + } else { + // … + } + } +// becomes: + double f(double x) { // The argument cannot be const. + OSACA_FUNCTION_BEGIN(x); + double const abs_x = std::abs(x); + OSACA_IF(abs_x < 0.1) { + OSACA_RETURN(x); + } OSACA_ELSE_IF(x < 0) { // `else OSACA_IF` works, but upsets the linter. + OSACA_RETURN(-f_positive(abs_x)); + } else { + OSACA_RETURN(f_positive(abs_x)); + } + } + // Other functions can have const arguments and return normally, but need to + // use OSACA_IF: + double f_positive(double const abs_x) { + OSACA_IF(abs_x > 3) { + return 1; + } else { + // … + } + } +// To analyse it near x = 5: +# define OSACA_ANALYSED_FUNCTION f +# define OSACA_ANALYSED_FUNCTION_NAMESPACE +# define OSACA_ANALYSED_FUNCTION_TEMPLATE_PARAMETERS +# define UNDER_OSACA_HYPOTHESES(expression) \ + [&] { \ + constexpr double x = 5; \ + /* To avoid inconsistent definitions, constexpr code can be used as */ \ + /* needed. */ \ + constexpr double abs_x = x > 0 ? x : -x; \ + return (expression); \ + }() + +// If multiple functions principia::ns1::f and principia::ns2::f are marked up +// for analysis in a translation unit, use +# define OSACA_ANALYSED_FUNCTION_NAMESPACE ns1:: +// If f is templatized as, e.g, `template`, use + OSACA_FUNCTION_BEGIN(x, ) +// and +# define OSACA_ANALYSED_FUNCTION_TEMPLATE_PARAMETERS \ + +#endif +// In principle, the loop-carried dependency for function latency analysis is +// achieved by wrapping the body of the function in an infinite loop, assigning +// the result to the argument at each iteration, and adding IACA markers outside +// the loop. There are some subtleties: +// — We need to trick the compiler into believing the loop is finite, so that it +// doesn’t optimize away the end marker or even the function. This is +// achieved by exiting based on the value of `OSACA_loop_terminator`. +// — Return statements may be in if statements, and there may be several of +// them, so they cannot be the end of a loop started unconditionally. Instead +// we loop with goto. +// — We need to prevent the compiler from moving the start and end markers into +// the middle of register saving and restoring code, which would mess up the +// dependency analysis. This is done with additional conditional gotos. +// — Some volatile reads and writes are used to clarify identity of the +// registers in the generated code (where the names of `OSACA_result` and, if +// `OSACA_CARRY_LOOP_THROUGH_REGISTER` is set to 0, `OSACA_loop_carry` appear +// in movsd instructions). +// +// Carrying the loop dependency through a memory load and store can make the +// dependency graph easier to understand, as it forces any usage of the input to +// depend on the initial movsd, with the loop carried by a single backward edge +// to that initial movsd. +// If the loop is carried through a register, multiple usages of the input may +// result in multiple back edges from the final instruction that computed the +// result. Carrying the loop through the memory could also potentially prevent +// the compiler from reusing intermediate values in the next iteration, e.g., if +// the the computation of f(x) depends on -x and produces -f(x) before f(x), as +// in an even function defined in terms of its positive half, the compiler might +// reuse -f(x₀)=-x₁ instead of computing -x₁ from x₁=f(x₀). However: +// — it adds a spurious move to the latency; +// — some tools (IACA) cannot see the dependency through memory. +// Set OSACA_CARRY_LOOP_THROUGH_REGISTER to 0 to carry the loop through memory. + +#define OSACA_EVALUATE_CONDITIONS 1 +#define OSACA_CARRY_LOOP_THROUGH_REGISTER 1 + +#if PRINCIPIA_USE_OSACA + +#include "intel/iacaMarks.h" + +#define OSACA_QUALIFIED_ANALYSED_FUNCTION \ + OSACA_ANALYSED_FUNCTION_NAMESPACE OSACA_ANALYSED_FUNCTION \ + OSACA_ANALYSED_FUNCTION_TEMPLATE_PARAMETERS + +static bool volatile OSACA_loop_terminator = false; + +#define OSACA_FUNCTION_BEGIN(arg, ...) \ + double OSACA_LOOP_CARRY_QUALIFIER OSACA_loop_carry = arg; \ + OSACA_outer_loop: \ + constexpr auto* OSACA_analysed_function_with_current_parameters = \ + &OSACA_ANALYSED_FUNCTION __VA_ARGS__; \ + if constexpr (std::string_view(__func__) == \ + STRINGIFY_EXPANSION(OSACA_ANALYSED_FUNCTION) && \ + OSACA_analysed_function_with_current_parameters == \ + &OSACA_QUALIFIED_ANALYSED_FUNCTION) { \ + IACA_VC64_START; \ + } \ + _Pragma("warning(push)"); \ + _Pragma("warning(disable : 4102)"); \ + OSACA_loop: \ + _Pragma("warning(pop)"); \ + arg = OSACA_loop_carry + +#define OSACA_RETURN(result) \ + do { \ + if constexpr (std::string_view(__func__) == \ + STRINGIFY_EXPANSION(OSACA_ANALYSED_FUNCTION) && \ + OSACA_analysed_function_with_current_parameters == \ + &OSACA_QUALIFIED_ANALYSED_FUNCTION) { \ + OSACA_loop_carry = (result); \ + if (!OSACA_loop_terminator) { \ + goto OSACA_loop; \ + } \ + double volatile OSACA_result = OSACA_loop_carry; \ + IACA_VC64_END; \ + /* The outer loop prevents the the start and end marker from being */ \ + /* interleaved with register saving and restoring moves. */ \ + if (!OSACA_loop_terminator) { \ + goto OSACA_outer_loop; \ + } \ + return OSACA_result; \ + } else { \ + return (result); \ + } \ + } while (false) + +#if OSACA_CARRY_LOOP_THROUGH_REGISTER +#define OSACA_LOOP_CARRY_QUALIFIER +#else +#define OSACA_LOOP_CARRY_QUALIFIER volatile +#endif + +// The branch not taken, determined by evaluating the condition +// `UNDER_OSACA_HYPOTHESES`, is eliminated by `if constexpr`; the condition is +// also compiled normally and assigned to a boolean; whether this results in any +// generated code depends on `OSACA_EVALUATE_CONDITIONS`. Note that, with +// `OSACA_EVALUATE_CONDITIONS`, in `OSACA_IF(p) { } OSACA_ELSE_IF(q) { }`, if +// `p` holds `UNDER_OSACA_HYPOTHESES`, code is generated to evaluate `p`, but +// not `q`. + +#define OSACA_IF(condition) \ + if constexpr (bool OSACA_CONDITION_QUALIFIER OSACA_computed_condition = \ + (condition); \ + UNDER_OSACA_HYPOTHESES(condition)) + +#if OSACA_EVALUATE_CONDITIONS +#define OSACA_CONDITION_QUALIFIER volatile +#else +#define OSACA_CONDITION_QUALIFIER +#endif + +#else // if !PRINCIPIA_USE_OSACA + +#define OSACA_FUNCTION_BEGIN(...) ((void) 0) +#define OSACA_RETURN(result) return (result) +#define OSACA_IF(condition) if (condition) + +#endif // PRINCIPIA_USE_OSACA + +#define OSACA_ELSE_IF else OSACA_IF // NOLINT diff --git a/numerics/sin_cos.cpp b/numerics/sin_cos.cpp index 9315b27e9e..45a2960185 100644 --- a/numerics/sin_cos.cpp +++ b/numerics/sin_cos.cpp @@ -11,10 +11,25 @@ #include "numerics/accurate_tables.mathematica.h" #include "numerics/double_precision.hpp" #include "numerics/fma.hpp" +#include "numerics/osaca.hpp" // 🧙 For OSACA_*. #include "numerics/polynomial_evaluators.hpp" #include "quantities/elementary_functions.hpp" -#define OSACA_ANALYSED_FUNCTION + +namespace principia { +namespace numerics { +namespace _sin_cos { +namespace internal { + +using namespace principia::numerics::_accurate_tables; +using namespace principia::numerics::_double_precision; +using namespace principia::numerics::_fma; +using namespace principia::numerics::_polynomial_evaluators; +using namespace principia::quantities::_elementary_functions; + +#define OSACA_ANALYSED_FUNCTION Cos +#define OSACA_ANALYSED_FUNCTION_NAMESPACE +#define OSACA_ANALYSED_FUNCTION_TEMPLATE_PARAMETERS #define UNDER_OSACA_HYPOTHESES(expression) \ [&] { \ constexpr bool UseHardwareFMA = true; \ @@ -39,201 +54,6 @@ return expression; \ }() -#if defined(OSACA_ANALYSED_FUNCTION) -#define PRINCIPIA_USE_OSACA !PRINCIPIA_MACRO_IS_EMPTY(OSACA_ANALYSED_FUNCTION) -#endif - -#if PRINCIPIA_USE_OSACA - -#include "intel/iacaMarks.h" - -// The macros OSACA_FUNCTION_BEGIN and OSACA_RETURN are used to analyse the -// latency of a double -> double function, as measured, e.g., by the -// nanobenchmarks; this notionally corresponds to the duration of an iteration -// of a loop `x = f(x)`. -// The latency-critical path of the function is reported as the loop-carried -// dependency by OSACA, and as the critical path by IACA in throughput analysis -// mode. -// OSACA and IACA are only suitable for benchmarking a single path. Any if -// statements, including in inline functions called by the function being -// analysed, should be replaced with OSACA_IF. The path should be determined by -// providing any necessary constexpr expressions in UNDER_OSACA_HYPOTHESES. -// If OSACA_EVALUATE_CONDITIONS is set to 1, code will be generated to evaluate -// the conditions for the branches taken (outside the loop-carried path, as they -// would be with correct branch prediction). -// OSACA_EVALUATE_CONDITIONS can be set to 0 to get a more streamlined -// dependency graph when the conditions are irrelevant. Note that this can -// result in artificially low port pressures. -#if 0 -// Usage: - double f(double const x) { - double const abs_x = std::abs(x); - if (abs_x < 0.1) { - return x; - } else if (x < 0) { - return -f_positive(abs_x); - } else { - return f_positive(abs_x); - } - } - double f_positive(double const abs_x) { - if (abs_x > 3) { - return 1; - } else { - // … - } - } -// becomes: - double f(double x) { // The argument cannot be const. - OSACA_FUNCTION_BEGIN(x); - double const abs_x = std::abs(x); - OSACA_IF(abs_x < 0.1) { - OSACA_RETURN(x); - } OSACA_ELSE_IF(x < 0) { // `else OSACA_IF` works, but upsets the linter. - OSACA_RETURN(-f_positive(abs_x)); - } else { - OSACA_RETURN(f_positive(abs_x)); - } - } - // Other functions can have const arguments and return normally, but need to - // use OSACA_IF: - double f_positive(double const abs_x) { - OSACA_IF(abs_x > 3) { - return 1; - } else { - // … - } - } -// To analyse it near x = 5: -#define OSACA_ANALYSED_FUNCTION f -#define UNDER_OSACA_HYPOTHESES(expression) \ - [&] { \ - constexpr double x = 5; \ - /* To avoid inconsistent definitions, constexpr code can be used as */ \ - /* needed. */ \ - constexpr double abs_x = x > 0 ? x : -x; \ - return (expression); \ - }() -#endif -// In principle, the loop-carried dependency for function latency analysis is -// achieved by wrapping the body of the function in an infinite loop, assigning -// the result to the argument at each iteration, and adding IACA markers outside -// the loop. There are some subtleties: -// — We need to trick the compiler into believing the loop is finite, so that it -// doesn’t optimize away the end marker or even the function. This is -// achieved by exiting based on the value of `OSACA_loop_terminator`. -// — Return statements may be in if statements, and there may be several of -// them, so they cannot be the end of a loop started unconditionally. Instead -// we loop with goto. -// — We need to prevent the compiler from moving the start and end markers into -// the middle of register saving and restoring code, which would mess up the -// dependency analysis. This is done with additional conditional gotos. -// — Some volatile reads and writes are used to clarify identity of the -// registers in the generated code (where the names of `OSACA_result` and, if -// `OSACA_CARRY_LOOP_THROUGH_REGISTER` is set to 0, `OSACA_loop_carry` appear -// in movsd instructions). -// -// Carrying the loop dependency through a memory load and store can make the -// dependency graph easier to understand, as it forces any usage of the input to -// depend on the initial movsd, with the loop carried by a single backward edge -// to that initial movsd. -// If the loop is carried through a register, multiple usages of the input may -// result in multiple back edges from the final instruction that computed the -// result. Carrying the loop through the memory could also potentially prevent -// the compiler from reusing intermediate values in the next iteration, e.g., if -// the the computation of f(x) depends on -x and produces -f(x) before f(x), as -// in an even function defined in terms of its positive half, the compiler might -// reuse -f(x₀)=-x₁ instead of computing -x₁ from x₁=f(x₀). However: -// — it adds a spurious move to the latency; -// — some tools (IACA) cannot see the dependency through memory. -// Set OSACA_CARRY_LOOP_THROUGH_REGISTER to 0 to carry the loop through memory. - -#define OSACA_EVALUATE_CONDITIONS 1 -#define OSACA_CARRY_LOOP_THROUGH_REGISTER 1 - -static bool volatile OSACA_loop_terminator = false; - -#define OSACA_FUNCTION_BEGIN(arg) \ - double OSACA_LOOP_CARRY_QUALIFIER OSACA_loop_carry = arg; \ - OSACA_outer_loop: \ - if constexpr (std::string_view(__func__) == \ - STRINGIFY_EXPANSION(OSACA_ANALYSED_FUNCTION)) { \ - IACA_VC64_START; \ - } \ - _Pragma("warning(push)"); \ - _Pragma("warning(disable : 4102)"); \ - OSACA_loop: \ - _Pragma("warning(pop)"); \ - arg = OSACA_loop_carry - -#define OSACA_RETURN(result) \ - do { \ - if constexpr (std::string_view(__func__) == \ - STRINGIFY_EXPANSION(OSACA_ANALYSED_FUNCTION)) { \ - OSACA_loop_carry = (result); \ - if (!OSACA_loop_terminator) { \ - goto OSACA_loop; \ - } \ - double volatile OSACA_result = OSACA_loop_carry; \ - IACA_VC64_END; \ - /* The outer loop prevents the the start and end marker from being */ \ - /* interleaved with register saving and restoring moves. */ \ - if (!OSACA_loop_terminator) { \ - goto OSACA_outer_loop; \ - } \ - return OSACA_result; \ - } else { \ - return (result); \ - } \ - } while (false) - -#if OSACA_CARRY_LOOP_THROUGH_REGISTER -#define OSACA_LOOP_CARRY_QUALIFIER -#else -#define OSACA_LOOP_CARRY_QUALIFIER volatile -#endif - -// The branch not taken, determined by evaluating the condition -// `UNDER_OSACA_HYPOTHESES`, is eliminated by `if constexpr`; the condition is -// also compiled normally and assigned to a boolean; whether this results in any -// generated code depends on `OSACA_EVALUATE_CONDITIONS`. Note that, with -// `OSACA_EVALUATE_CONDITIONS`, in `OSACA_IF(p) { } OSACA_ELSE_IF(q) { }`, if -// `p` holds `UNDER_OSACA_HYPOTHESES`, code is generated to evaluate `p`, but -// not `q`. - -#define OSACA_IF(condition) \ - if constexpr (bool OSACA_CONDITION_QUALIFIER OSACA_computed_condition = \ - (condition); \ - UNDER_OSACA_HYPOTHESES(condition)) - -#if OSACA_EVALUATE_CONDITIONS -#define OSACA_CONDITION_QUALIFIER volatile -#else -#define OSACA_CONDITION_QUALIFIER -#endif - -#else // if !PRINCIPIA_USE_OSACA - -#define OSACA_FUNCTION_BEGIN(arg) -#define OSACA_RETURN(result) return (result) -#define OSACA_IF(condition) if (condition) - -#endif // PRINCIPIA_USE_OSACA - -#define OSACA_ELSE_IF else OSACA_IF // NOLINT - - -namespace principia { -namespace numerics { -namespace _sin_cos { -namespace internal { - -using namespace principia::numerics::_accurate_tables; -using namespace principia::numerics::_double_precision; -using namespace principia::numerics::_fma; -using namespace principia::numerics::_polynomial_evaluators; -using namespace principia::quantities::_elementary_functions; - using Argument = double; using Value = double; diff --git a/quantities/elementary_functions_body.hpp b/quantities/elementary_functions_body.hpp index 1c1345e929..89cee08261 100644 --- a/quantities/elementary_functions_body.hpp +++ b/quantities/elementary_functions_body.hpp @@ -94,7 +94,7 @@ Q Mod(Q const& argument, Q const& modulus) { template SquareRoot Sqrt(Q const& x) { -#if PRINCIPIA_USE_SSE3_INTRINSICS +#if PRINCIPIA_USE_SSE3_INTRINSICS() auto const x_128d = _mm_set_sd(x / si::Unit); return si::Unit> * _mm_cvtsd_f64(_mm_sqrt_sd(x_128d, x_128d)); #else