Skip to content

Commit

Permalink
Workaround for Clang bug in debug builds; Modernized Floor and Ceil; …
Browse files Browse the repository at this point in the history
…Added constexpr Floor & Ceil; Tests for Floor & Ceil; Single argument Fallback and Evaluate
  • Loading branch information
Epixu committed May 22, 2024
1 parent 51b28be commit 5fd7476
Show file tree
Hide file tree
Showing 23 changed files with 636 additions and 94 deletions.
4 changes: 2 additions & 2 deletions source/Add.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ namespace Langulus::SIMD
constexpr auto AddConstexpr(const auto& lhsOrig, const auto& rhsOrig) noexcept {
using DOUT = Decay<TypeOf<OUT>>;

return Evaluate<0, Unsupported, OUT>(
return Evaluate2<0, Unsupported, OUT>(
lhsOrig, rhsOrig, nullptr,
[](const DOUT& lhs, const DOUT& rhs) noexcept -> DOUT {
return lhs + rhs;
Expand All @@ -130,7 +130,7 @@ namespace Langulus::SIMD
using DOUT = Decay<TypeOf<OUT>>;
using REGISTER = Register<decltype(lhsOrig), decltype(rhsOrig), OUT>;

return Evaluate<0, REGISTER, OUT>(
return Evaluate2<0, REGISTER, OUT>(
lhsOrig, rhsOrig,
[](const REGISTER& lhs, const REGISTER& rhs) noexcept {
LANGULUS_SIMD_VERBOSE("Adding (SIMD) as ", NameOf<REGISTER>());
Expand Down
158 changes: 116 additions & 42 deletions source/Ceil.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,63 +16,137 @@ namespace Langulus::SIMD
namespace Inner
{

/// Get ceiling values via SIMD
/// Fallback overload, constrained by CT::NotSIMD, that lets callers detect
/// a missing SIMD routine
/// @return a default-constructed Unsupported tag
template<CT::Decayed, CT::NotSIMD T> LANGULUS(INLINED)
constexpr Unsupported CeilSIMD(const T&) noexcept {
   return Unsupported {};
}

/// Get ceiled values via SIMD
/// @tparam T - the type of the array element
/// @tparam REGISTER - the register type (deducible)
/// @param value - the array
/// @return the ceiling values
/// @param value - the register
/// @return the ceiled values
template<CT::Decayed T, CT::SIMD REGISTER> LANGULUS(INLINED)
auto Ceil(UNUSED() const REGISTER& value) noexcept {
auto CeilSIMD(UNUSED() const REGISTER& value) noexcept {
static_assert(CT::Real<T>, "Suboptimal and pointless for whole numbers");

#if LANGULUS_SIMD(128BIT)
if constexpr (CT::SIMD128<REGISTER>) {
if constexpr (CT::Float<T>)
return simde_mm_ceil_ps(value);
else if constexpr (CT::Double<T>)
return simde_mm_ceil_pd(value);
else LANGULUS_ERROR("Unsupported type for 16-byte package");
}
#if LANGULUS_COMPILER(CLANG) and LANGULUS(DEBUG)
// WORKAROUND for a Clang bug, see:
// https://github.com/simd-everywhere/simde/issues/1014
//TODO hopefully it is fixed in the future
return Unsupported {};
#else
#if LANGULUS_SIMD(128BIT)
if constexpr (CT::SIMD128<REGISTER>) {
if constexpr (CT::Float<T>)
return simde_mm_ceil_ps(value);
else if constexpr (CT::Double<T>)
return simde_mm_ceil_pd(value);
else
#endif
LANGULUS_ERROR("Unsupported type for 16-byte package");
}
else
#endif

#if LANGULUS_SIMD(256BIT)
if constexpr (CT::SIMD256<REGISTER>) {
if constexpr (CT::Float<T>)
return simde_mm256_ceil_ps(value);
else if constexpr (CT::Double<T>)
return simde_mm256_ceil_pd(value);
else
LANGULUS_ERROR("Unsupported type for 32-byte package");
}
else
#endif

#if LANGULUS_SIMD(256BIT)
if constexpr (CT::SIMD256<REGISTER>) {
if constexpr (CT::Float<T>)
return simde_mm256_ceil_ps(value);
else if constexpr (CT::Double<T>)
return simde_mm256_ceil_pd(value);
else LANGULUS_ERROR("Unsupported type for 32-byte package");
}
#if LANGULUS_SIMD(512BIT)
if constexpr (CT::SIMD512<REGISTER>) {
if constexpr (CT::Float<T>)
return simde_mm512_ceil_ps(value);
else if constexpr (CT::Double<T>)
return simde_mm512_ceil_pd(value);
else
#endif
LANGULUS_ERROR("Unsupported type for 64-byte package");
}
else
#endif
LANGULUS_ERROR("Unsupported type");
#endif
}

/// Ceil (constexpr, no SIMD)
/// @tparam OUT - the desired element type (lossless by default)
/// @return array/scalar
template<CT::NotSemantic OUT> NOD() LANGULUS(INLINED)
constexpr auto CeilConstexpr(const auto& value) noexcept {
using DOUT = Decay<TypeOf<OUT>>;

#if LANGULUS_SIMD(512BIT)
if constexpr (CT::SIMD512<REGISTER>) {
if constexpr (CT::Float<T>)
return simde_mm512_ceil_ps(value);
else if constexpr (CT::Double<T>)
return simde_mm512_ceil_pd(value);
else LANGULUS_ERROR("Unsupported type for 64-byte package");
return Evaluate1<0, Unsupported, OUT>(
value, nullptr,
[](const DOUT& f) noexcept -> DOUT {
static_assert(CT::Real<DOUT>, "Pointless for whole numbers");
// std::ceil isn't constexpr :(
//TODO waiting for C++23 support
const int64_t i = static_cast<int64_t>(f);
return f > i ? i + 1 : i;
}
else
#endif
);
}

/// Ceil (SIMD)
/// @tparam OUT - the desired element type (lossless by default)
/// @return a register, if viable SIMD routine exists
/// or array/scalar if no viable SIMD routine exists
template<CT::NotSemantic OUT> NOD() LANGULUS(INLINED)
auto CeilDynamic(const auto& value) noexcept {
using DOUT = Decay<TypeOf<OUT>>;
using REGISTER = ToSIMD<decltype(value), OUT>;

LANGULUS_ERROR("Unsupported type");
return Evaluate1<0, REGISTER, OUT>(
value,
[](const REGISTER& v) noexcept {
LANGULUS_SIMD_VERBOSE("Ceiling (SIMD) as ", NameOf<REGISTER>());
return CeilSIMD<DOUT>(v);
},
[](const DOUT& v) noexcept -> DOUT {
static_assert(CT::Real<DOUT>, "Pointless for whole numbers");
LANGULUS_SIMD_VERBOSE("Ceiling (Fallback) ", v, " (", NameOf<DOUT>(), ")");
return std::ceil(v);
}
);
}

} // namespace Langulus::SIMD::Inner


/// Get the ceiling values
/// @param T - type of a single value
/// @param S - size of the array
/// @return a register, if viable SIMD routine exists
/// or array/scalar if no viable SIMD routine exists
template<CT::NotSemantic T> LANGULUS(INLINED)
auto Ceil(const T& value) noexcept {
using DT = Decay<TypeOf<T>>;
return Inner::Ceil<DT>(Load<0>(value));

/// Ceil numbers, and force output to desired place
/// Inside the IF_CONSTEXPR() branch the result of Inner::CeilConstexpr is
/// written via StoreConstexpr; otherwise the result of Inner::CeilDynamic
/// is written via Store. In both cases the input goes through DesemCast
/// @tparam VAL - array, scalar, or register (deducible)
/// @tparam OUT - the desired element type (deducible)
/// @param val - the value(s) to ceil
/// @param out - [out] the place where the result is stored
/// @attention may generate additional convert/store instructions in
/// order to fit the result in desired output
template<class VAL, CT::NotSemantic OUT> LANGULUS(INLINED)
constexpr void Ceil(const VAL& val, OUT& out) noexcept {
// NOTE(review): IF_CONSTEXPR() is presumably the compile-time
// evaluation check - confirm against its definition
IF_CONSTEXPR() {
StoreConstexpr(
Inner::CeilConstexpr<OUT>(DesemCast(val)), out);
}
else Store(
Inner::CeilDynamic<OUT>(DesemCast(val)), out);
}

/// Ceil numbers and return the result by value
/// Thin wrapper: declares a local OUT, lets the two-argument Ceil overload
/// fill it, then returns it
/// @tparam VAL - array, scalar, or register (deducible)
/// @tparam OUT - the desired output type (lossless array by default)
/// @param val - the value(s) to ceil
/// @return the OUT instance written by the two-argument overload
/// @attention may generate additional convert/store instructions in
/// order to fit the result in desired output
template<class VAL, CT::NotSemantic OUT = LosslessArray<VAL, VAL>>
LANGULUS(INLINED)
constexpr auto Ceil(const VAL& val) noexcept {
   OUT result;
   Ceil(DesemCast(val), result);
   return result;
}

} // namespace Langulus::SIMD
15 changes: 13 additions & 2 deletions source/Common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -768,7 +768,15 @@ namespace Langulus::SIMD
namespace Inner
{
template<class F, class T>
consteval auto InvocableResultInner() noexcept {
consteval auto InvocableResultInner1() noexcept {
if constexpr (CT::Nullptr<F>)
return Unsupported {};
else
return ::std::invoke_result_t<F, T> {};
}

template<class F, class T>
consteval auto InvocableResultInner2() noexcept {
if constexpr (CT::Nullptr<F>)
return Unsupported {};
else
Expand All @@ -777,7 +785,10 @@ namespace Langulus::SIMD
}

template<class F, class T>
using InvocableResult = decltype(Inner::InvocableResultInner<F, T>());
using InvocableResult1 = decltype(Inner::InvocableResultInner1<F, T>());

template<class F, class T>
using InvocableResult2 = decltype(Inner::InvocableResultInner2<F, T>());

} // namespace Langulus::SIMD

Expand Down
4 changes: 2 additions & 2 deletions source/Divide.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ namespace Langulus::SIMD
constexpr auto DivideConstexpr(const LHS& lhsOrig, const RHS& rhsOrig) {
using DOUT = Decay<TypeOf<Desem<OUT>>>;

return Inner::Evaluate<1, Unsupported, OUT>(
return Inner::Evaluate2<1, Unsupported, OUT>(
lhsOrig, rhsOrig, nullptr,
[](const DOUT& lhs, const DOUT& rhs) -> DOUT {
if (rhs == DOUT {0})
Expand All @@ -240,7 +240,7 @@ namespace Langulus::SIMD
using DOUT = Decay<TypeOf<Desem<OUT>>>;
using REGISTER = Inner::Register<LHS, RHS, OUT>;

return Inner::Evaluate<1, REGISTER, OUT>(
return Inner::Evaluate2<1, REGISTER, OUT>(
lhsOrig, rhsOrig,
[](const REGISTER& lhs, const REGISTER& rhs) {
return Inner::Divide<DOUT>(lhs, rhs);
Expand Down
4 changes: 2 additions & 2 deletions source/Equals.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ namespace Langulus::SIMD
// so make sure we operate on Lossless<LHS, RHS>
using DOUT = Decay<TypeOf<Lossless<LHS, RHS>>>;

return Inner::Evaluate<0, Unsupported, OUT>(
return Inner::Evaluate2<0, Unsupported, OUT>(
lhsOrig, rhsOrig, nullptr,
[](const DOUT& lhs, const DOUT& rhs) noexcept -> bool {
return lhs == rhs;
Expand All @@ -392,7 +392,7 @@ namespace Langulus::SIMD
using REGISTER = Inner::Register<LHS, RHS, LOSSLESS>;
constexpr auto S = OverlapCounts<LHS, RHS>();

return Inner::Evaluate<0, REGISTER, OUT>(
return Inner::Evaluate2<0, REGISTER, OUT>(
lhsOrig, rhsOrig,
[](const REGISTER& lhs, const REGISTER& rhs) noexcept {
return Inner::Equals<DOUT, S>(lhs, rhs);
Expand Down
4 changes: 2 additions & 2 deletions source/EqualsOrGreater.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ namespace Langulus::SIMD
// so make sure we operate on Lossless<LHS, RHS>
using DOUT = Decay<TypeOf<Lossless<LHS, RHS>>>;

return Inner::Evaluate<0, Unsupported, OUT>(
return Inner::Evaluate2<0, Unsupported, OUT>(
lhsOrig, rhsOrig, nullptr,
[](const DOUT& lhs, const DOUT& rhs) noexcept -> bool {
return lhs >= rhs;
Expand All @@ -392,7 +392,7 @@ namespace Langulus::SIMD
using REGISTER = Inner::Register<LHS, RHS, LOSSLESS>;
constexpr auto S = OverlapCounts<LHS, RHS>();

return Inner::Evaluate<0, REGISTER, OUT>(
return Inner::Evaluate2<0, REGISTER, OUT>(
lhsOrig, rhsOrig,
[](const REGISTER& lhs, const REGISTER& rhs) noexcept {
return Inner::EqualsOrGreater<DOUT, S>(lhs, rhs);
Expand Down
4 changes: 2 additions & 2 deletions source/EqualsOrLesser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ namespace Langulus::SIMD
// so make sure we operate on Lossless<LHS, RHS>
using DOUT = Decay<TypeOf<Lossless<LHS, RHS>>>;

return Inner::Evaluate<0, Unsupported, OUT>(
return Inner::Evaluate2<0, Unsupported, OUT>(
lhsOrig, rhsOrig, nullptr,
[](const DOUT& lhs, const DOUT& rhs) noexcept -> bool {
return lhs <= rhs;
Expand All @@ -392,7 +392,7 @@ namespace Langulus::SIMD
using REGISTER = Inner::Register<LHS, RHS, LOSSLESS>;
constexpr auto S = OverlapCounts<LHS, RHS>();

return Inner::Evaluate<0, REGISTER, OUT>(
return Inner::Evaluate2<0, REGISTER, OUT>(
lhsOrig, rhsOrig,
[](const REGISTER& lhs, const REGISTER& rhs) noexcept {
return Inner::EqualsOrLesser<DOUT, S>(lhs, rhs);
Expand Down
Loading

0 comments on commit 5fd7476

Please sign in to comment.