Skip to content

Commit

Permalink
Major bug fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Epixu committed Jun 27, 2024
1 parent 9082599 commit 3514447
Show file tree
Hide file tree
Showing 11 changed files with 98 additions and 76 deletions.
32 changes: 22 additions & 10 deletions source/Attempt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,33 +89,45 @@ namespace Langulus::SIMD::Inner
) {
using LHS = Deref<decltype(lhs)>;
using RHS = Deref<decltype(rhs)>;
using LOSSLESS = SIMD::LosslessArray<LHS, RHS>;
using OUT = Conditional<CT::Void<FORCE_OUT>,
SIMD::LosslessArray<LHS, RHS>,
SIMD::LosslessArray<FORCE_OUT>
>;
LOSSLESS, SIMD::LosslessArray<FORCE_OUT>>;
using E = TypeOf<OUT>;
using R = decltype(Load<DEF>(Fake<const SIMD::LosslessArray<LHS, RHS>&>()));
using R = decltype(Load<DEF>(Fake<const LOSSLESS&>()));
constexpr bool supported = CT::SIMD<InvocableResult2<decltype(opSIMD), R>>;

if constexpr (not supported) {
// Operating on scalars, or SIMD not supported, just fallback
return FallbackBinary<OUT>(lhs, rhs, opFALL);
}
else if constexpr (not CT::SIMD<decltype(Load<DEF>(lhs))>
or not CT::SIMD<decltype(Load<DEF>(rhs))>) {
else if constexpr (not CT::SIMD<decltype(Load<DEF, R>(lhs))>
or not CT::SIMD<decltype(Load<DEF, R>(rhs))>) {
// Arguments can't be loaded in registers, just fallback
return FallbackBinary<OUT>(lhs, rhs, opFALL);
}
else if constexpr (CT::Bool<E>) {
// If FORCE_OUT was boolean, we're doing some comparing, so
// don't convert to output data yet
return opSIMD(Load<DEF>(lhs), Load<DEF>(rhs));
// don't convert to output data yet. Instead, convert to the
// lossless of the two types.
const CT::SIMD auto loadL = Load<DEF, R>(lhs);
const CT::SIMD auto loadR = Load<DEF, R>(rhs);
using ALT_E = TypeOf<LOSSLESS>;

if constexpr (not CT::SIMD<decltype(ConvertSIMD<ALT_E>(loadL))>
or not CT::SIMD<decltype(ConvertSIMD<ALT_E>(loadR))>) {
// Arguments can't be converted to the desired type
return FallbackBinary<OUT>(lhs, rhs, opFALL);
}
else {
// Perform the SIMD operation
return opSIMD(ConvertSIMD<ALT_E>(loadL), ConvertSIMD<ALT_E>(loadR));
}
}
else {
// Load both arguments, convert them to the desired FORCE_OUT
// and perform the operation
const CT::SIMD auto loadL = Load<DEF>(lhs);
const CT::SIMD auto loadR = Load<DEF>(rhs);
const CT::SIMD auto loadL = Load<DEF, R>(lhs);
const CT::SIMD auto loadR = Load<DEF, R>(rhs);

if constexpr (not CT::SIMD<decltype(ConvertSIMD<E>(loadL))>
or not CT::SIMD<decltype(ConvertSIMD<E>(loadR))>) {
Expand Down
14 changes: 9 additions & 5 deletions source/Common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -561,17 +561,21 @@ namespace Langulus::SIMD
else if constexpr (CT::SignedInteger16<T>) {
const auto lo_lane = simde_mm256_castsi256_si128(m);
const auto hi_lane = simde_mm256_extracti128_si256(m, 1);
return V128<std::int8_t> {simde_mm_packs_epi16(lo_lane, hi_lane)};
return V256<std::int8_t> {simde_mm256_castsi128_si256(
simde_mm_packs_epi16(lo_lane, hi_lane)
)};
}
else if constexpr (CT::UnsignedInteger16<T>) {
const auto lo_lane = simde_mm256_castsi256_si128(m);
const auto hi_lane = simde_mm256_extracti128_si256(m, 1);
return V128<std::uint8_t> {simde_mm_packus_epi16(lo_lane, hi_lane)};
return V256<std::uint8_t> {simde_mm256_castsi128_si256(
simde_mm_packus_epi16(lo_lane, hi_lane)
)};
}
else if constexpr (CT::SignedInteger32<T>)
return V256<std::int16_t> {simde_mm256_packs_epi32 (m, Zero())};
return V256<std::int16_t> {simde_mm256_packs_epi32 (m, simde_mm256_permute2x128_si256(m, m, 1))};
else if constexpr (CT::UnsignedInteger32<T>)
return V256<std::uint16_t> {simde_mm256_packus_epi32(m, Zero())};
return V256<std::uint16_t> {simde_mm256_packus_epi32(m, simde_mm256_permute2x128_si256(m, m, 1))};
else if constexpr (CT::SignedInteger64<T>) {
#if LANGULUS_SIMD(AVX512F) and LANGULUS_SIMD(AVX512VL)
return V128<std::int32_t> {simde_mm256_cvtepi64_epi32(m)};
Expand Down Expand Up @@ -779,7 +783,7 @@ namespace Langulus::SIMD
else if constexpr (CT::UnsignedInteger64<T>)
return V256<std::uint32_t> {simde_mm512_cvtepi64_epi32(m)};
else
LANGULUS_ERROR("Can't unpack this type");
LANGULUS_ERROR("Can't pack this type");
}
};

Expand Down
2 changes: 1 addition & 1 deletion source/Fill.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ namespace Langulus::SIMD
/// @return the filled register
template<int R> NOD() LANGULUS(INLINED)
auto Fill(const CT::Scalar auto& s) noexcept {
using T = Deref<decltype(s)>;
using T = Decvq<TypeOf<decltype(s)>>;

#if LANGULUS_SIMD(128BIT)
if constexpr (R <= 16) {
Expand Down
37 changes: 17 additions & 20 deletions source/Load.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,17 @@

namespace Langulus::SIMD
{
namespace Inner
{
   /// Compile-time helper that picks the element count used by the Load
   /// routines
   ///   @tparam R - the type being loaded; its element type is taken via
   ///      Decvq<TypeOf<R>>
   ///   @tparam FORCE_OUT - void to keep the count of R, otherwise the
   ///      type whose byte size determines the count
   ///   @return CountOf<R> when FORCE_OUT is void; otherwise the number of
   ///      R's elements that fit in sizeof(FORCE_OUT)
   template<class R, class FORCE_OUT>
   consteval Count DecideCount() {
      using ELEMENT = Decvq<TypeOf<R>>;

      if constexpr (not CT::Void<FORCE_OUT>) {
         // An explicit output was requested - derive the count from
         // its byte size and the size of a single element
         return sizeof(FORCE_OUT) / sizeof(ELEMENT);
      }
      else {
         // No explicit output - keep whatever count R already has
         return CountOf<R>;
      }
   }
} // namespace Inner

/// Load a register into another register
/// @tparam DEF - default value for setting elements outside input size
Expand All @@ -22,13 +33,8 @@ namespace Langulus::SIMD
template<auto DEF, class FORCE_OUT = void> NOD() LANGULUS(INLINED)
auto Load(const CT::SIMD auto& v) noexcept {
using R = Deref<decltype(v)>;
using T = TypeOf<R>;

static_assert(CT::Void<FORCE_OUT> or CT::Similar<TypeOf<FORCE_OUT>, T>,
"Load routine doesn't convert anything, make sure that "
"input register's type is similar to the desired register's type");
constexpr auto S = Inner::DecideCount<R, FORCE_OUT>();

constexpr auto S = CT::Void<FORCE_OUT> ? CountOf<R> : CountOf<FORCE_OUT>;
if constexpr (S == CountOf<R>) {
// Just forward the original register
return v;
Expand Down Expand Up @@ -60,24 +66,15 @@ namespace Langulus::SIMD
else {
// Load a scalar, by duplicating the value for each element
// in the register. FORCE_OUT MUST BE SET!
static_assert(CT::Similar<TypeOf<FORCE_OUT>, T>,
"Load routine doesn't convert anything, make sure that "
"scalar type is similar to the desired register's type");

constexpr auto S = CountOf<FORCE_OUT>;
constexpr auto RS = sizeof(T) * S;
constexpr auto S = Inner::DecideCount<R, FORCE_OUT>();
return Fill<sizeof(T) * S>(v);
}
}
else {
// Load a vector either partially, filling the blanks using
// DEF value, or directly if vector is of the proper size
// Should perform faster if 'v' is aligned properly
static_assert(CT::Void<FORCE_OUT> or CT::Similar<TypeOf<FORCE_OUT>, T>,
"Load routine doesn't convert anything, make sure that "
"vector's type is similar to the desired register's type");

constexpr auto S = CT::Void<FORCE_OUT> ? CountOf<R> : CountOf<FORCE_OUT>;
constexpr auto S = Inner::DecideCount<R, FORCE_OUT>();
constexpr auto RS = sizeof(T) * S;

#if LANGULUS_SIMD(128BIT)
Expand All @@ -86,7 +83,7 @@ namespace Langulus::SIMD
"Loading 128bit register from ", S, " unaligned elements");

// Load as a single 128bit register
if constexpr (RS == 16) {
if constexpr (sizeof(R) >= 16) {
if constexpr (CT::Float<T>) return V128<T> {simde_mm_loadu_ps (&GetFirst(v))};
else if constexpr (CT::Double<T>) return V128<T> {simde_mm_loadu_pd (&GetFirst(v))};
else if constexpr (CT::Integer<T>) return V128<T> {simde_mm_loadu_si128(&GetFirst(v))};
Expand All @@ -103,7 +100,7 @@ namespace Langulus::SIMD
"Loading 256bit register from ", S, " unaligned elements");

// Load as a single 256bit register
if constexpr (RS == 32) {
if constexpr (sizeof(R) >= 32) {
if constexpr (CT::Float<T>) return V256<T> {simde_mm256_loadu_ps (&GetFirst(v))};
else if constexpr (CT::Double<T>) return V256<T> {simde_mm256_loadu_pd (&GetFirst(v))};
else if constexpr (CT::Integer<T>) return V256<T> {simde_mm256_loadu_si256(&GetFirst(v))};
Expand All @@ -120,7 +117,7 @@ namespace Langulus::SIMD
"Loading 512bit register from ", S, " unaligned elements");

// Load as a single 512bit register
if constexpr (RS == 64) {
if constexpr (sizeof(R) >= 64) {
if constexpr (CT::Float<T>) return V512<T> {simde_mm512_loadu_ps (&GetFirst(v))};
else if constexpr (CT::Double<T>) return V512<T> {simde_mm512_loadu_pd (&GetFirst(v))};
else if constexpr (CT::Integer<T>) return V512<T> {simde_mm512_loadu_si512(&GetFirst(v))};
Expand Down
9 changes: 6 additions & 3 deletions source/Store.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ namespace Langulus::SIMD
"Destination array must be smaller or equal of the register size");
static_assert(CountOf<TO> > 1,
"Storing a single element is suboptimial - don't use SIMD in the first place");
static_assert(CT::Similar<T, TO_T> or CT::Bool<TO_T>,
static_assert(CT::Similar<T, TypeOf<TO_T>> or CT::Bool<TO_T>,
"Storing doesn't parform conversion, so destination must be "
"of similar type as the register");

Expand Down Expand Up @@ -347,10 +347,13 @@ namespace Langulus::SIMD
/// @param to - where to store it
LANGULUS(INLINED)
constexpr void Store(const CT::NotSemantic auto& from, CT::NotSIMD auto& to) noexcept {
if constexpr (CT::SIMD<decltype(from)>)
using FROM = Deref<decltype(from)>;
if constexpr (CT::SIMD<FROM>)
Inner::StoreSIMD(from, to);
else
else if constexpr (CT::Supported<FROM>)
Inner::StoreConstexpr(from, to);
//else
// LANGULUS_ERROR("Source not supported");
}

} // namespace Langulus::SIMD
Expand Down
2 changes: 1 addition & 1 deletion source/binary/Multiply.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ namespace Langulus::SIMD
/// @param value - scalar/vector/register to operate on
/// @return the product scalar/vector/register
template<CT::NotSemantic FORCE_OUT = void> NOD() LANGULUS(INLINED)
auto Multiply(const auto& lhs, const auto& rhs) noexcept {
constexpr auto Multiply(const auto& lhs, const auto& rhs) noexcept {
return AttemptBinary<0, FORCE_OUT>(lhs, rhs,
[]<class R>(const R& l, const R& r) noexcept {
LANGULUS_SIMD_VERBOSE("Multiplying (SIMD) as ", NameOf<REGISTER>());
Expand Down
32 changes: 16 additions & 16 deletions source/converters/From128i.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,13 +124,13 @@ namespace Langulus::SIMD::Inner
// Converting to 8bit integer
//
if constexpr (CT::Integer8<T>)
return v;
return V128<TO> {v};
else if constexpr (CT::Integer16<T>)
return v.UnpackLo();
return V128<TO> {v.Pack()};
else if constexpr (CT::Integer32<T>)
return v.UnpackLo().UnpackLo();
return V128<TO> {v.Pack().Pack()};
else if constexpr (CT::Integer64<T>)
return v.UnpackLo().UnpackLo().UnpackLo();
return V128<TO> {v.Pack().Pack().Pack()};
else
LANGULUS_ERROR("Unsupported conversion");
}
Expand All @@ -139,13 +139,13 @@ namespace Langulus::SIMD::Inner
// Converting to 16bit integer
//
if constexpr (CT::Integer8<T>)
return v.Pack();
return V128<TO> {v.UnpackLo()};
else if constexpr (CT::Integer16<T>)
return v;
return V128<TO> {v};
else if constexpr (CT::Integer32<T>)
return v.UnpackLo();
return V128<TO> {v.Pack()};
else if constexpr (CT::Integer64<T>)
return v.UnpackLo().UnpackLo();
return V128<TO> {v.Pack().Pack()};
else
LANGULUS_ERROR("Unsupported conversion");
}
Expand All @@ -154,13 +154,13 @@ namespace Langulus::SIMD::Inner
// Converting to 32bit integer
//
if constexpr (CT::Integer8<T>)
return v.Pack().Pack();
return V128<TO> {v.UnpackLo().UnpackLo()};
else if constexpr (CT::Integer16<T>)
return v.Pack();
return V128<TO> {v.UnpackLo()};
else if constexpr (CT::Integer32<T>)
return v;
return V128<TO> {v};
else if constexpr (CT::Integer64<T>)
return v.UnpackLo();
return V128<TO> {v.Pack()};
else
LANGULUS_ERROR("Unsupported conversion");
}
Expand All @@ -169,13 +169,13 @@ namespace Langulus::SIMD::Inner
// Converting to 64bit integer
//
if constexpr (CT::Integer8<T>)
return v.Pack().Pack().Pack();
return V128<TO> {v.UnpackLo().UnpackLo().UnpackLo()};
else if constexpr (CT::Integer16<T>)
return v.Pack().Pack();
return V128<TO> {v.UnpackLo().UnpackLo()};
else if constexpr (CT::Integer32<T>)
return v.Pack();
return V128<TO> {v.UnpackLo()};
else if constexpr (CT::Integer64<T>)
return v;
return V128<TO> {v};
else
LANGULUS_ERROR("Unsupported conversion");
}
Expand Down
2 changes: 1 addition & 1 deletion source/converters/From256f.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ namespace Langulus::SIMD::Inner
template<Element TO> NOD() LANGULUS(INLINED)
auto ConvertFrom256f(CT::SIMD256f auto v) noexcept {
if constexpr (CT::Double<TO>)
return V256<TO> {simde_mm256_cvtps_pd(v)};
return V256<TO> {simde_mm256_cvtps_pd(simde_mm256_castps256_ps128(v))};
else if constexpr (CT::Float<TO>)
return v;
else if constexpr (CT::SignedInteger8<TO>) {
Expand Down
32 changes: 16 additions & 16 deletions source/converters/From256i.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,13 +136,13 @@ namespace Langulus::SIMD::Inner
// Converting to 8bit integer
//
if constexpr (CT::Integer8<T>)
return v;
return V256<TO> {v};
else if constexpr (CT::Integer16<T>)
return v.UnpackLo();
return V256<TO> {v.Pack()};
else if constexpr (CT::Integer32<T>)
return v.UnpackLo().UnpackLo();
return V256<TO> {v.Pack().Pack()};
else if constexpr (CT::Integer64<T>)
return v.UnpackLo().UnpackLo().UnpackLo();
return V256<TO> {v.Pack().Pack().Pack()};
else
LANGULUS_ERROR("Unsupported conversion");
}
Expand All @@ -151,13 +151,13 @@ namespace Langulus::SIMD::Inner
// Converting to 16bit integer
//
if constexpr (CT::Integer8<T>)
return v.Pack();
return V256<TO> {v.UnpackLo()};
else if constexpr (CT::Integer16<T>)
return v;
return V256<TO> {v};
else if constexpr (CT::Integer32<T>)
return v.UnpackLo();
return V256<TO> {v.Pack()};
else if constexpr (CT::Integer64<T>)
return v.UnpackLo().UnpackLo();
return V256<TO> {v.Pack().Pack()};
else
LANGULUS_ERROR("Unsupported conversion");
}
Expand All @@ -166,13 +166,13 @@ namespace Langulus::SIMD::Inner
// Converting to 32bit integer
//
if constexpr (CT::Integer8<T>)
return v.Pack().Pack();
return V256<TO> {v.UnpackLo().UnpackLo()};
else if constexpr (CT::Integer16<T>)
return v.Pack();
return V256<TO> {v.UnpackLo()};
else if constexpr (CT::Integer32<T>)
return v;
return V256<TO> {v};
else if constexpr (CT::Integer64<T>)
return v.UnpackLo();
return V256<TO> {v.Pack()};
else
LANGULUS_ERROR("Unsupported conversion");
}
Expand All @@ -181,13 +181,13 @@ namespace Langulus::SIMD::Inner
// Converting to 64bit integer
//
if constexpr (CT::Integer8<T>)
return v.Pack().Pack().Pack();
return V256<TO> {v.UnpackLo().UnpackLo().UnpackLo()};
else if constexpr (CT::Integer16<T>)
return v.Pack().Pack();
return V256<TO> {v.UnpackLo().UnpackLo()};
else if constexpr (CT::Integer32<T>)
return v.Pack();
return V256<TO> {v.UnpackLo()};
else if constexpr (CT::Integer64<T>)
return v;
return V256<TO> {v};
else
LANGULUS_ERROR("Unsupported conversion");
}
Expand Down
2 changes: 1 addition & 1 deletion test/Equal/TestEqual-VVB.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@

///
TEMPLATE_TEST_CASE("Vector == Vector -> Bool", "[compare]"
, VECTORS_ALL(5)
, VECTORS_ALL(9)
, VECTORS_ALL(2)
, VECTORS_ALL(3)
, VECTORS_ALL(4)
, VECTORS_ALL(5)
, VECTORS_ALL(8)
, VECTORS_ALL(16)
, VECTORS_ALL(17)
Expand Down
Loading

0 comments on commit 3514447

Please sign in to comment.