diff --git a/source/Attempt.hpp b/source/Attempt.hpp index 05ad5e8..e6e3c6a 100644 --- a/source/Attempt.hpp +++ b/source/Attempt.hpp @@ -89,33 +89,45 @@ namespace Langulus::SIMD::Inner ) { using LHS = Deref; using RHS = Deref; + using LOSSLESS = SIMD::LosslessArray; using OUT = Conditional, - SIMD::LosslessArray, - SIMD::LosslessArray - >; + LOSSLESS, SIMD::LosslessArray>; using E = TypeOf; - using R = decltype(Load(Fake&>())); + using R = decltype(Load(Fake())); constexpr bool supported = CT::SIMD>; if constexpr (not supported) { // Operating on scalars, or SIMD not supported, just fallback return FallbackBinary(lhs, rhs, opFALL); } - else if constexpr (not CT::SIMD(lhs))> - or not CT::SIMD(rhs))>) { + else if constexpr (not CT::SIMD(lhs))> + or not CT::SIMD(rhs))>) { // Arguments can't be loaded in registers, just fallback return FallbackBinary(lhs, rhs, opFALL); } else if constexpr (CT::Bool) { // If FORCE_OUT was boolean, we're doing some comparing, so - // don't convert to output data yet - return opSIMD(Load(lhs), Load(rhs)); + // don't convert to output data yet. Instead, convert to the + // lossless of the two types. + const CT::SIMD auto loadL = Load(lhs); + const CT::SIMD auto loadR = Load(rhs); + using ALT_E = TypeOf; + + if constexpr (not CT::SIMD(loadL))> + or not CT::SIMD(loadR))>) { + // Arguments can't be converted to the desired type + return FallbackBinary(lhs, rhs, opFALL); + } + else { + // Perform the SIMD operation + return opSIMD(ConvertSIMD(loadL), ConvertSIMD(loadR)); + } } else { // Load both arguments, convert them to the desired FORCE_OUT // and perform the operation - const CT::SIMD auto loadL = Load(lhs); - const CT::SIMD auto loadR = Load(rhs); + const CT::SIMD auto loadL = Load(lhs); + const CT::SIMD auto loadR = Load(rhs); if constexpr (not CT::SIMD(loadL))> or not CT::SIMD(loadR))>) { diff --git a/source/Common.hpp b/source/Common.hpp index b386346..fdab0f0 100644 --- a/source/Common.hpp +++ b/source/Common.hpp @@ -561,17 +561,21 @@ namespace Langulus::SIMD else if constexpr (CT::SignedInteger16) { const auto lo_lane = simde_mm256_castsi256_si128(m); const auto hi_lane = simde_mm256_extracti128_si256(m, 1); - return V128 {simde_mm_packs_epi16(lo_lane, hi_lane)}; + return V256 {simde_mm256_castsi128_si256( + simde_mm_packs_epi16(lo_lane, hi_lane) + )}; } else if constexpr (CT::UnsignedInteger16) { const auto lo_lane = simde_mm256_castsi256_si128(m); const auto hi_lane = simde_mm256_extracti128_si256(m, 1); - return V128 {simde_mm_packus_epi16(lo_lane, hi_lane)}; + return V256 {simde_mm256_castsi128_si256( + simde_mm_packus_epi16(lo_lane, hi_lane) + )}; } else if constexpr (CT::SignedInteger32) - return V256 {simde_mm256_packs_epi32 (m, Zero())}; + return V256 {simde_mm256_packs_epi32 (m, simde_mm256_permute2x128_si256(m, m, 1))}; else if constexpr (CT::UnsignedInteger32) - return V256 {simde_mm256_packus_epi32(m, Zero())}; + return V256 {simde_mm256_packus_epi32(m, simde_mm256_permute2x128_si256(m, m, 1))}; else if constexpr (CT::SignedInteger64) { #if LANGULUS_SIMD(AVX512F) and LANGULUS_SIMD(AVX512VL) return V128 {simde_mm256_cvtepi64_epi32(m)}; @@ -779,7 +783,7 @@ namespace Langulus::SIMD else if constexpr (CT::UnsignedInteger64) return V256 {simde_mm512_cvtepi64_epi32(m)}; else - LANGULUS_ERROR("Can't unpack this type"); + LANGULUS_ERROR("Can't pack this type"); } }; diff --git a/source/Fill.hpp b/source/Fill.hpp index c4bae37..2b469cc 100644 --- a/source/Fill.hpp +++ b/source/Fill.hpp @@ -18,7 +18,7 @@ namespace Langulus::SIMD /// @return the filled register template NOD() LANGULUS(INLINED) auto Fill(const CT::Scalar auto& s) noexcept { - using T = Deref; + using T = Decvq>; #if LANGULUS_SIMD(128BIT) if constexpr (R <= 16) { diff --git a/source/Load.hpp b/source/Load.hpp index 6e544bf..69a305e 100644 --- a/source/Load.hpp +++ b/source/Load.hpp @@ -12,6 +12,17 @@ namespace Langulus::SIMD { + namespace Inner + { + template + consteval Count DecideCount() { + using T = Decvq>; + if constexpr (CT::Void) + return CountOf; + else + return sizeof(FORCE_OUT) / sizeof(T); + } + } /// Load a register into another register /// @tparam DEF - default value for setting elements outside input size @@ -22,13 +33,8 @@ namespace Langulus::SIMD template NOD() LANGULUS(INLINED) auto Load(const CT::SIMD auto& v) noexcept { using R = Deref; - using T = TypeOf; - - static_assert(CT::Void or CT::Similar, T>, - "Load routine doesn't convert anything, make sure that " - "input register's type is similar to the desired register's type"); + constexpr auto S = Inner::DecideCount(); - constexpr auto S = CT::Void ? CountOf : CountOf; if constexpr (S == CountOf) { // Just forward the original register return v; @@ -60,12 +66,7 @@ namespace Langulus::SIMD else { // Load a scalar, by duplicating the value for each element // in the register. FORCE_OUT MUST BE SET! - static_assert(CT::Similar, T>, - "Load routine doesn't convert anything, make sure that " - "scalar type is similar to the desired register's type"); - - constexpr auto S = CountOf; - constexpr auto RS = sizeof(T) * S; + constexpr auto S = Inner::DecideCount(); return Fill(v); } } @@ -73,11 +74,7 @@ namespace Langulus::SIMD // Load a vector either partially, filling the blanks using // DEF value, or directly if vector is of the proper size // Should perform faster if 'v' is aligned properly - static_assert(CT::Void or CT::Similar, T>, - "Load routine doesn't convert anything, make sure that " - "vector's type is similar to the desired register's type"); - - constexpr auto S = CT::Void ? CountOf : CountOf; + constexpr auto S = Inner::DecideCount(); constexpr auto RS = sizeof(T) * S; #if LANGULUS_SIMD(128BIT) @@ -86,7 +83,7 @@ namespace Langulus::SIMD "Loading 128bit register from ", S, " unaligned elements"); // Load as a single 128bit register - if constexpr (RS == 16) { + if constexpr (sizeof(R) >= 16) { if constexpr (CT::Float) return V128 {simde_mm_loadu_ps (&GetFirst(v))}; else if constexpr (CT::Double) return V128 {simde_mm_loadu_pd (&GetFirst(v))}; else if constexpr (CT::Integer) return V128 {simde_mm_loadu_si128(&GetFirst(v))}; @@ -103,7 +100,7 @@ namespace Langulus::SIMD "Loading 256bit register from ", S, " unaligned elements"); // Load as a single 256bit register - if constexpr (RS == 32) { + if constexpr (sizeof(R) >= 32) { if constexpr (CT::Float) return V256 {simde_mm256_loadu_ps (&GetFirst(v))}; else if constexpr (CT::Double) return V256 {simde_mm256_loadu_pd (&GetFirst(v))}; else if constexpr (CT::Integer) return V256 {simde_mm256_loadu_si256(&GetFirst(v))}; @@ -120,7 +117,7 @@ namespace Langulus::SIMD "Loading 512bit register from ", S, " unaligned elements"); // Load as a single 512bit register - if constexpr (RS == 64) { + if constexpr (sizeof(R) >= 64) { if constexpr (CT::Float) return V512 {simde_mm512_loadu_ps (&GetFirst(v))}; else if constexpr (CT::Double) return V512 {simde_mm512_loadu_pd (&GetFirst(v))}; else if constexpr (CT::Integer) return V512 {simde_mm512_loadu_si512(&GetFirst(v))}; diff --git a/source/Store.hpp b/source/Store.hpp index 901e3bd..2d6e17e 100644 --- a/source/Store.hpp +++ b/source/Store.hpp @@ -75,7 +75,7 @@ namespace Langulus::SIMD "Destination array must be smaller or equal of the register size"); static_assert(CountOf > 1, "Storing a single element is suboptimial - don't use SIMD in the first place"); - static_assert(CT::Similar or CT::Bool, + static_assert(CT::Similar> or CT::Bool, "Storing doesn't parform conversion, so destination must be " "of similar type as the register"); @@ -347,10 +347,13 @@ namespace Langulus::SIMD /// @param to - where to store it LANGULUS(INLINED) constexpr void Store(const CT::NotSemantic auto& from, CT::NotSIMD auto& to) noexcept { - if constexpr (CT::SIMD) + using FROM = Deref; + if constexpr (CT::SIMD) Inner::StoreSIMD(from, to); - else + else if constexpr (CT::Supported) Inner::StoreConstexpr(from, to); + //else + // LANGULUS_ERROR("Source not supported"); } } // namespace Langulus::SIMD diff --git a/source/binary/Multiply.hpp b/source/binary/Multiply.hpp index b717e9c..f81d314 100644 --- a/source/binary/Multiply.hpp +++ b/source/binary/Multiply.hpp @@ -139,7 +139,7 @@ namespace Langulus::SIMD /// @patam value - scalar/vector/register to operate on /// @return the product scalar/vector/register template NOD() LANGULUS(INLINED) - auto Multiply(const auto& lhs, const auto& rhs) noexcept { + constexpr auto Multiply(const auto& lhs, const auto& rhs) noexcept { return AttemptBinary<0, FORCE_OUT>(lhs, rhs, [](const R& l, const R& r) noexcept { LANGULUS_SIMD_VERBOSE("Multiplying (SIMD) as ", NameOf()); diff --git a/source/converters/From128i.hpp b/source/converters/From128i.hpp index eeacccf..d21b682 100644 --- a/source/converters/From128i.hpp +++ b/source/converters/From128i.hpp @@ -124,13 +124,13 @@ namespace Langulus::SIMD::Inner // Converting to 8bit integer // if constexpr (CT::Integer8) - return v; + return V128 {v}; else if constexpr (CT::Integer16) - return v.UnpackLo(); + return V128 {v.Pack()}; else if constexpr (CT::Integer32) - return v.UnpackLo().UnpackLo(); + return V128 {v.Pack().Pack()}; else if constexpr (CT::Integer64) - return v.UnpackLo().UnpackLo().UnpackLo(); + return V128 {v.Pack().Pack().Pack()}; else LANGULUS_ERROR("Unsupported conversion"); } @@ -139,13 +139,13 @@ namespace Langulus::SIMD::Inner // Converting to 16bit integer // if constexpr (CT::Integer8) - return v.Pack(); + return V128 {v.UnpackLo()}; else if constexpr (CT::Integer16) - return v; + return V128 {v}; else if constexpr (CT::Integer32) - return v.UnpackLo(); + return V128 {v.Pack()}; else if constexpr (CT::Integer64) - return v.UnpackLo().UnpackLo(); + return V128 {v.Pack().Pack()}; else LANGULUS_ERROR("Unsupported conversion"); } @@ -154,13 +154,13 @@ namespace Langulus::SIMD::Inner // Converting to 32bit integer // if constexpr (CT::Integer8) - return v.Pack().Pack(); + return V128 {v.UnpackLo().UnpackLo()}; else if constexpr (CT::Integer16) - return v.Pack(); + return V128 {v.UnpackLo()}; else if constexpr (CT::Integer32) - return v; + return V128 {v}; else if constexpr (CT::Integer64) - return v.UnpackLo(); + return V128 {v.Pack()}; else LANGULUS_ERROR("Unsupported conversion"); } @@ -169,13 +169,13 @@ namespace Langulus::SIMD::Inner // Converting to 64bit integer // if constexpr (CT::Integer8) - return v.Pack().Pack().Pack(); + return V128 {v.UnpackLo().UnpackLo().UnpackLo()}; else if constexpr (CT::Integer16) - return v.Pack().Pack(); + return V128 {v.UnpackLo().UnpackLo()}; else if constexpr (CT::Integer32) - return v.Pack(); + return V128 {v.UnpackLo()}; else if constexpr (CT::Integer64) - return v; + return V128 {v}; else LANGULUS_ERROR("Unsupported conversion"); } diff --git a/source/converters/From256f.hpp b/source/converters/From256f.hpp index 66d59de..bc66682 100644 --- a/source/converters/From256f.hpp +++ b/source/converters/From256f.hpp @@ -19,7 +19,7 @@ namespace Langulus::SIMD::Inner template NOD() LANGULUS(INLINED) auto ConvertFrom256f(CT::SIMD256f auto v) noexcept { if constexpr (CT::Double) - return V256 {simde_mm256_cvtps_pd(v)}; + return V256 {simde_mm256_cvtps_pd(simde_mm256_castps256_ps128(v))}; else if constexpr (CT::Float) return v; else if constexpr (CT::SignedInteger8) { diff --git a/source/converters/From256i.hpp b/source/converters/From256i.hpp index 230d84f..eb239e9 100644 --- a/source/converters/From256i.hpp +++ b/source/converters/From256i.hpp @@ -136,13 +136,13 @@ namespace Langulus::SIMD::Inner // Converting to 8bit integer // if constexpr (CT::Integer8) - return v; + return V256 {v}; else if constexpr (CT::Integer16) - return v.UnpackLo(); + return V256 {v.Pack()}; else if constexpr (CT::Integer32) - return v.UnpackLo().UnpackLo(); + return V256 {v.Pack().Pack()}; else if constexpr (CT::Integer64) - return v.UnpackLo().UnpackLo().UnpackLo(); + return V256 {v.Pack().Pack().Pack()}; else LANGULUS_ERROR("Unsupported conversion"); } @@ -151,13 +151,13 @@ namespace Langulus::SIMD::Inner // Converting to 16bit integer // if constexpr (CT::Integer8) - return v.Pack(); + return V256 {v.UnpackLo()}; else if constexpr (CT::Integer16) - return v; + return V256 {v}; else if constexpr (CT::Integer32) - return v.UnpackLo(); + return V256 {v.Pack()}; else if constexpr (CT::Integer64) - return v.UnpackLo().UnpackLo(); + return V256 {v.Pack().Pack()}; else LANGULUS_ERROR("Unsupported conversion"); } @@ -166,13 +166,13 @@ namespace Langulus::SIMD::Inner // Converting to 32bit integer // if constexpr (CT::Integer8) - return v.Pack().Pack(); + return V256 {v.UnpackLo().UnpackLo()}; else if constexpr (CT::Integer16) - return v.Pack(); + return V256 {v.UnpackLo()}; else if constexpr (CT::Integer32) - return v; + return V256 {v}; else if constexpr (CT::Integer64) - return v.UnpackLo(); + return V256 {v.Pack()}; else LANGULUS_ERROR("Unsupported conversion"); } @@ -181,13 +181,13 @@ namespace Langulus::SIMD::Inner // Converting to 64bit integer // if constexpr (CT::Integer8) - return v.Pack().Pack().Pack(); + return V256 {v.UnpackLo().UnpackLo().UnpackLo()}; else if constexpr (CT::Integer16) - return v.Pack().Pack(); + return V256 {v.UnpackLo().UnpackLo()}; else if constexpr (CT::Integer32) - return v.Pack(); + return V256 {v.UnpackLo()}; else if constexpr (CT::Integer64) - return v; + return V256 {v}; else LANGULUS_ERROR("Unsupported conversion"); } diff --git a/test/Equal/TestEqual-VVB.cpp b/test/Equal/TestEqual-VVB.cpp index 5b71f5b..76b780c 100644 --- a/test/Equal/TestEqual-VVB.cpp +++ b/test/Equal/TestEqual-VVB.cpp @@ -10,11 +10,11 @@ /// TEMPLATE_TEST_CASE("Vector == Vector -> Bool", "[compare]" + , VECTORS_ALL(5) , VECTORS_ALL(9) , VECTORS_ALL(2) , VECTORS_ALL(3) , VECTORS_ALL(4) - , VECTORS_ALL(5) , VECTORS_ALL(8) , VECTORS_ALL(16) , VECTORS_ALL(17) diff --git a/test/TestDiv.cpp b/test/TestDiv.cpp index 0e7e4cf..7431142 100644 --- a/test/TestDiv.cpp +++ b/test/TestDiv.cpp @@ -26,9 +26,9 @@ void ControlDiv(const Vector& lhsArray, const Vector& rhsArray, } TEMPLATE_TEST_CASE("Divide", "[divide]" + , VECTORS_ALL(2) , NUMBERS_ALL() , VECTORS_ALL(1) - , VECTORS_ALL(2) , VECTORS_ALL(3) , VECTORS_ALL(4) , VECTORS_ALL(5) @@ -49,6 +49,12 @@ TEMPLATE_TEST_CASE("Divide", "[divide]" InitOne(x, 1); InitOne(y, -5); } + else for (int i = 0; i < CountOf; ++i) { + if (x[i] == 0) + x[i] = 1; + if (y[i] == 0) + y[i] = 1; + } WHEN("Divided") { ControlDiv(x, y, rCheck); @@ -108,7 +114,7 @@ TEMPLATE_TEST_CASE("Divide", "[divide]" if constexpr (not CT::Vector) InitOne(x, 0); else - DenseCast(x.mArray[0]) = {}; + x[0] = 0; REQUIRE_THROWS(ControlDiv(y, x, rCheck)); REQUIRE_THROWS(SIMD::Divide(y, x, r));