diff --git a/source/Common.hpp b/source/Common.hpp index 80f3e16..b386346 100644 --- a/source/Common.hpp +++ b/source/Common.hpp @@ -342,10 +342,6 @@ namespace Langulus::SIMD return V128 {simde_mm_unpacklo_epi32(m, Zero())}; else if constexpr (CT::UnsignedInteger32) return V128 {simde_mm_unpacklo_epi32(m, Zero())}; - else if constexpr (CT::SignedInteger64) - return V128 {simde_mm_unpacklo_epi64(m, Zero())}; - else if constexpr (CT::UnsignedInteger64) - return V128 {simde_mm_unpacklo_epi64(m, Zero())}; else LANGULUS_ERROR("Can't unpack this type"); } @@ -364,10 +360,6 @@ namespace Langulus::SIMD return V128 {simde_mm_unpackhi_epi32(m, Zero())}; else if constexpr (CT::UnsignedInteger32) return V128 {simde_mm_unpackhi_epi32(m, Zero())}; - else if constexpr (CT::SignedInteger64) - return V128 {simde_mm_unpackhi_epi64(m, Zero())}; - else if constexpr (CT::UnsignedInteger64) - return V128 {simde_mm_unpackhi_epi64(m, Zero())}; else LANGULUS_ERROR("Can't unpack this type"); } @@ -400,7 +392,7 @@ namespace Langulus::SIMD // (or vpermq if you want) auto ordered = simde_mm_permute_pd( simde_mm_castps_pd(combined), - SIMDE_MM_SHUFFLE(1, 0, 1, 0) + SIMDE_MM_SHUFFLE(0, 0, 0, 1) ); return V128 {simde_mm_castpd_si128(ordered)}; #endif @@ -421,7 +413,7 @@ namespace Langulus::SIMD // (or vpermq if you want) auto ordered = simde_mm_permute_pd( simde_mm_castps_pd(combined), - SIMDE_MM_SHUFFLE(1, 0, 1, 0) + SIMDE_MM_SHUFFLE(0, 0, 0, 1) ); return V128 {simde_mm_castpd_si128(ordered)}; #endif @@ -529,21 +521,17 @@ namespace Langulus::SIMD NOD() LANGULUS(INLINED) auto UnpackLo() const noexcept { if constexpr (CT::SignedInteger8) - return V256 {simde_mm256_unpacklo_epi8 (m, Zero())}; + return V256 {simde_mm256_cvtepi8_epi16 (simde_mm256_extractf128_si256(m, 0))}; else if constexpr (CT::UnsignedInteger8) - return V256 {simde_mm256_unpacklo_epi8 (m, Zero())}; + return V256 {simde_mm256_cvtepu8_epi16 (simde_mm256_extractf128_si256(m, 0))}; else if constexpr (CT::SignedInteger16) - return V256 {simde_mm256_unpacklo_epi16(m, Zero())}; + return V256 {simde_mm256_cvtepi16_epi32(simde_mm256_extractf128_si256(m, 0))}; else if constexpr (CT::UnsignedInteger16) - return V256 {simde_mm256_unpacklo_epi16(m, Zero())}; + return V256 {simde_mm256_cvtepu16_epi32(simde_mm256_extractf128_si256(m, 0))}; else if constexpr (CT::SignedInteger32) - return V256 {simde_mm256_unpacklo_epi32(m, Zero())}; + return V256 {simde_mm256_cvtepi32_epi64(simde_mm256_extractf128_si256(m, 0))}; else if constexpr (CT::UnsignedInteger32) - return V256 {simde_mm256_unpacklo_epi32(m, Zero())}; - else if constexpr (CT::SignedInteger64) - return V256 {simde_mm256_unpacklo_epi64(m, Zero())}; - else if constexpr (CT::UnsignedInteger64) - return V256 {simde_mm256_unpacklo_epi64(m, Zero())}; + return V256 {simde_mm256_cvtepu32_epi64(simde_mm256_extractf128_si256(m, 0))}; else LANGULUS_ERROR("Can't unpack this type"); } @@ -551,21 +539,17 @@ namespace Langulus::SIMD NOD() LANGULUS(INLINED) auto UnpackHi() const noexcept { if constexpr (CT::SignedInteger8) - return V256 {simde_mm256_unpackhi_epi8 (m, Zero())}; + return V256 {simde_mm256_cvtepi8_epi16 (simde_mm256_extractf128_si256(m, 1))}; else if constexpr (CT::UnsignedInteger8) - return V256 {simde_mm256_unpackhi_epi8 (m, Zero())}; + return V256 {simde_mm256_cvtepu8_epi16 (simde_mm256_extractf128_si256(m, 1))}; else if constexpr (CT::SignedInteger16) - return V256 {simde_mm256_unpackhi_epi16(m, Zero())}; + return V256 {simde_mm256_cvtepi16_epi32(simde_mm256_extractf128_si256(m, 1))}; else if constexpr (CT::UnsignedInteger16) - return V256 {simde_mm256_unpackhi_epi16(m, Zero())}; + return V256 {simde_mm256_cvtepu16_epi32(simde_mm256_extractf128_si256(m, 1))}; else if constexpr (CT::SignedInteger32) - return V256 {simde_mm256_unpackhi_epi32(m, Zero())}; + return V256 {simde_mm256_cvtepi32_epi64(simde_mm256_extractf128_si256(m, 1))}; else if constexpr (CT::UnsignedInteger32) - return V256 {simde_mm256_unpackhi_epi32(m, Zero())}; - else if constexpr (CT::SignedInteger64) - return V256 {simde_mm256_unpackhi_epi64(m, Zero())}; - else if constexpr (CT::UnsignedInteger64) - return V256 {simde_mm256_unpackhi_epi64(m, Zero())}; + return V256 {simde_mm256_cvtepu32_epi64(simde_mm256_extractf128_si256(m, 1))}; else LANGULUS_ERROR("Can't unpack this type"); } @@ -574,10 +558,16 @@ namespace Langulus::SIMD auto Pack() const noexcept { if constexpr (CT::Integer8) return *this; - else if constexpr (CT::SignedInteger16) - return V256 {simde_mm256_packs_epi16 (m, Zero())}; - else if constexpr (CT::UnsignedInteger16) - return V256 {simde_mm256_packus_epi16(m, Zero())}; + else if constexpr (CT::SignedInteger16) { + const auto lo_lane = simde_mm256_castsi256_si128(m); + const auto hi_lane = simde_mm256_extracti128_si256(m, 1); + return V128 {simde_mm_packs_epi16(lo_lane, hi_lane)}; + } + else if constexpr (CT::UnsignedInteger16) { + const auto lo_lane = simde_mm256_castsi256_si128(m); + const auto hi_lane = simde_mm256_extracti128_si256(m, 1); + return V128 {simde_mm_packus_epi16(lo_lane, hi_lane)}; + } else if constexpr (CT::SignedInteger32) return V256 {simde_mm256_packs_epi32 (m, Zero())}; else if constexpr (CT::UnsignedInteger32) @@ -740,10 +730,6 @@ namespace Langulus::SIMD return V512 {simde_mm512_unpacklo_epi32(m, Zero())}; else if constexpr (CT::UnsignedInteger32) return V512 {simde_mm512_unpacklo_epi32(m, Zero())}; - else if constexpr (CT::SignedInteger64) - return V512 {simde_mm512_unpacklo_epi64(m, Zero())}; - else if constexpr (CT::UnsignedInteger64) - return V512 {simde_mm512_unpacklo_epi64(m, Zero())}; else LANGULUS_ERROR("Can't unpack this type"); } @@ -762,10 +748,6 @@ namespace Langulus::SIMD return V512 {simde_mm512_unpackhi_epi32(m, Zero())}; else if constexpr (CT::UnsignedInteger32) return V512 {simde_mm512_unpackhi_epi32(m, Zero())}; - else if constexpr (CT::SignedInteger64) - return V512 {simde_mm512_unpackhi_epi64(m, Zero())}; - else if constexpr (CT::UnsignedInteger64) - return V512 {simde_mm512_unpackhi_epi64(m, Zero())}; else LANGULUS_ERROR("Can't unpack this type"); } @@ -774,10 +756,20 @@ namespace Langulus::SIMD auto Pack() const noexcept { if constexpr (CT::Integer8) return *this; - else if constexpr (CT::SignedInteger16) - return V512 {simde_mm512_packs_epi16 (m, Zero())}; - else if constexpr (CT::UnsignedInteger16) - return V512 {simde_mm512_packus_epi16(m, Zero())}; + else if constexpr (CT::SignedInteger16) { + const auto lo_lane = simde_mm512_castsi512_si256(m); + const auto hi_lane = simde_mm512_extracti256_si512(m, 1); + return V256 { + simde_mm256_packs_epi16(lo_lane, hi_lane) + }.Pack(); + } + else if constexpr (CT::UnsignedInteger16) { + const auto lo_lane = simde_mm512_castsi512_si256(m); + const auto hi_lane = simde_mm512_extracti256_si512(m, 1); + return V256 { + simde_mm256_packus_epi16(lo_lane, hi_lane) + }.Pack(); + } else if constexpr (CT::SignedInteger32) return V512 {simde_mm512_packs_epi32 (m, Zero())}; else if constexpr (CT::UnsignedInteger32) @@ -1319,12 +1311,12 @@ namespace Langulus::SIMD ); const auto C1 = simde_mm_or_si128( - simde_mm_shuffle_epi8(simde_mm256_extracti128_si256(low.m, 0), maskLo), - simde_mm_shuffle_epi8(simde_mm256_extracti128_si256(low.m, 1), maskHi) + simde_mm_shuffle_epi8(simde_mm256_extracti128_si256(low, 0), maskLo), + simde_mm_shuffle_epi8(simde_mm256_extracti128_si256(low, 1), maskHi) ); const auto C2 = simde_mm_or_si128( - simde_mm_shuffle_epi8(simde_mm256_extracti128_si256(high.m, 0), maskLo), - simde_mm_shuffle_epi8(simde_mm256_extracti128_si256(high.m, 1), maskHi) + simde_mm_shuffle_epi8(simde_mm256_extracti128_si256(high, 0), maskLo), + simde_mm_shuffle_epi8(simde_mm256_extracti128_si256(high, 1), maskHi) ); const auto C = simde_mm256_inserti128_si256(simde_mm256_setzero_si256(), C1, 0); @@ -1368,8 +1360,8 @@ namespace Langulus::SIMD ); const auto r = simde_mm_or_si128( - simde_mm_shuffle_epi8(low.m, maskLo), - simde_mm_shuffle_epi8(high.m, maskHi) + simde_mm_shuffle_epi8(low, maskLo), + simde_mm_shuffle_epi8(high, maskHi) ); #endif @@ -1403,12 +1395,12 @@ namespace Langulus::SIMD ); const auto C1 = simde_mm_or_si128( - simde_mm_shuffle_epi8(simde_mm256_extracti128_si256(low.m, 0), maskLo), - simde_mm_shuffle_epi8(simde_mm256_extracti128_si256(low.m, 1), maskHi) + simde_mm_shuffle_epi8(simde_mm256_extracti128_si256(low, 0), maskLo), + simde_mm_shuffle_epi8(simde_mm256_extracti128_si256(low, 1), maskHi) ); const auto C2 = simde_mm_or_si128( - simde_mm_shuffle_epi8(simde_mm256_extracti128_si256(high.m, 0), maskLo), - simde_mm_shuffle_epi8(simde_mm256_extracti128_si256(high.m, 1), maskHi) + simde_mm_shuffle_epi8(simde_mm256_extracti128_si256(high, 0), maskLo), + simde_mm_shuffle_epi8(simde_mm256_extracti128_si256(high, 1), maskHi) ); auto C = simde_mm256_inserti128_si256(simde_mm256_setzero_si256(), C1, 0); diff --git a/source/SetGet.hpp b/source/SetGet.hpp index 8c58dfc..201b0c3 100644 --- a/source/SetGet.hpp +++ b/source/SetGet.hpp @@ -32,7 +32,7 @@ namespace Langulus::SIMD if constexpr (REVERSE) { if constexpr (MAXS - IDX - 1 < S) { - LANGULUS_SIMD_VERBOSE("Setting [", IDX, "] to ", DenseCast(values[MAXS - IDX - 1])); + LANGULUS_SIMD_VERBOSE("Setting [", IDX, "] to ", values[MAXS - IDX - 1]); return reinterpret_cast(values[MAXS - IDX - 1]); } else { @@ -42,7 +42,7 @@ namespace Langulus::SIMD } else { if constexpr (IDX < S) { - LANGULUS_SIMD_VERBOSE("Setting [", IDX, "] to ", DenseCast(values[IDX])); + LANGULUS_SIMD_VERBOSE("Setting [", IDX, "] to ", values[IDX]); return reinterpret_cast(values[IDX]); } else { @@ -67,15 +67,13 @@ namespace Langulus::SIMD #if LANGULUS_SIMD(128BIT) if constexpr (CHUNK == 16) { - LANGULUS_SIMD_VERBOSE("Setting 128bit register from ", - CountOf, " elements"); - - if constexpr (CT::Integer8) return V128 {simde_mm_setr_epi8 (Get(values)...)}; - else if constexpr (CT::Integer16) return V128 {simde_mm_setr_epi16(Get(values)...)}; - else if constexpr (CT::Integer32) return V128 {simde_mm_setr_epi32(Get(values)...)}; - else if constexpr (CT::Integer64) return V128 {simde_mm_set_epi64x(Get(values)...)}; - else if constexpr (CT::Float) return V128 {simde_mm_setr_ps (Get(values)...)}; - else if constexpr (CT::Double) return V128 {simde_mm_setr_pd (Get(values)...)}; + LANGULUS_SIMD_VERBOSE("Setting 128bit register from ", CountOf, " elements"); + if constexpr (CT::Integer8) return V128 {simde_mm_setr_epi8 (Get(values)...)}; + else if constexpr (CT::Integer16) return V128 {simde_mm_setr_epi16(Get(values)...)}; + else if constexpr (CT::Integer32) return V128 {simde_mm_setr_epi32(Get(values)...)}; + else if constexpr (CT::Integer64) return V128 {simde_mm_set_epi64x(Get(values)...)}; + else if constexpr (CT::Float) return V128 {simde_mm_setr_ps (Get(values)...)}; + else if constexpr (CT::Double) return V128 {simde_mm_setr_pd (Get(values)...)}; else LANGULUS_ERROR("Can't set 16-byte package"); } else @@ -84,23 +82,18 @@ namespace Langulus::SIMD #if LANGULUS_SIMD(256BIT) if constexpr (CHUNK == 32) { LANGULUS_SIMD_VERBOSE("Setting 256bit register from ", CountOf, " elements"); - - if constexpr (CT::Integer8) return V256 {simde_mm256_setr_epi8 (Get(values)...)}; + if constexpr (CT::Integer8) return V256 {simde_mm256_setr_epi8 (Get(values)...)}; else if constexpr (CT::Integer16) return V256 {simde_mm256_setr_epi16(Get(values)...)}; - else if constexpr (CT::Integer32) return V256 {simde_mm256_setr_epi32(Get(values)...)}; + else if constexpr (CT::Integer32) return V256 {simde_mm256_setr_epi32(Get(values)...)}; else if constexpr (CT::Integer64) { // This hits a very nasty MSVC compiler bug // The workaround is temporary, hopefully // https://stackoverflow.com/questions/77191454 #if LANGULUS_COMPILER(MSVC) and LANGULUS_BITNESS() == 32 and (LANGULUS_SIMD(AVX) or LANGULUS_SIMD(AVX2)) alignas(32) const int64_t temp[4] {Get(values)...}; - return V256 { - simde_mm256_load_si256(reinterpret_cast(temp)) - }; + return V256 {simde_mm256_load_si256(reinterpret_cast(temp))}; #else - return V256 { - simde_mm256_setr_epi64x(Get(values)...) - }; + return V256 {simde_mm256_setr_epi64x(Get(values)...)}; #endif } else if constexpr (CT::Float) return V256 {simde_mm256_setr_ps(Get(values)...)}; @@ -113,13 +106,12 @@ namespace Langulus::SIMD #if LANGULUS_SIMD(512BIT) if constexpr (CHUNK == 64) { LANGULUS_SIMD_VERBOSE("Setting 512bit register from ", CountOf, " elements"); - - if constexpr (CT::Integer8) return V512 {simde_mm512_setr_epi8 (Get(values)...)}; + if constexpr (CT::Integer8) return V512 {simde_mm512_setr_epi8 (Get(values)...)}; else if constexpr (CT::Integer16) return V512 {simde_mm512_setr_epi16(Get(values)...)}; else if constexpr (CT::Integer32) return V512 {simde_mm512_setr_epi32(Get(values)...)}; - else if constexpr (CT::Integer64) return V512 {simde_mm512_setr_epi64(Get(values)...)}; + else if constexpr (CT::Integer64) return V512 {simde_mm512_setr_epi64(Get(values)...)}; else if constexpr (CT::Float) return V512 {simde_mm512_setr_ps (Get(values)...)}; - else if constexpr (CT::Double) return V512 {simde_mm512_setr_pd (Get(values)...)}; + else if constexpr (CT::Double) return V512 {simde_mm512_setr_pd (Get(values)...)}; else LANGULUS_ERROR("Can't set 64-byte package"); } else @@ -141,11 +133,8 @@ namespace Langulus::SIMD auto Set(const FROM& values) noexcept { using T = TypeOf; constexpr auto S = CountOf; - constexpr auto MaxS = CHUNK / sizeof(Decay); - static_assert((CT::Dense and MaxS > S) - or (CT::Sparse and MaxS >= S), - "S should be smaller (or equal if sparse) than MaxS - use load otherwise"); - + constexpr auto MaxS = CHUNK / sizeof(T); + static_assert(MaxS > S, "S should be smaller than MaxS - use load otherwise"); return Inner::Set(Sequence::Expand, values); } diff --git a/source/Store.hpp b/source/Store.hpp index ed09924..901e3bd 100644 --- a/source/Store.hpp +++ b/source/Store.hpp @@ -34,7 +34,11 @@ namespace Langulus::SIMD } else if constexpr (CT::SIMD256) { if constexpr (CT::Integer8) to = simde_mm256_movemask_epi8(from); - else if constexpr (CT::Integer16) to = simde_mm256_movemask_epi8(simde_mm256_packs_epi16(from, from.Zero())); + else if constexpr (CT::Integer16) { + const auto lo_lane = simde_mm256_castsi256_si128(from); + const auto hi_lane = simde_mm256_extracti128_si256(from, 1); + to = simde_mm_movemask_epi8(simde_mm_packs_epi16(lo_lane, hi_lane)); + } else if constexpr (CT::Integer32) to = simde_mm256_movemask_ps (simde_mm256_castsi256_ps(from)); else if constexpr (CT::Integer64) to = simde_mm256_movemask_pd (simde_mm256_castsi256_pd(from)); else if constexpr (CT::Float) to = simde_mm256_movemask_ps (from); @@ -43,7 +47,11 @@ namespace Langulus::SIMD } else if constexpr (CT::SIMD512) { if constexpr (CT::Integer8) to = simde_mm512_movemask_epi8(from); - else if constexpr (CT::Integer16) to = simde_mm512_movemask_epi8(simde_mm512_packs_epi16(from, from.Zero())); + else if constexpr (CT::Integer16) { + const auto lo_lane = simde_mm512_castsi512_si256(from); + const auto hi_lane = simde_mm512_extracti256_si512(from, 1); + StoreSIMD(simde_mm256_packs_epi16(lo_lane, hi_lane), to); + } else if constexpr (CT::Integer32) to = simde_mm512_movemask_ps (simde_mm512_castsi256_ps(from)); else if constexpr (CT::Integer64) to = simde_mm512_movemask_pd (simde_mm512_castsi256_pd(from)); else if constexpr (CT::Float) to = simde_mm512_movemask_ps (from); diff --git a/source/binary/Max.hpp b/source/binary/Max.hpp index bc874b9..63f9400 100644 --- a/source/binary/Max.hpp +++ b/source/binary/Max.hpp @@ -30,12 +30,12 @@ namespace Langulus::SIMD (void)lhs; (void)rhs; if constexpr (CT::SIMD128) { - if constexpr (CT::SignedInteger8) return R {simde_mm_max_epi8(lhs, rhs)}; - else if constexpr (CT::UnsignedInteger8) return R {simde_mm_max_epu8(lhs, rhs)}; - else if constexpr (CT::SignedInteger16) return R {simde_mm_max_epi16(lhs, rhs)}; - else if constexpr (CT::UnsignedInteger16) return R {simde_mm_max_epu16(lhs, rhs)}; - else if constexpr (CT::SignedInteger32) return R {simde_mm_max_epi32(lhs, rhs)}; - else if constexpr (CT::UnsignedInteger32) return R {simde_mm_max_epu32(lhs, rhs)}; + if constexpr (CT::SignedInteger8) return R {simde_mm_max_epi8 (lhs, rhs)}; + else if constexpr (CT::UnsignedInteger8) return R {simde_mm_max_epu8 (lhs, rhs)}; + else if constexpr (CT::SignedInteger16) return R {simde_mm_max_epi16 (lhs, rhs)}; + else if constexpr (CT::UnsignedInteger16) return R {simde_mm_max_epu16 (lhs, rhs)}; + else if constexpr (CT::SignedInteger32) return R {simde_mm_max_epi32 (lhs, rhs)}; + else if constexpr (CT::UnsignedInteger32) return R {simde_mm_max_epu32 (lhs, rhs)}; else if constexpr (CT::SignedInteger64) { #if LANGULUS_SIMD(AVX512) return R {_mm_max_epi64(lhs, rhs)}; @@ -50,13 +50,13 @@ namespace Langulus::SIMD return Unsupported{}; #endif } - else if constexpr (CT::Float) return R {simde_mm_max_ps(lhs, rhs)}; - else if constexpr (CT::Double) return R {simde_mm_max_pd(lhs, rhs)}; + else if constexpr (CT::Float) return R {simde_mm_max_ps (lhs, rhs)}; + else if constexpr (CT::Double) return R {simde_mm_max_pd (lhs, rhs)}; else LANGULUS_ERROR("Unsupported type for 16-byte package"); } else if constexpr (CT::SIMD256) { - if constexpr (CT::SignedInteger8) return R {simde_mm256_max_epi8(lhs, rhs)}; - else if constexpr (CT::UnsignedInteger8) return R {simde_mm256_max_epu8(lhs, rhs)}; + if constexpr (CT::SignedInteger8) return R {simde_mm256_max_epi8 (lhs, rhs)}; + else if constexpr (CT::UnsignedInteger8) return R {simde_mm256_max_epu8 (lhs, rhs)}; else if constexpr (CT::SignedInteger16) return R {simde_mm256_max_epi16(lhs, rhs)}; else if constexpr (CT::UnsignedInteger16) return R {simde_mm256_max_epu16(lhs, rhs)}; else if constexpr (CT::SignedInteger32) return R {simde_mm256_max_epi32(lhs, rhs)}; @@ -75,21 +75,21 @@ namespace Langulus::SIMD return Unsupported{}; #endif } - else if constexpr (CT::Float) return R {simde_mm256_max_ps(lhs, rhs)}; - else if constexpr (CT::Double) return R {simde_mm256_max_pd(lhs, rhs)}; + else if constexpr (CT::Float) return R {simde_mm256_max_ps (lhs, rhs)}; + else if constexpr (CT::Double) return R {simde_mm256_max_pd (lhs, rhs)}; else LANGULUS_ERROR("Unsupported type for 32-byte package"); } else if constexpr (CT::SIMD512) { - if constexpr (CT::SignedInteger8) return R {simde_mm512_max_epi8(lhs, rhs)}; - else if constexpr (CT::UnsignedInteger8) return R {simde_mm512_max_epu8(lhs, rhs)}; + if constexpr (CT::SignedInteger8) return R {simde_mm512_max_epi8 (lhs, rhs)}; + else if constexpr (CT::UnsignedInteger8) return R {simde_mm512_max_epu8 (lhs, rhs)}; else if constexpr (CT::SignedInteger16) return R {simde_mm512_max_epi16(lhs, rhs)}; else if constexpr (CT::UnsignedInteger16) return R {simde_mm512_max_epu16(lhs, rhs)}; else if constexpr (CT::SignedInteger32) return R {simde_mm512_max_epi32(lhs, rhs)}; else if constexpr (CT::UnsignedInteger32) return R {simde_mm512_max_epu32(lhs, rhs)}; else if constexpr (CT::SignedInteger64) return R {simde_mm512_max_epi64(lhs, rhs)}; else if constexpr (CT::UnsignedInteger64) return R {simde_mm512_max_epu64(lhs, rhs)}; - else if constexpr (CT::Float) return R {simde_mm512_max_ps(lhs, rhs)}; - else if constexpr (CT::Double) return R {simde_mm512_max_pd(lhs, rhs)}; + else if constexpr (CT::Float) return R {simde_mm512_max_ps (lhs, rhs)}; + else if constexpr (CT::Double) return R {simde_mm512_max_pd (lhs, rhs)}; else LANGULUS_ERROR("Unsupported type for 64-byte package"); } else LANGULUS_ERROR("Unsupported type"); diff --git a/source/binary/Min.hpp b/source/binary/Min.hpp index 6d8770d..72d9ab9 100644 --- a/source/binary/Min.hpp +++ b/source/binary/Min.hpp @@ -30,12 +30,12 @@ namespace Langulus::SIMD (void)lhs; (void)rhs; if constexpr (CT::SIMD128) { - if constexpr (CT::SignedInteger8) return R {simde_mm_min_epi8(lhs, rhs)}; - else if constexpr (CT::UnsignedInteger8) return R {simde_mm_min_epu8(lhs, rhs)}; - else if constexpr (CT::SignedInteger16) return R {simde_mm_min_epi16(lhs, rhs)}; - else if constexpr (CT::UnsignedInteger16) return R {simde_mm_min_epu16(lhs, rhs)}; - else if constexpr (CT::SignedInteger32) return R {simde_mm_min_epi32(lhs, rhs)}; - else if constexpr (CT::UnsignedInteger32) return R {simde_mm_min_epu32(lhs, rhs)}; + if constexpr (CT::SignedInteger8) return R {simde_mm_min_epi8 (lhs, rhs)}; + else if constexpr (CT::UnsignedInteger8) return R {simde_mm_min_epu8 (lhs, rhs)}; + else if constexpr (CT::SignedInteger16) return R {simde_mm_min_epi16 (lhs, rhs)}; + else if constexpr (CT::UnsignedInteger16) return R {simde_mm_min_epu16 (lhs, rhs)}; + else if constexpr (CT::SignedInteger32) return R {simde_mm_min_epi32 (lhs, rhs)}; + else if constexpr (CT::UnsignedInteger32) return R {simde_mm_min_epu32 (lhs, rhs)}; else if constexpr (CT::SignedInteger64) { #if LANGULUS_SIMD(AVX512) return R {_mm_min_epi64(lhs, rhs)}; @@ -50,13 +50,13 @@ namespace Langulus::SIMD return Unsupported{}; #endif } - else if constexpr (CT::Float) return R {simde_mm_min_ps(lhs, rhs)}; - else if constexpr (CT::Double) return R {simde_mm_min_pd(lhs, rhs)}; + else if constexpr (CT::Float) return R {simde_mm_min_ps (lhs, rhs)}; + else if constexpr (CT::Double) return R {simde_mm_min_pd (lhs, rhs)}; else LANGULUS_ERROR("Unsupported type for 16-byte package"); } else if constexpr (CT::SIMD256) { - if constexpr (CT::SignedInteger8) return R {simde_mm256_min_epi8(lhs, rhs)}; - else if constexpr (CT::UnsignedInteger8) return R {simde_mm256_min_epu8(lhs, rhs)}; + if constexpr (CT::SignedInteger8) return R {simde_mm256_min_epi8 (lhs, rhs)}; + else if constexpr (CT::UnsignedInteger8) return R {simde_mm256_min_epu8 (lhs, rhs)}; else if constexpr (CT::SignedInteger16) return R {simde_mm256_min_epi16(lhs, rhs)}; else if constexpr (CT::UnsignedInteger16) return R {simde_mm256_min_epu16(lhs, rhs)}; else if constexpr (CT::SignedInteger32) return R {simde_mm256_min_epi32(lhs, rhs)}; @@ -75,21 +75,21 @@ namespace Langulus::SIMD return Unsupported{}; #endif } - else if constexpr (CT::Float) return R {simde_mm256_min_ps(lhs, rhs)}; - else if constexpr (CT::Double) return R {simde_mm256_min_pd(lhs, rhs)}; + else if constexpr (CT::Float) return R {simde_mm256_min_ps (lhs, rhs)}; + else if constexpr (CT::Double) return R {simde_mm256_min_pd (lhs, rhs)}; else LANGULUS_ERROR("Unsupported type for 32-byte package"); } else if constexpr (CT::SIMD512) { - if constexpr (CT::SignedInteger8) return R {simde_mm512_min_epi8(lhs, rhs)}; - else if constexpr (CT::UnsignedInteger8) return R {simde_mm512_min_epu8(lhs, rhs)}; + if constexpr (CT::SignedInteger8) return R {simde_mm512_min_epi8 (lhs, rhs)}; + else if constexpr (CT::UnsignedInteger8) return R {simde_mm512_min_epu8 (lhs, rhs)}; else if constexpr (CT::SignedInteger16) return R {simde_mm512_min_epi16(lhs, rhs)}; else if constexpr (CT::UnsignedInteger16) return R {simde_mm512_min_epu16(lhs, rhs)}; else if constexpr (CT::SignedInteger32) return R {simde_mm512_min_epi32(lhs, rhs)}; else if constexpr (CT::UnsignedInteger32) return R {simde_mm512_min_epu32(lhs, rhs)}; else if constexpr (CT::SignedInteger64) return R {simde_mm512_min_epi64(lhs, rhs)}; else if constexpr (CT::UnsignedInteger64) return R {simde_mm512_min_epu64(lhs, rhs)}; - else if constexpr (CT::Float) return R {simde_mm512_min_ps(lhs, rhs)}; - else if constexpr (CT::Double) return R {simde_mm512_min_pd(lhs, rhs)}; + else if constexpr (CT::Float) return R {simde_mm512_min_ps (lhs, rhs)}; + else if constexpr (CT::Double) return R {simde_mm512_min_pd (lhs, rhs)}; else LANGULUS_ERROR("Unsupported type for 64-byte package"); } else LANGULUS_ERROR("Unsupported type"); diff --git a/source/binary/ShiftLeft.hpp b/source/binary/ShiftLeft.hpp index f1fcd9f..055b171 100644 --- a/source/binary/ShiftLeft.hpp +++ b/source/binary/ShiftLeft.hpp @@ -39,48 +39,23 @@ namespace Langulus::SIMD if constexpr (CT::SIMD128) { if constexpr (CT::Integer8) { - #if LANGULUS_SIMD(512BIT) - // Optimal - return R {lgls_pack_epi16( - simde_mm_sllv_epi16(lhs.UnpackLo(), rhs.UnpackLo()), - simde_mm_sllv_epi16(lhs.UnpackHi(), rhs.UnpackHi()) - )}; - #elif LANGULUS_SIMD(256BIT) - // Not optimal, must be unpacked once more for AVX2 - auto lhs32_1 = lhs.UnpackLo().UnpackLo(); - auto lhs32_2 = lhs.UnpackHi().UnpackHi(); - auto rhs32_1 = rhs.UnpackLo().UnpackLo(); - auto rhs32_2 = rhs.UnpackHi().UnpackHi(); - - lhs32_1 = simde_mm_sllv_epi32(lhs32_1, rhs32_1); - lhs32_2 = simde_mm_sllv_epi32(lhs32_2, rhs32_2); - auto lo = lgls_pack_epi32(lhs32_1, lhs32_2); - - lhs32_1 = simde_mm_sllv_epi32(lhs32_1, rhs32_1); - lhs32_2 = simde_mm_sllv_epi32(lhs32_2, rhs32_2); - auto hi = lgls_pack_epi32(lhs32_1, lhs32_2); - + #if LANGULUS_SIMD(256BIT) or LANGULUS_SIMD(512BIT) + auto lo = ShiftLeftSIMD(lhs.UnpackLo(), rhs.UnpackLo()); + auto hi = ShiftLeftSIMD(lhs.UnpackHi(), rhs.UnpackHi()); return R {lgls_pack_epi16(lo, hi)}; #else - return Unsupported{}; //TODO + return Unsupported {}; //TODO #endif } else if constexpr (CT::Integer16) { #if LANGULUS_SIMD(512BIT) - // Optimal return simde_mm_sllv_epi16(lhs, rhs); #elif LANGULUS_SIMD(256BIT) - // Not optimal, must be unpacked for AVX2 - auto lhs32_1 = lhs.UnpackLo(); - auto lhs32_2 = lhs.UnpackHi(); - auto rhs32_1 = rhs.UnpackLo(); - auto rhs32_2 = rhs.UnpackHi(); - - lhs32_1 = simde_mm_sllv_epi32(lhs32_1, rhs32_1); - lhs32_2 = simde_mm_sllv_epi32(lhs32_2, rhs32_2); - return R {lgls_pack_epi32(lhs32_1, lhs32_2)}; + auto lo = ShiftLeftSIMD(lhs.UnpackLo(), rhs.UnpackLo()); + auto hi = ShiftLeftSIMD(lhs.UnpackHi(), rhs.UnpackHi()); + return R {lgls_pack_epi32(lo, hi)}; #else - return Unsupported{}; //TODO + return Unsupported {}; //TODO #endif } else if constexpr (CT::Integer32) { @@ -101,53 +76,17 @@ namespace Langulus::SIMD } else if constexpr (CT::SIMD256) { if constexpr (CT::Integer8) { - auto lhs1 = lhs.UnpackLo(); - auto lhs2 = lhs.UnpackHi(); - auto rhs1 = rhs.UnpackLo(); - auto rhs2 = rhs.UnpackHi(); - - #if LANGULUS_SIMD(512BIT) - // Optimal - lhs1 = simde_mm256_sllv_epi16(lhs1, rhs1); - lhs2 = simde_mm256_sllv_epi16(lhs2, rhs2); - return R {lgls_pack_epi16(lhs1, lhs2)}; - #else - // Not optimal, must be unpacked once more for AVX2 - auto lhs32_1 = rhs1.UnpackLo(); - auto lhs32_2 = rhs1.UnpackLo(); - auto rhs32_1 = rhs1.UnpackLo(); - auto rhs32_2 = rhs1.UnpackLo(); - - lhs32_1 = simde_mm256_sllv_epi32(lhs32_1, rhs32_1); - lhs32_2 = simde_mm256_sllv_epi32(lhs32_2, rhs32_2); - lhs1 = lgls_pack_epi32(lhs32_1, lhs32_2); - - lhs32_1 = rhs2.UnpackLo(); - lhs32_2 = rhs2.UnpackLo(); - rhs32_1 = rhs2.UnpackLo(); - rhs32_2 = rhs2.UnpackLo(); - - lhs32_1 = simde_mm256_sllv_epi32(lhs32_1, rhs32_1); - lhs32_2 = simde_mm256_sllv_epi32(lhs32_2, rhs32_2); - lhs2 = lgls_pack_epi32(lhs32_1, lhs32_2); - - return R {lgls_pack_epi16(lhs1, lhs2)}; - #endif + auto lo = ShiftLeftSIMD(lhs.UnpackLo(), rhs.UnpackLo()); + auto hi = ShiftLeftSIMD(lhs.UnpackHi(), rhs.UnpackHi()); + return R {lgls_pack_epi16(lo, hi)}; } else if constexpr (CT::Integer16) { #if LANGULUS_SIMD(512BIT) - // Optimal return simde_mm256_sllv_epi16(lhs, rhs); #else - // Not optimal, must be unpacked for AVX2 - auto lhs1 = lhs.UnpackLo(); - auto lhs2 = lhs.UnpackHi(); - auto rhs1 = rhs.UnpackLo(); - auto rhs2 = rhs.UnpackHi(); - - lhs1 = simde_mm256_sllv_epi32(lhs1, rhs1); - lhs2 = simde_mm256_sllv_epi32(lhs2, rhs2); - return R {lgls_pack_epi32(lhs1, lhs2)}; + auto lo = ShiftLeftSIMD(lhs.UnpackLo(), rhs.UnpackLo()); + auto hi = ShiftLeftSIMD(lhs.UnpackHi(), rhs.UnpackHi()); + return R {lgls_pack_epi32(lo, hi)}; #endif } else if constexpr (CT::Integer32) return R {simde_mm256_sllv_epi32(lhs, rhs)}; @@ -156,14 +95,9 @@ namespace Langulus::SIMD } else if constexpr (CT::SIMD512) { if constexpr (CT::Integer8) { - auto lhs1 = lhs.UnpackLo(); - auto lhs2 = lhs.UnpackHi(); - auto rhs1 = rhs.UnpackLo(); - auto rhs2 = rhs.UnpackHi(); - - lhs1 = simde_mm512_sllv_epi16(lhs1, rhs1); - lhs2 = simde_mm512_sllv_epi16(lhs2, rhs2); - return R {lgls_pack_epi16(lhs1, lhs2)}; + auto lo = ShiftLeftSIMD(lhs.UnpackLo(), rhs.UnpackLo()); + auto hi = ShiftLeftSIMD(lhs.UnpackHi(), rhs.UnpackHi()); + return R {lgls_pack_epi16(lo, hi)}; } else if constexpr (CT::Integer16) return R {simde_mm512_sllv_epi16(lhs, rhs)}; else if constexpr (CT::Integer32) return R {simde_mm512_sllv_epi32(lhs, rhs)}; diff --git a/source/binary/ShiftRight.hpp b/source/binary/ShiftRight.hpp index e45c6db..09e03fc 100644 --- a/source/binary/ShiftRight.hpp +++ b/source/binary/ShiftRight.hpp @@ -39,48 +39,23 @@ namespace Langulus::SIMD if constexpr (CT::SIMD128) { if constexpr (CT::Integer8) { - #if LANGULUS_SIMD(512BIT) - // Optimal - return R {lgls_pack_epi16( - simde_mm_srlv_epi16(lhs.UnpackLo(), rhs.UnpackLo()), - simde_mm_srlv_epi16(lhs.UnpackHi(), rhs.UnpackHi()) - )}; - #elif LANGULUS_SIMD(256BIT) - // Not optimal, must be unpacked once more for AVX2 - auto lhs32_1 = lhs.UnpackLo().UnpackLo(); - auto lhs32_2 = lhs.UnpackHi().UnpackHi(); - auto rhs32_1 = rhs.UnpackLo().UnpackLo(); - auto rhs32_2 = rhs.UnpackHi().UnpackHi(); - - lhs32_1 = simde_mm_srlv_epi32(lhs32_1, rhs32_1); - lhs32_2 = simde_mm_srlv_epi32(lhs32_2, rhs32_2); - auto lo = lgls_pack_epi32(lhs32_1, lhs32_2); - - lhs32_1 = simde_mm_srlv_epi32(lhs32_1, rhs32_1); - lhs32_2 = simde_mm_srlv_epi32(lhs32_2, rhs32_2); - auto hi = lgls_pack_epi32(lhs32_1, lhs32_2); - + #if LANGULUS_SIMD(256BIT) or LANGULUS_SIMD(512BIT) + auto lo = ShiftRightSIMD(lhs.UnpackLo(), rhs.UnpackLo()); + auto hi = ShiftRightSIMD(lhs.UnpackHi(), rhs.UnpackHi()); return R {lgls_pack_epi16(lo, hi)}; #else - return Unsupported{}; //TODO + return Unsupported {}; //TODO #endif } else if constexpr (CT::Integer16) { #if LANGULUS_SIMD(512BIT) - // Optimal - return R {simde_mm_srlv_epi16(lhs, rhs)}; + return simde_mm_srlv_epi16(lhs, rhs); #elif LANGULUS_SIMD(256BIT) - // Not optimal, must be unpacked for AVX2 - auto lhs32_1 = lhs.UnpackLo(); - auto lhs32_2 = lhs.UnpackHi(); - auto rhs32_1 = rhs.UnpackLo(); - auto rhs32_2 = rhs.UnpackHi(); - - lhs32_1 = simde_mm_srlv_epi32(lhs32_1, rhs32_1); - lhs32_2 = simde_mm_srlv_epi32(lhs32_2, rhs32_2); - return R {lgls_pack_epi32(lhs32_1, lhs32_2)}; + auto lo = ShiftRightSIMD(lhs.UnpackLo(), rhs.UnpackLo()); + auto hi = ShiftRightSIMD(lhs.UnpackHi(), rhs.UnpackHi()); + return R {lgls_pack_epi32(lo, hi)}; #else - return Unsupported{}; //TODO + return Unsupported {}; //TODO #endif } else if constexpr (CT::Integer32) { @@ -101,53 +76,17 @@ namespace Langulus::SIMD } else if constexpr (CT::SIMD256) { if constexpr (CT::Integer8) { - auto lhs1 = lhs.UnpackLo(); - auto lhs2 = lhs.UnpackHi(); - auto rhs1 = rhs.UnpackLo(); - auto rhs2 = rhs.UnpackHi(); - - #if LANGULUS_SIMD(512BIT) - // Optimal - lhs1 = simde_mm256_srlv_epi16(lhs1, rhs1); - lhs2 = simde_mm256_srlv_epi16(lhs2, rhs2); - return R {lgls_pack_epi16(lhs1, lhs2)}; - #else - // Not optimal, must be unpacked once more for AVX2 - auto lhs32_1 = rhs1.UnpackLo(); - auto lhs32_2 = rhs1.UnpackLo(); - auto rhs32_1 = rhs1.UnpackLo(); - auto rhs32_2 = rhs1.UnpackLo(); - - lhs32_1 = simde_mm256_srlv_epi32(lhs32_1, rhs32_1); - lhs32_2 = simde_mm256_srlv_epi32(lhs32_2, rhs32_2); - lhs1 = lgls_pack_epi32(lhs32_1, lhs32_2); - - lhs32_1 = rhs2.UnpackLo(); - lhs32_2 = rhs2.UnpackLo(); - rhs32_1 = rhs2.UnpackLo(); - rhs32_2 = rhs2.UnpackLo(); - - lhs32_1 = simde_mm256_srlv_epi32(lhs32_1, rhs32_1); - lhs32_2 = simde_mm256_srlv_epi32(lhs32_2, rhs32_2); - lhs2 = lgls_pack_epi32(lhs32_1, lhs32_2); - - return R {lgls_pack_epi16(lhs1, lhs2)}; - #endif + auto lo = ShiftRightSIMD(lhs.UnpackLo(), rhs.UnpackLo()); + auto hi = ShiftRightSIMD(lhs.UnpackHi(), rhs.UnpackHi()); + return R {lgls_pack_epi16(lo, hi)}; } else if constexpr (CT::Integer16) { #if LANGULUS_SIMD(512BIT) - // Optimal - return simde_mm256_srlv_epi16(lhs, rhs); + return simde_mm256_sllv_epi16(lhs, rhs); #else - // Not optimal, must be unpacked for AVX2 - auto lhs1 = lhs.UnpackLo(); - auto lhs2 = lhs.UnpackHi(); - auto rhs1 = rhs.UnpackLo(); - auto rhs2 = rhs.UnpackHi(); - - lhs1 = simde_mm256_srlv_epi32(lhs1, rhs1); - lhs2 = simde_mm256_srlv_epi32(lhs2, rhs2); - return R {lgls_pack_epi32(lhs1, lhs2)}; + auto lo = ShiftRightSIMD(lhs.UnpackLo(), rhs.UnpackLo()); + auto hi = ShiftRightSIMD(lhs.UnpackHi(), rhs.UnpackHi()); + return R {lgls_pack_epi32(lo, hi)}; #endif } else if constexpr (CT::Integer32) return R {simde_mm256_srlv_epi32(lhs, rhs)}; @@ -156,14 +95,9 @@ namespace Langulus::SIMD } else if constexpr (CT::SIMD512) { if constexpr (CT::Integer8) { - auto lhs1 = lhs.UnpackLo(); - auto lhs2 = lhs.UnpackHi(); - auto rhs1 = rhs.UnpackLo(); - auto rhs2 = rhs.UnpackHi(); - - lhs1 = simde_mm512_srlv_epi16(lhs1, rhs1); - lhs2 = simde_mm512_srlv_epi16(lhs2, rhs2); - return R {lgls_pack_epi16(lhs1, lhs2)}; + auto lo = ShiftRightSIMD(lhs.UnpackLo(), rhs.UnpackLo()); + auto hi = ShiftRightSIMD(lhs.UnpackHi(), rhs.UnpackHi()); + return R {lgls_pack_epi16(lo, hi)}; } else if constexpr (CT::Integer16) return R {simde_mm512_srlv_epi16(lhs, rhs)}; else if constexpr (CT::Integer32) return R {simde_mm512_srlv_epi32(lhs, rhs)}; diff --git a/source/unary/Floor.hpp b/source/unary/Floor.hpp index 2909db4..49a08cb 100644 --- a/source/unary/Floor.hpp +++ b/source/unary/Floor.hpp @@ -38,18 +38,18 @@ namespace Langulus::SIMD return Unsupported {}; #else if constexpr (CT::SIMD128) { - if constexpr (CT::Float) return simde_mm_floor_ps (value); - else if constexpr (CT::Double) return simde_mm_floor_pd (value); + if constexpr (CT::Float) return R {simde_mm_floor_ps (value)}; + else if constexpr (CT::Double) return R {simde_mm_floor_pd (value)}; else LANGULUS_ERROR("Unsupported type for 16-byte package"); } else if constexpr (CT::SIMD256) { - if constexpr (CT::Float) return simde_mm256_floor_ps(value); - else if constexpr (CT::Double) return simde_mm256_floor_pd(value); + if constexpr (CT::Float) return R {simde_mm256_floor_ps(value)}; + else if constexpr (CT::Double) return R {simde_mm256_floor_pd(value)}; else LANGULUS_ERROR("Unsupported type for 32-byte package"); } else if constexpr (CT::SIMD512) { - if constexpr (CT::Float) return simde_mm512_floor_ps(value); - else if constexpr (CT::Double) return simde_mm512_floor_pd(value); + if constexpr (CT::Float) return R {simde_mm512_floor_ps(value)}; + else if constexpr (CT::Double) return R {simde_mm512_floor_pd(value)}; else LANGULUS_ERROR("Unsupported type for 64-byte package"); } else LANGULUS_ERROR("Unsupported type"); diff --git a/source/unary/Round.hpp b/source/unary/Round.hpp index fdec97f..276501f 100644 --- a/source/unary/Round.hpp +++ b/source/unary/Round.hpp @@ -31,24 +31,31 @@ namespace Langulus::SIMD "Suboptimal and pointless for whole numbers"); (void)value; - constexpr auto STYLE = SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_NO_EXC; + #if LANGULUS_COMPILER(CLANG) and LANGULUS(DEBUG) + // WORKAROUND for a Clang bug, see: + // https://github.com/simd-everywhere/simde/issues/1014 + //TODO hopefully it is fixed in the future + return Unsupported {}; + #else + constexpr auto STYLE = SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_NO_EXC; - if constexpr (CT::SIMD128) { - if constexpr (CT::Float) return R {simde_mm_round_ps(value, STYLE)}; - else if constexpr (CT::Double) return R {simde_mm_round_pd(value, STYLE)}; - else LANGULUS_ERROR("Unsupported type for 16-byte package"); - } - else if constexpr (CT::SIMD256) { - if constexpr (CT::Float) return R {simde_mm256_round_ps(value, STYLE)}; - else if constexpr (CT::Double) return R {simde_mm256_round_pd(value, STYLE)}; - else LANGULUS_ERROR("Unsupported type for 32-byte package"); - } - else if constexpr (CT::SIMD512) { - if constexpr (CT::Float) return R {simde_mm512_roundscale_ps(value, STYLE)}; - else if constexpr (CT::Double) return R {simde_mm512_roundscale_pd(value, STYLE)}; - else LANGULUS_ERROR("Unsupported type for 64-byte package"); - } - else LANGULUS_ERROR("Unsupported type"); + if constexpr (CT::SIMD128) { + if constexpr (CT::Float) return R {simde_mm_round_ps(value, STYLE)}; + else if constexpr (CT::Double) return R {simde_mm_round_pd(value, STYLE)}; + else LANGULUS_ERROR("Unsupported type for 16-byte package"); + } + else if constexpr (CT::SIMD256) { + if constexpr (CT::Float) return R {simde_mm256_round_ps(value, STYLE)}; + else if constexpr (CT::Double) return R {simde_mm256_round_pd(value, STYLE)}; + else LANGULUS_ERROR("Unsupported type for 32-byte package"); + } + else if constexpr (CT::SIMD512) { + if constexpr (CT::Float) return R {simde_mm512_roundscale_ps(value, STYLE)}; + else if constexpr (CT::Double) return R {simde_mm512_roundscale_pd(value, STYLE)}; + else LANGULUS_ERROR("Unsupported type for 64-byte package"); + } + else LANGULUS_ERROR("Unsupported type"); + #endif } /// Get rounded values as constexpr, if possible diff --git a/test/Common.hpp b/test/Common.hpp new file mode 100644 index 0000000..681b682 --- /dev/null +++ b/test/Common.hpp @@ -0,0 +1,61 @@ +/// +/// Langulus::SIMD +/// Copyright (c) 2019 Dimo Markov +/// Part of the Langulus framework, see https://langulus.com +/// +/// SPDX-License-Identifier: MIT +/// + +/// INTENTIONALLY NOT GUARDED +/// Include this file once in each cpp file, after all other headers +#ifdef TWOBLUECUBES_SINGLE_INCLUDE_CATCH_HPP_INCLUDED + #error Catch has been included prior to this header +#endif + +//#define LANGULUS_STD_BENCHMARK + +#define CATCH_CONFIG_ENABLE_BENCHMARKING + +#include "Main.hpp" +#include + + +/// See https://github.com/catchorg/Catch2/blob/devel/docs/tostring.md +CATCH_TRANSLATE_EXCEPTION(::Langulus::Exception const& ex) { + return fmt::format("{}", ex); +} + +namespace Catch { + template<> + struct StringMaker { + static std::string convert(char8_t const& value) { + return std::to_string(static_cast(value)); + } + }; + + template<> + struct StringMaker { + static std::string convert(char16_t const& value) { + return std::to_string(static_cast(value)); + } + }; + + template<> + struct StringMaker { + static std::string convert(wchar_t const& value) { + return std::to_string(static_cast(value)); + } + }; + + template<> + struct StringMaker<::Langulus::Byte> { + static std::string convert(::Langulus::Byte const& value) { + return std::to_string(static_cast(value.mValue)); + } + }; +} + +using timer = Catch::Benchmark::Chronometer; + +template +using uninitialized = Catch::Benchmark::storage_for; \ No newline at end of file diff --git a/test/Equal/TestEqual-VVB.cpp b/test/Equal/TestEqual-VVB.cpp index 405c8ef..5b71f5b 100644 --- a/test/Equal/TestEqual-VVB.cpp +++ b/test/Equal/TestEqual-VVB.cpp @@ -6,17 +6,16 @@ /// SPDX-License-Identifier: MIT /// #include "TestEqual.hpp" -#include /// TEMPLATE_TEST_CASE("Vector == Vector -> Bool", "[compare]" + , VECTORS_ALL(9) , VECTORS_ALL(2) , VECTORS_ALL(3) , VECTORS_ALL(4) , VECTORS_ALL(5) , VECTORS_ALL(8) - , VECTORS_ALL(9) , VECTORS_ALL(16) , VECTORS_ALL(17) , VECTORS_ALL(32) diff --git a/test/Equal/TestEqual-VVM.cpp b/test/Equal/TestEqual-VVM.cpp index c466c54..9c2974f 100644 --- a/test/Equal/TestEqual-VVM.cpp +++ b/test/Equal/TestEqual-VVM.cpp @@ -6,7 +6,6 @@ /// SPDX-License-Identifier: MIT /// #include "TestEqual.hpp" -#include /// diff --git a/test/Equal/TestEqual.hpp b/test/Equal/TestEqual.hpp index 6dac8ef..2290131 100644 --- a/test/Equal/TestEqual.hpp +++ b/test/Equal/TestEqual.hpp @@ -6,7 +6,7 @@ /// SPDX-License-Identifier: MIT /// #pragma once -#include "../Main.hpp" +#include "../Common.hpp" /// Compare two scalars and put result in a bit diff --git a/test/Multiply/TestMul-VS.cpp b/test/Multiply/TestMul-VS.cpp index c23fee9..27c96c5 100644 --- a/test/Multiply/TestMul-VS.cpp +++ b/test/Multiply/TestMul-VS.cpp @@ -6,7 +6,6 @@ /// SPDX-License-Identifier: MIT /// #include "TestMul.hpp" -#include TEMPLATE_TEST_CASE("Vector * Scalar", "[multiply]" diff --git a/test/Multiply/TestMul-VV.cpp b/test/Multiply/TestMul-VV.cpp index ec6ec3d..b3cb37f 100644 --- a/test/Multiply/TestMul-VV.cpp +++ b/test/Multiply/TestMul-VV.cpp @@ -6,7 +6,6 @@ /// SPDX-License-Identifier: MIT /// #include "TestMul.hpp" -#include TEMPLATE_TEST_CASE("Vector * Vector", "[multiply]" diff --git a/test/Multiply/TestMul.hpp b/test/Multiply/TestMul.hpp index 79b346d..b07b4df 100644 --- a/test/Multiply/TestMul.hpp +++ b/test/Multiply/TestMul.hpp @@ -6,7 +6,7 @@ /// SPDX-License-Identifier: MIT /// #pragma once -#include "../Main.hpp" +#include "../Common.hpp" /// Scalar * Scalar (either dense or sparse, wrapped or not) diff --git a/test/TestAdd.cpp b/test/TestAdd.cpp index f94a9cf..23d20ef 100644 --- a/test/TestAdd.cpp +++ b/test/TestAdd.cpp @@ -5,22 +5,15 @@ /// /// SPDX-License-Identifier: MIT /// -#include "Main.hpp" -#include +#include "Common.hpp" -using timer = Catch::Benchmark::Chronometer; -template -using uninitialized = Catch::Benchmark::storage_for; - -template -LANGULUS(INLINED) +template LANGULUS(INLINED) void ControlAdd(const LHS& lhs, const RHS& rhs, OUT& out) noexcept { - DenseCast(out) = DenseCast(lhs) + DenseCast(rhs); + out = lhs + rhs; } -template -LANGULUS(INLINED) +template LANGULUS(INLINED) void ControlAdd(const Vector& lhsArray, const Vector& rhsArray, Vector& out) noexcept { auto r = out.mArray; auto lhs = lhsArray.mArray; @@ -57,11 +50,7 @@ TEMPLATE_TEST_CASE("Add", "[add]" WHEN("Added") { ControlAdd(x, y, rCheck); - - if constexpr (CT::Vector) - SIMD::Add(x.mArray, y.mArray, r.mArray); - else - SIMD::Add(x, y, r); + SIMD::Add(x, y, r); REQUIRE(r == rCheck); @@ -100,10 +89,7 @@ TEMPLATE_TEST_CASE("Add", "[add]" some nr(meter.runs()); meter.measure([&](int i) { - if constexpr (CT::Vector) - SIMD::Add(nx[i].mArray, ny[i].mArray, nr[i].mArray); - else - SIMD::Add(nx[i], ny[i], nr[i]); + SIMD::Add(nx[i], ny[i], nr[i]); }); }; #endif @@ -111,11 +97,7 @@ TEMPLATE_TEST_CASE("Add", "[add]" WHEN("Added in reverse") { ControlAdd(y, x, rCheck); - - if constexpr (CT::Vector) - SIMD::Add(y.mArray, x.mArray, r.mArray); - else - SIMD::Add(y, x, r); + SIMD::Add(y, x, r); REQUIRE(r == rCheck); } diff --git a/test/TestCeil.cpp b/test/TestCeil.cpp index b3456fb..9ffbd12 100644 --- a/test/TestCeil.cpp +++ b/test/TestCeil.cpp @@ -5,17 +5,12 @@ /// /// SPDX-License-Identifier: MIT /// -#include "Main.hpp" -#include +#include "Common.hpp" -using timer = Catch::Benchmark::Chronometer; - -template -using uninitialized = Catch::Benchmark::storage_for; template LANGULUS(INLINED) void ControlCeil(const VAL& val, OUT& out) noexcept { - DenseCast(out) = std::ceil(DenseCast(val)); + out = std::ceil(val); } template LANGULUS(INLINED) @@ -94,11 +89,7 @@ TEMPLATE_TEST_CASE("Ceil", "[ceil]" WHEN("Ceiled") { ControlCeil(x, rCheck); - - if constexpr (CT::Vector) - SIMD::Ceil(x.mArray, r.mArray); - else - SIMD::Ceil(x, r); + SIMD::Ceil(x, r); REQUIRE(r == rCheck); @@ -125,10 +116,7 @@ TEMPLATE_TEST_CASE("Ceil", "[ceil]" some nr(meter.runs()); meter.measure([&](int i) { - if constexpr (CT::Vector) - SIMD::Ceil(nx[i].mArray, nr[i].mArray); - else - SIMD::Ceil(nx[i], nr[i]); + SIMD::Ceil(nx[i], nr[i]); }); }; #endif diff --git a/test/TestDiv.cpp b/test/TestDiv.cpp index 65ad987..0e7e4cf 100644 --- a/test/TestDiv.cpp +++ b/test/TestDiv.cpp @@ -5,20 +5,17 @@ /// /// SPDX-License-Identifier: MIT /// -#include "Main.hpp" -#include +#include "Common.hpp" -template -LANGULUS(INLINED) + +template LANGULUS(INLINED) void ControlDiv(const LHS& lhs, const RHS& rhs, OUT& out) { - if (rhs == Decay {0}) + if (rhs == RHS {0}) LANGULUS_THROW(DivisionByZero, "Division by zero"); - out = lhs / rhs; } -template -LANGULUS(INLINED) +template LANGULUS(INLINED) void ControlDiv(const Vector& lhsArray, const Vector& rhsArray, Vector& out) { auto r = out.mArray; auto lhs = lhsArray.mArray; @@ -55,11 +52,7 @@ TEMPLATE_TEST_CASE("Divide", "[divide]" WHEN("Divided") { ControlDiv(x, y, rCheck); - - if constexpr (CT::Vector) - SIMD::Divide(x.mArray, y.mArray, r.mArray); - else - SIMD::Divide(x, y, r); + SIMD::Divide(x, y, r); REQUIRE(r == rCheck); @@ -98,10 +91,7 @@ TEMPLATE_TEST_CASE("Divide", "[divide]" some nr(meter.runs()); meter.measure([&](int i) { - if constexpr (CT::Vector) - SIMD::Divide(nx[i].mArray, ny[i].mArray, nr[i].mArray); - else - SIMD::Divide(nx[i], ny[i], nr[i]); + SIMD::Divide(nx[i], ny[i], nr[i]); }); }; #endif @@ -109,11 +99,7 @@ TEMPLATE_TEST_CASE("Divide", "[divide]" WHEN("Divided in reverse") { ControlDiv(y, x, rCheck); - - if constexpr (CT::Vector) - SIMD::Divide(y.mArray, x.mArray, r.mArray); - else - SIMD::Divide(y, x, r); + SIMD::Divide(y, x, r); REQUIRE(r == rCheck); } @@ -125,11 +111,7 @@ TEMPLATE_TEST_CASE("Divide", "[divide]" DenseCast(x.mArray[0]) = {}; REQUIRE_THROWS(ControlDiv(y, x, rCheck)); - - if constexpr (CT::Vector) - REQUIRE_THROWS(SIMD::Divide(y.mArray, x.mArray, r.mArray)); - else - REQUIRE_THROWS(SIMD::Divide(y, x, r)); + REQUIRE_THROWS(SIMD::Divide(y, x, r)); } } } \ No newline at end of file diff --git a/test/TestFloor.cpp b/test/TestFloor.cpp index 0af2cdc..b308faf 100644 --- a/test/TestFloor.cpp +++ b/test/TestFloor.cpp @@ -5,13 +5,8 @@ /// /// SPDX-License-Identifier: MIT /// -#include "Main.hpp" -#include +#include "Common.hpp" -using timer = Catch::Benchmark::Chronometer; - -template -using uninitialized = Catch::Benchmark::storage_for; template LANGULUS(INLINED) void ControlFloor(const VAL& val, OUT& out) noexcept { @@ -94,11 +89,7 @@ TEMPLATE_TEST_CASE("Floor", "[floor]" WHEN("Floored") { ControlFloor(x, rCheck); - - if constexpr (CT::Vector) - SIMD::Floor(x.mArray, r.mArray); - else - SIMD::Floor(x, r); + SIMD::Floor(x, r); REQUIRE(r == rCheck); @@ -125,10 +116,7 @@ TEMPLATE_TEST_CASE("Floor", "[floor]" some nr(meter.runs()); meter.measure([&](int i) { - if constexpr (CT::Vector) - SIMD::Floor(nx[i].mArray, nr[i].mArray); - else - SIMD::Floor(nx[i], nr[i]); + SIMD::Floor(nx[i], nr[i]); }); }; #endif diff --git a/test/TestPack.cpp b/test/TestPack.cpp new file mode 100644 index 0000000..1562ce0 --- /dev/null +++ b/test/TestPack.cpp @@ -0,0 +1,145 @@ +/// +/// Langulus::SIMD +/// Copyright (c) 2019 Dimo Markov +/// Part of the Langulus framework, see https://langulus.com +/// +/// SPDX-License-Identifier: MIT +/// +#include "Common.hpp" + + +TEMPLATE_TEST_CASE("Pack 64 bits", "[pack]", ::std::int64_t, ::std::uint64_t) { + using T = TestType; + +#if LANGULUS_SIMD(128BIT) + GIVEN("A 128bit sequence of numbers") { + const T n[2] {4, 8}; + const auto r = SIMD::Load<0>(n); + + WHEN("Packed once") { + const auto r_packed = r.Pack(); + + if constexpr (CT::Signed) { + ::std::int32_t result[2]; + SIMD::Store(r_packed, result); + + for (signed i = 0; i < 2; ++i) + REQUIRE(result[i] == (i + 1) * 4); + } + else { + ::std::uint32_t result[2]; + SIMD::Store(r_packed, result); + + for (unsigned i = 0; i < 2; ++i) + REQUIRE(result[i] == (i + 1) * 4); + } + } + + WHEN("Packed twice") { + const auto r_packed = r.Pack().Pack(); + + if constexpr (CT::Signed) { + ::std::int16_t result[2]; + SIMD::Store(r_packed, result); + + for (signed i = 0; i < 2; ++i) + REQUIRE(result[i] == (i + 1) * 4); + } + else { + ::std::uint16_t result[2]; + SIMD::Store(r_packed, result); + + for (unsigned i = 0; i < 2; ++i) + REQUIRE(result[i] == (i + 1) * 4); + } + } + + WHEN("Packed thrice") { + const auto r_packed = r.Pack().Pack().Pack(); + + if constexpr (CT::Signed) { + ::std::int8_t result[2]; + SIMD::Store(r_packed, result); + + for (signed i = 0; i < 2; ++i) + REQUIRE(result[i] == (i + 1) * 4); + } + else { + ::std::uint8_t result[2]; + SIMD::Store(r_packed, result); + + for (unsigned i = 0; i < 2; ++i) + REQUIRE(result[i] == (i + 1) * 4); + } + } + } +#endif + +#if LANGULUS_SIMD(256BIT) + GIVEN("A 256bit sequence of numbers") { + const T n[4] {4, 8, 12, 16}; + const auto r = SIMD::Load<0>(n); + + WHEN("Packed once") { + const auto r_packed = r.Pack(); + + if constexpr (CT::Signed) { + ::std::int32_t result[4]; + SIMD::Store(r_packed, result); + + for (signed i = 0; i < 4; ++i) + REQUIRE(result[i] == (i + 1) * 4); + } + else { + ::std::uint32_t result[4]; + SIMD::Store(r_packed, result); + + for (unsigned i = 0; i < 4; ++i) + REQUIRE(result[i] == (i + 1) * 4); + } + } + + WHEN("Packed twice") { + const auto r_packed = r.Pack().Pack(); + + if constexpr (CT::Signed) { + ::std::int16_t result[4]; + SIMD::Store(r_packed, result); + + for (signed i = 0; i < 4; ++i) + REQUIRE(result[i] == (i + 1) * 4); + } + else { + ::std::uint16_t result[4]; + SIMD::Store(r_packed, result); + + for (unsigned i = 0; i < 4; ++i) + REQUIRE(result[i] == (i + 1) * 4); + } + } + + WHEN("Packed thrice") { + const auto r_packed = r.Pack().Pack().Pack(); + + if constexpr (CT::Signed) { + ::std::int8_t result[4]; + SIMD::Store(r_packed, result); + + for (signed i = 0; i < 4; ++i) + REQUIRE(result[i] == (i + 1) * 4); + } + else { + ::std::uint8_t result[4]; + SIMD::Store(r_packed, result); + + for (unsigned i = 0; i < 4; ++i) + REQUIRE(result[i] == (i + 1) * 4); + } + } + } +#endif + +#if LANGULUS_SIMD(512BIT) + TODO(); +#endif +} \ No newline at end of file diff --git a/test/TestPow.cpp b/test/TestPow.cpp index 1df29eb..0b8fbbb 100644 --- a/test/TestPow.cpp +++ b/test/TestPow.cpp @@ -5,8 +5,7 @@ /// /// SPDX-License-Identifier: MIT /// -#include "Main.hpp" -#include +#include "Common.hpp" template NOD() LANGULUS(INLINED) @@ -83,11 +82,7 @@ TEMPLATE_TEST_CASE("Power", "[power]" WHEN("Raised to a power") { ControlPow(x, y, rCheck); - - if constexpr (CT::Vector) - SIMD::Power(x.mArray, y.mArray, r.mArray); - else - SIMD::Power(x, y, r); + SIMD::Power(x, y, r); REQUIRE(r == rCheck); @@ -126,10 +121,7 @@ TEMPLATE_TEST_CASE("Power", "[power]" some nr(meter.runs()); meter.measure([&](int i) { - if constexpr (CT::Vector) - SIMD::Power(nx[i].mArray, ny[i].mArray, nr[i].mArray); - else - SIMD::Power(nx[i], ny[i], nr[i]); + SIMD::Power(nx[i], ny[i], nr[i]); }); }; #endif @@ -137,11 +129,7 @@ TEMPLATE_TEST_CASE("Power", "[power]" WHEN("Raise to a power in reverse") { ControlPow(y, x, rCheck); - - if constexpr (CT::Vector) - SIMD::Power(y.mArray, x.mArray, r.mArray); - else - SIMD::Power(y, x, r); + SIMD::Power(y, x, r); REQUIRE(r == rCheck); } diff --git a/test/TestShiftLeft.cpp b/test/TestShiftLeft.cpp index 5026734..f4c82d8 100644 --- a/test/TestShiftLeft.cpp +++ b/test/TestShiftLeft.cpp @@ -5,74 +5,37 @@ /// /// SPDX-License-Identifier: MIT /// -#include "Main.hpp" -#include - -namespace Catch { - template<> - struct StringMaker { - static std::string convert(char8_t const& value) { - return std::to_string(static_cast(value)); - } - }; - - template<> - struct StringMaker { - static std::string convert(char16_t const& value) { - return std::to_string(static_cast(value)); - } - }; +#include "Common.hpp" - template<> - struct StringMaker { - static std::string convert(wchar_t const& value) { - return std::to_string(static_cast(value)); - } - }; - - template<> - struct StringMaker<::Langulus::Byte> { - static std::string convert(::Langulus::Byte const& value) { - return std::to_string(static_cast(value.mValue)); - } - }; -} -using timer = Catch::Benchmark::Chronometer; - -template -using uninitialized = Catch::Benchmark::storage_for; - -template -LANGULUS(INLINED) +template LANGULUS(INLINED) void ControlSL(const LHS& lhs, const RHS& rhs, OUT& out) noexcept { - static_assert(CT::IntegerX, Decay>, "Can only shift integers"); - // Well defined condition in SIMD calls, that is otherwise - // undefined behavior by C++ standard - out = rhs < Decay {sizeof(Decay) * 8} and rhs >= 0 + static_assert(CT::IntegerX, "Can only shift integers"); + // Well defined condition in SIMD calls, that is otherwise + // undefined behavior by C++ standard + out = rhs < RHS {sizeof(RHS) * 8} and rhs >= 0 ? lhs << rhs : 0; } -template -LANGULUS(INLINED) +template LANGULUS(INLINED) void ControlSL(const Vector& lhsArray, const Vector& rhsArray, Vector& out) noexcept { - static_assert(CT::IntegerX, Decay>, "Can only shift integers"); + static_assert(CT::IntegerX, "Can only shift integers"); auto r = out.mArray; auto lhs = lhsArray.mArray; auto rhs = rhsArray.mArray; const auto lhsEnd = lhs + C; while (lhs != lhsEnd) - ControlSL(*lhs++, *rhs++, *r++); + ControlSL(*(lhs++), *(rhs++), *(r++)); } TEMPLATE_TEST_CASE("Shift left", "[shift]" + , VECTORS_INT(8) , NUMBERS_INT() , VECTORS_INT(1) , VECTORS_INT(2) , VECTORS_INT(3) , VECTORS_INT(4) , VECTORS_INT(5) - , VECTORS_INT(8) , VECTORS_INT(9) , VECTORS_INT(16) , VECTORS_INT(17) @@ -92,11 +55,7 @@ TEMPLATE_TEST_CASE("Shift left", "[shift]" WHEN("Shifted left") { ControlSL(x, y, rCheck); - - if constexpr (CT::Vector) - SIMD::ShiftLeft(x.mArray, y.mArray, r.mArray); - else - SIMD::ShiftLeft(x, y, r); + SIMD::ShiftLeft(x, y, r); REQUIRE(r == rCheck); @@ -135,10 +94,7 @@ TEMPLATE_TEST_CASE("Shift left", "[shift]" some nr(meter.runs()); meter.measure([&](int i) { - if constexpr (CT::Vector) - SIMD::ShiftLeft(nx[i].mArray, ny[i].mArray, nr[i].mArray); - else - SIMD::ShiftLeft(nx[i], ny[i], nr[i]); + SIMD::ShiftLeft(nx[i], ny[i], nr[i]); }); }; #endif @@ -146,11 +102,7 @@ TEMPLATE_TEST_CASE("Shift left", "[shift]" WHEN("Shifted left in reverse") { ControlSL(y, x, rCheck); - - if constexpr (CT::Vector) - SIMD::ShiftLeft(y.mArray, x.mArray, r.mArray); - else - SIMD::ShiftLeft(y, x, r); + SIMD::ShiftLeft(y, x, r); REQUIRE(r == rCheck); } diff --git a/test/TestShiftRight.cpp b/test/TestShiftRight.cpp index 35b0909..438c293 100644 --- a/test/TestShiftRight.cpp +++ b/test/TestShiftRight.cpp @@ -5,56 +5,21 @@ /// /// SPDX-License-Identifier: MIT /// -#include "Main.hpp" -#include - -namespace Catch { - template<> - struct StringMaker { - static std::string convert(char8_t const& value) { - return std::to_string(static_cast(value)); - } - }; - - template<> - struct StringMaker { - static std::string convert(char16_t const& value) { - return std::to_string(static_cast(value)); - } - }; +#include "Common.hpp" - template<> - struct StringMaker { - static std::string convert(wchar_t const& value) { - return std::to_string(static_cast(value)); - } - }; - template<> - struct StringMaker<::Langulus::Byte> { - static std::string convert(::Langulus::Byte const& value) { - return std::to_string(static_cast(value.mValue)); - } - }; -} - -using timer = Catch::Benchmark::Chronometer; - -template -using uninitialized = Catch::Benchmark::storage_for; - -template -LANGULUS(INLINED) void ControlSR(const LHS& lhs, const RHS& rhs, OUT& out) noexcept { - static_assert(CT::IntegerX, Decay>, "Can only shift integers"); - // Well defined condition in SIMD calls, that is otherwise - // undefined behavior by C++ standard - out = rhs < Decay {sizeof(Decay) * 8} and rhs >= 0 +template LANGULUS(INLINED) +void ControlSR(const LHS& lhs, const RHS& rhs, OUT& out) noexcept { + static_assert(CT::IntegerX, "Can only shift integers"); + // Well defined condition in SIMD calls, that is otherwise + // undefined behavior by C++ standard + out = rhs < RHS {sizeof(RHS) * 8} and rhs >= 0 ? lhs >> rhs : 0; } -template -LANGULUS(INLINED) void ControlSR(const Vector& lhsArray, const Vector& rhsArray, Vector& out) noexcept { - static_assert(CT::IntegerX, Decay>, "Can only shift integers"); +template LANGULUS(INLINED) +void ControlSR(const Vector& lhsArray, const Vector& rhsArray, Vector& out) noexcept { + static_assert(CT::IntegerX, "Can only shift integers"); auto r = out.mArray; auto lhs = lhsArray.mArray; auto rhs = rhsArray.mArray; @@ -90,11 +55,7 @@ TEMPLATE_TEST_CASE("Shift right", "[shift]" WHEN("Shifted right") { ControlSR(x, y, rCheck); - - if constexpr (CT::Vector) - SIMD::ShiftRight(x.mArray, y.mArray, r.mArray); - else - SIMD::ShiftRight(x, y, r); + SIMD::ShiftRight(x, y, r); REQUIRE(r == rCheck); @@ -133,10 +94,7 @@ TEMPLATE_TEST_CASE("Shift right", "[shift]" some nr(meter.runs()); meter.measure([&](int i) { - if constexpr (CT::Vector) - SIMD::ShiftRight(nx[i].mArray, ny[i].mArray, nr[i].mArray); - else - SIMD::ShiftRight(nx[i], ny[i], nr[i]); + SIMD::ShiftRight(nx[i], ny[i], nr[i]); }); }; #endif @@ -144,11 +102,7 @@ TEMPLATE_TEST_CASE("Shift right", "[shift]" WHEN("Shifted right in reverse") { ControlSR(y, x, rCheck); - - if constexpr (CT::Vector) - SIMD::ShiftRight(y.mArray, x.mArray, r.mArray); - else - SIMD::ShiftRight(y, x, r); + SIMD::ShiftRight(y, x, r); REQUIRE(r == rCheck); } diff --git a/test/TestStrangeMSVCBug.cpp b/test/TestStrangeMSVCBug.cpp index c9a7a4e..f1c998d 100644 --- a/test/TestStrangeMSVCBug.cpp +++ b/test/TestStrangeMSVCBug.cpp @@ -5,8 +5,7 @@ /// /// SPDX-License-Identifier: MIT /// -#include "Main.hpp" -#include +#include "Common.hpp" #if LANGULUS_SIMD(256BIT) diff --git a/test/TestSub.cpp b/test/TestSub.cpp index 23d73ba..0f16da5 100644 --- a/test/TestSub.cpp +++ b/test/TestSub.cpp @@ -5,21 +5,16 @@ /// /// SPDX-License-Identifier: MIT /// -#include "Main.hpp" -#include +#include "Common.hpp" -using timer = Catch::Benchmark::Chronometer; -template -using uninitialized = Catch::Benchmark::storage_for; - -template -LANGULUS(INLINED) void ControlSub(const LHS& lhs, const RHS& rhs, OUT& out) noexcept { +template LANGULUS(INLINED) +void ControlSub(const LHS& lhs, const RHS& rhs, OUT& out) noexcept { out = lhs - rhs; } -template -LANGULUS(INLINED) void ControlSub(const Vector& lhsArray, const Vector& rhsArray, Vector& out) noexcept { +template LANGULUS(INLINED) +void ControlSub(const Vector& lhsArray, const Vector& rhsArray, Vector& out) noexcept { auto r = out.mArray; auto lhs = lhsArray.mArray; auto rhs = rhsArray.mArray; @@ -55,11 +50,7 @@ TEMPLATE_TEST_CASE("Subtract", "[subtract]" WHEN("Subtracted") { ControlSub(x, y, rCheck); - - if constexpr (CT::Vector) - SIMD::Subtract(x.mArray, y.mArray, r.mArray); - else - SIMD::Subtract(x, y, r); + SIMD::Subtract(x, y, r); REQUIRE(r == rCheck); @@ -98,10 +89,7 @@ TEMPLATE_TEST_CASE("Subtract", "[subtract]" some nr(meter.runs()); meter.measure([&](int i) { - if constexpr (CT::Vector) - SIMD::Subtract(nx[i].mArray, ny[i].mArray, nr[i].mArray); - else - SIMD::Subtract(nx[i], ny[i], nr[i]); + SIMD::Subtract(nx[i], ny[i], nr[i]); }); }; #endif @@ -109,11 +97,7 @@ TEMPLATE_TEST_CASE("Subtract", "[subtract]" WHEN("Subtracted in reverse") { ControlSub(y, x, rCheck); - - if constexpr (CT::Vector) - SIMD::Subtract(y.mArray, x.mArray, r.mArray); - else - SIMD::Subtract(y, x, r); + SIMD::Subtract(y, x, r); REQUIRE(r == rCheck); } diff --git a/test/TestTypeCorrectness.cpp b/test/TestTypeCorrectness.cpp index f2a63c7..eefe41f 100644 --- a/test/TestTypeCorrectness.cpp +++ b/test/TestTypeCorrectness.cpp @@ -5,9 +5,8 @@ /// /// SPDX-License-Identifier: MIT /// -#include "Main.hpp" -#include -#include +#include "Common.hpp" + using Vec2u8 = Vector<::std::uint8_t, 2>; using Vec2u16 = Vector<::std::uint16_t, 2>; diff --git a/test/TestUnpack.cpp b/test/TestUnpack.cpp new file mode 100644 index 0000000..1829f23 --- /dev/null +++ b/test/TestUnpack.cpp @@ -0,0 +1,262 @@ +/// +/// Langulus::SIMD +/// Copyright (c) 2019 Dimo Markov +/// Part of the Langulus framework, see https://langulus.com +/// +/// SPDX-License-Identifier: MIT +/// +#include "Common.hpp" + + +TEMPLATE_TEST_CASE("Unpack 8 bits", "[unpack]", ::std::int8_t, ::std::uint8_t) { + using T = TestType; + +#if LANGULUS_SIMD(128BIT) + GIVEN("A 128bit sequence of numbers") { + const T n[16] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + const auto r = SIMD::Load<0>(n); + + WHEN("Unpacked low once") { + const auto r_unpacked = r.UnpackLo(); + + if constexpr (CT::Signed) { + ::std::int16_t result[8]; + SIMD::Store(r_unpacked, result); + + for (signed i = 0; i < 8; ++i) + REQUIRE(result[i] == i + 1); + } + else { + ::std::uint16_t result[8]; + SIMD::Store(r_unpacked, result); + + for (unsigned i = 0; i < 8; ++i) + REQUIRE(result[i] == i + 1); + } + } + + WHEN("Unpacked low twice") { + const auto r_unpacked = r.UnpackLo().UnpackLo(); + + if constexpr (CT::Signed) { + ::std::int32_t result[4]; + SIMD::Store(r_unpacked, result); + + for (signed i = 0; i < 4; ++i) + REQUIRE(result[i] == i + 1); + } + else { + ::std::uint32_t result[4]; + SIMD::Store(r_unpacked, result); + + for (unsigned i = 0; i < 4; ++i) + REQUIRE(result[i] == i + 1); + } + } + + WHEN("Unpacked low thrice") { + const auto r_unpacked = r.UnpackLo().UnpackLo().UnpackLo(); + + if constexpr (CT::Signed) { + ::std::int64_t result[2]; + SIMD::Store(r_unpacked, result); + + for (signed i = 0; i < 2; ++i) + REQUIRE(result[i] == i + 1); + } + else { + ::std::uint64_t result[2]; + SIMD::Store(r_unpacked, result); + + for (unsigned i = 0; i < 2; ++i) + REQUIRE(result[i] == i + 1); + } + } + + WHEN("Unpacked high once") { + const auto r_unpacked = r.UnpackHi(); + + if constexpr (CT::Signed) { + ::std::int16_t result[8]; + SIMD::Store(r_unpacked, result); + + for (signed i = 0; i < 8; ++i) + REQUIRE(result[i] == i + 9); + } + else { + ::std::uint16_t result[8]; + SIMD::Store(r_unpacked, result); + + for (unsigned i = 0; i < 8; ++i) + REQUIRE(result[i] == i + 9); + } + } + + WHEN("Unpacked high twice") { + const auto r_unpacked = r.UnpackHi().UnpackHi(); + + if constexpr (CT::Signed) { + ::std::int32_t result[4]; + SIMD::Store(r_unpacked, result); + + for (signed i = 0; i < 4; ++i) + REQUIRE(result[i] == i + 13); + } + else { + ::std::uint32_t result[4]; + SIMD::Store(r_unpacked, result); + + for (unsigned i = 0; i < 4; ++i) + REQUIRE(result[i] == i + 13); + } + } + + WHEN("Unpacked high thrice") { + const auto r_unpacked = r.UnpackHi().UnpackHi().UnpackHi(); + + if constexpr (CT::Signed) { + ::std::int64_t result[2]; + SIMD::Store(r_unpacked, result); + + for (signed i = 0; i < 2; ++i) + REQUIRE(result[i] == i + 15); + } + else { + ::std::uint64_t result[2]; + SIMD::Store(r_unpacked, result); + + for (unsigned i = 0; i < 2; ++i) + REQUIRE(result[i] == i + 15); + } + } + } +#endif + +#if LANGULUS_SIMD(256BIT) + GIVEN("A 256bit sequence of numbers") { + const T n[32] { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 + }; + const auto r = SIMD::Load<0>(n); + + WHEN("Unpacked low once") { + const auto r_unpacked = r.UnpackLo(); + + if constexpr (CT::Signed) { + ::std::int16_t result[16]; + SIMD::Store(r_unpacked, result); + + for (signed i = 0; i < 16; ++i) + REQUIRE(result[i] == i + 1); + } + else { + ::std::uint16_t result[16]; + SIMD::Store(r_unpacked, result); + + for (unsigned i = 0; i < 16; ++i) + REQUIRE(result[i] == i + 1); + } + } + + WHEN("Unpacked low twice") { + const auto r_unpacked = r.UnpackLo().UnpackLo(); + + if constexpr (CT::Signed) { + ::std::int32_t result[8]; + SIMD::Store(r_unpacked, result); + + for (signed i = 0; i < 8; ++i) + REQUIRE(result[i] == i + 1); + } + else { + ::std::uint32_t result[8]; + SIMD::Store(r_unpacked, result); + + for (unsigned i = 0; i < 8; ++i) + REQUIRE(result[i] == i + 1); + } + } + + WHEN("Unpacked low thrice") { + const auto r_unpacked = r.UnpackLo().UnpackLo().UnpackLo(); + + if constexpr (CT::Signed) { + ::std::int64_t result[4]; + SIMD::Store(r_unpacked, result); + + for (signed i = 0; i < 4; ++i) + REQUIRE(result[i] == i + 1); + } + else { + ::std::uint64_t result[4]; + SIMD::Store(r_unpacked, result); + + for (unsigned i = 0; i < 4; ++i) + REQUIRE(result[i] == i + 1); + } + } + + WHEN("Unpacked high once") { + const auto r_unpacked = r.UnpackHi(); + + if constexpr (CT::Signed) { + ::std::int16_t result[16]; + SIMD::Store(r_unpacked, result); + + for (signed i = 0; i < 16; ++i) + REQUIRE(result[i] == i + 17); + } + else { + ::std::uint16_t result[16]; + SIMD::Store(r_unpacked, result); + + for (unsigned i = 0; i < 16; ++i) + REQUIRE(result[i] == i + 17); + } + } + + WHEN("Unpacked high twice") { + const auto r_unpacked = r.UnpackHi().UnpackHi(); + + if constexpr (CT::Signed) { + ::std::int32_t result[8]; + SIMD::Store(r_unpacked, result); + + for (signed i = 0; i < 8; ++i) + REQUIRE(result[i] == i + 25); + } + else { + ::std::uint32_t result[8]; + SIMD::Store(r_unpacked, result); + + for (unsigned i = 0; i < 8; ++i) + REQUIRE(result[i] == i + 25); + } + } + + WHEN("Unpacked high thrice") { + const auto r_unpacked = r.UnpackHi().UnpackHi().UnpackHi(); + + if constexpr (CT::Signed) { + ::std::int64_t result[4]; + SIMD::Store(r_unpacked, result); + + for (signed i = 0; i < 4; ++i) + REQUIRE(result[i] == i + 29); + } + else { + ::std::uint64_t result[4]; + SIMD::Store(r_unpacked, result); + + for (unsigned i = 0; i < 4; ++i) + REQUIRE(result[i] == i + 29); + } + } + } +#endif + +#if LANGULUS_SIMD(512BIT) + TODO(); +#endif +} \ No newline at end of file