Skip to content

Commit 3679fa1

Browse files
committed
Merge bitcoin/bitcoin#28893: Fix SSE4.1-related issues
d440f13 crypto: Guard code with `ENABLE_SSE41` macro (Hennadii Stepanov) 6ec1ca7 build: Fix test for SSE4.1 intrinsics (Hennadii Stepanov) Pull request description: 1. Fix the test for SSE4.1 intrinsics during build system configuration, which can currently produce a false positive, for example, when `CXXFLAGS="-mno-sse4.1"` is provided. This PR fixes the test by adding the `_mm_blend_epi16` SSE4.1 function used in our codebase. 2. Guard the `sha256_x86_shani.cpp` code with the `ENABLE_SSE41` macro, as it uses the `_mm_blend_epi16` function from the SSE4.1 instruction set. It is possible that SHA-NI is enabled even when SSE4.1 is disabled, which causes compile errors in the master branch. Closes bitcoin/bitcoin#28864. ACKs for top commit: sipa: utACK d440f13 willcl-ark: tACK d440f13 theuni: utACK d440f13 Tree-SHA512: a6e1e8c94e1b94874ff51846815ef445e6135cbdb01b08eb695b3548115f2340dd835ebe53673ae46a553fe6be4815e68d8642c34235dd7af5106c4b7c9ea6f3
2 parents 5f5862f + d440f13 commit 3679fa1

File tree

4 files changed

+11
-10
lines changed

4 files changed

+11
-10
lines changed

configure.ac

+4-3
Original file line number | Diff line number | Diff line change
@@ -482,11 +482,12 @@ TEMP_CXXFLAGS="$CXXFLAGS"
482482
CXXFLAGS="$SSE41_CXXFLAGS $CXXFLAGS"
483483
AC_MSG_CHECKING([for SSE4.1 intrinsics])
484484
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
485-
#include <stdint.h>
486485
#include <immintrin.h>
487486
]],[[
488-
__m128i l = _mm_set1_epi32(0);
489-
return _mm_extract_epi32(l, 3);
487+
__m128i a = _mm_set1_epi32(0);
488+
__m128i b = _mm_set1_epi32(1);
489+
__m128i r = _mm_blend_epi16(a, b, 0xFF);
490+
return _mm_extract_epi32(r, 3);
490491
]])],
491492
[ AC_MSG_RESULT([yes]); enable_sse41=yes; AC_DEFINE([ENABLE_SSE41], [1], [Define this symbol to build code that uses SSE4.1 intrinsics]) ],
492493
[ AC_MSG_RESULT([no])]

src/Makefile.am

+5-5
Original file line number | Diff line number | Diff line change
@@ -51,15 +51,15 @@ LIBBITCOIN_CRYPTO = $(LIBBITCOIN_CRYPTO_BASE)
5151
if ENABLE_SSE41
5252
LIBBITCOIN_CRYPTO_SSE41 = crypto/libbitcoin_crypto_sse41.la
5353
LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_SSE41)
54+
if ENABLE_X86_SHANI
55+
LIBBITCOIN_CRYPTO_X86_SHANI = crypto/libbitcoin_crypto_x86_shani.la
56+
LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_X86_SHANI)
57+
endif
5458
endif
5559
if ENABLE_AVX2
5660
LIBBITCOIN_CRYPTO_AVX2 = crypto/libbitcoin_crypto_avx2.la
5761
LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_AVX2)
5862
endif
59-
if ENABLE_X86_SHANI
60-
LIBBITCOIN_CRYPTO_X86_SHANI = crypto/libbitcoin_crypto_x86_shani.la
61-
LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_X86_SHANI)
62-
endif
6363
if ENABLE_ARM_SHANI
6464
LIBBITCOIN_CRYPTO_ARM_SHANI = crypto/libbitcoin_crypto_arm_shani.la
6565
LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_ARM_SHANI)
@@ -622,7 +622,7 @@ crypto_libbitcoin_crypto_x86_shani_la_LDFLAGS = $(AM_LDFLAGS) -static
622622
crypto_libbitcoin_crypto_x86_shani_la_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS) -static
623623
crypto_libbitcoin_crypto_x86_shani_la_CPPFLAGS = $(AM_CPPFLAGS)
624624
crypto_libbitcoin_crypto_x86_shani_la_CXXFLAGS += $(X86_SHANI_CXXFLAGS)
625-
crypto_libbitcoin_crypto_x86_shani_la_CPPFLAGS += -DENABLE_X86_SHANI
625+
crypto_libbitcoin_crypto_x86_shani_la_CPPFLAGS += -DENABLE_SSE41 -DENABLE_X86_SHANI
626626
crypto_libbitcoin_crypto_x86_shani_la_SOURCES = crypto/sha256_x86_shani.cpp
627627

628628
# See explanation for -static in crypto_libbitcoin_crypto_base_la's LDFLAGS and

src/crypto/sha256.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -621,7 +621,7 @@ std::string SHA256AutoDetect(sha256_implementation::UseImplementation use_implem
621621
}
622622
}
623623

624-
#if defined(ENABLE_X86_SHANI)
624+
#if defined(ENABLE_SSE41) && defined(ENABLE_X86_SHANI)
625625
if (have_x86_shani) {
626626
Transform = sha256_x86_shani::Transform;
627627
TransformD64 = TransformD64Wrapper<sha256_x86_shani::Transform>;

src/crypto/sha256_x86_shani.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
// Written and placed in public domain by Jeffrey Walton.
77
// Based on code from Intel, and by Sean Gulley for the miTLS project.
88

9-
#ifdef ENABLE_X86_SHANI
9+
#if defined(ENABLE_SSE41) && defined(ENABLE_X86_SHANI)
1010

1111
#include <stdint.h>
1212
#include <immintrin.h>

0 commit comments

Comments
 (0)