@@ -175,12 +175,62 @@ extern "C" {
175
175
#elif defined(_MSC_VER) // (#if-chain ref AVOID_CPU_ARCH_1)
176
176
177
177
#if defined(_M_X64)
178
- // We need <intrin.h> for the __cpuid function.
179
- #include <intrin.h>
178
+
179
+ // On X86_64, Microsoft Visual C/C++ (MSVC) only supports SSE2 by default.
180
+ // There are /arch:SSE2, /arch:AVX and /arch:AVX2 compiler flags (the AVX2 one
181
+ // is roughly equivalent to X86_64_V3), but there is no /arch:SSE42 compiler
182
+ // flag that's equivalent to X86_64_V2.
183
+ //
184
+ // For getting maximum performance with X86_64 MSVC and Wuffs, pass /arch:AVX2
185
+ // (and then test on the oldest hardware you intend to support).
186
+ //
187
+ // Absent that compiler flag, either define one of the three macros listed
188
+ // below or else the X86_64 SIMD code will be disabled and you'll get a #pragma
189
+ // message stating this library "performs best with /arch:AVX2". This message
190
+ // is harmless and ignorable, in that the non-SIMD code is still correct and
191
+ // reasonably performant, but is a reminder that when combining Wuffs and MSVC,
192
+ // some compiler configuration is required for maximum performance.
193
+ //
194
+ // - WUFFS_CONFIG__DISABLE_MSVC_CPU_ARCH__X86_64_FAMILY
195
+ // - WUFFS_CONFIG__ENABLE_MSVC_CPU_ARCH__X86_64_V2 (enables SSE4.2 and below)
196
+ // - WUFFS_CONFIG__ENABLE_MSVC_CPU_ARCH__X86_64_V3 (enables AVX2 and below)
197
+ //
198
+ // Defining the first one (WUFFS_CONFIG__DISABLE_MSVC_CPU_ARCH__X86_64_FAMILY)
199
+ // or defining none of those three (the default state) are equivalent (in that
200
+ // both disable the SIMD code paths), other than that pragma message.
201
+ //
202
+ // When defining these WUFFS_CONFIG__ENABLE_ETC macros with MSVC, be aware that
203
+ // some users report it leading to ICEs (Internal Compiler Errors), but other
204
+ // users report no problems at all (and improved performance). It's unclear
205
+ // exactly what combination of SIMD code and MSVC configuration leads to ICEs.
206
+ // Do your own testing with your own MSVC version and configuration.
207
+ //
208
+ // https://github.com/google/wuffs/issues/148
209
+ // https://github.com/google/wuffs/issues/151
210
+ // https://developercommunity.visualstudio.com/t/fatal--error-C1001:-Internal-compiler-er/10703305
211
+ //
212
+ // Clang (including clang-cl) and GCC don't need this WUFFS_CONFIG__ETC macro
213
+ // machinery, or require the Wuffs-the-library user to fiddle with compiler
214
+ // flags, because they support "__attribute__((target(arg)))".
215
+ #if defined(__AVX2__) || defined(__clang__) || \
216
+ defined(WUFFS_CONFIG__ENABLE_MSVC_CPU_ARCH__X86_64_V3)
217
+ #define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64
218
+ #define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2
219
+ #define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3
220
+ #elif defined(WUFFS_CONFIG__ENABLE_MSVC_CPU_ARCH__X86_64_V2)
180
221
#define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64
181
222
#define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V2
182
- #if defined(__AVX2__) || defined(__clang__)
223
+ #elif !defined(WUFFS_CONFIG__DISABLE_MSVC_CPU_ARCH__X86_64_FAMILY)
224
+ #pragma message("Wuffs with MSVC+X64 performs best with /arch:AVX2")
225
+ #endif // defined(__AVX2__) || defined(__clang__) || etc
183
226
227
+ #if defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
228
+
229
+ #if defined(WUFFS_CONFIG__DISABLE_MSVC_CPU_ARCH__X86_64_FAMILY)
230
+ #error "MSVC_CPU_ARCH simultaneously enabled and disabled"
231
+ #endif
232
+
233
+ #include <intrin.h>
184
234
// intrin.h isn't enough for X64 SIMD, with clang-cl, if we want to use
185
235
// "__attribute__((target(arg)))" without e.g. "/arch:AVX".
186
236
//
@@ -190,23 +240,9 @@ extern "C" {
190
240
#include <immintrin.h> // AVX, AVX2, FMA, POPCNT
191
241
#include <nmmintrin.h> // SSE4.2
192
242
#include <wmmintrin.h> // AES, PCLMUL
193
- #define WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64_V3
194
243
195
- #else // defined(__AVX2__) || defined(__clang__)
196
-
197
- // clang-cl (which defines both __clang__ and _MSC_VER) supports
198
- // "__attribute__((target(arg)))".
199
- //
200
- // For MSVC's cl.exe (unlike clang or gcc), SIMD capability is a compile-time
201
- // property of the source file (e.g. a /arch:AVX2 or -mavx2 compiler flag), not
202
- // of individual functions (that can be conditionally selected at runtime).
203
- #if !defined(WUFFS_CONFIG__I_KNOW_THAT_WUFFS_MSVC_PERFORMS_BEST_WITH_ARCH_AVX2)
204
- #pragma message("Wuffs with MSVC+IX86/X64 performs best with /arch:AVX2")
205
- #endif
206
-
207
- #endif // defined(__AVX2__) || defined(__clang__)
244
+ #endif // defined(WUFFS_PRIVATE_IMPL__CPU_ARCH__X86_64)
208
245
#endif // defined(_M_X64)
209
-
210
246
#endif // (#if-chain ref AVOID_CPU_ARCH_1)
211
247
#endif // (#if-chain ref AVOID_CPU_ARCH_0)
212
248
0 commit comments