diff --git a/.ci/copy_headers.sh b/.ci/copy_headers.sh index fd4ec82b96e9d..1313d6e658f02 100755 --- a/.ci/copy_headers.sh +++ b/.ci/copy_headers.sh @@ -6,7 +6,7 @@ set -ex mkdir ../build cd ../build cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -Dall=On -Dtesting=On -Dx11=Off -Dalien=Off \ - -Dcuda=Off -Dtmva-gpu=Off -Dveccore=Off ../root + -Dcuda=Off -Dtmva-gpu=Off ../root # We need to prebuild a minimal set of targets which are responsible for header copy # or generation. make -j4 move_headers intrinsics_gen clang-tablegen-targets ClangDriverOptions \ diff --git a/.github/workflows/root-ci-config/buildconfig/global.txt b/.github/workflows/root-ci-config/buildconfig/global.txt index 833e2e7a3c34c..7c11740c1b8e7 100644 --- a/.github/workflows/root-ci-config/buildconfig/global.txt +++ b/.github/workflows/root-ci-config/buildconfig/global.txt @@ -24,9 +24,7 @@ builtin_openui5=ON builtin_pcre=OFF builtin_tbb=OFF builtin_unuran=OFF -builtin_vc=OFF builtin_vdt=OFF -builtin_veccore=OFF builtin_xrootd=OFF builtin_xxhash=OFF builtin_zeromq=ON @@ -97,9 +95,7 @@ tmva=ON unfold=ON unuran=ON uring=OFF -vc=OFF vdt=ON -veccore=OFF vecgeom=OFF webgui=ON win_broken_tests=OFF diff --git a/.github/workflows/root-ci-config/buildconfig/mac-beta.txt b/.github/workflows/root-ci-config/buildconfig/mac-beta.txt index cf6e5fca5057b..40282e1d60a3c 100644 --- a/.github/workflows/root-ci-config/buildconfig/mac-beta.txt +++ b/.github/workflows/root-ci-config/buildconfig/mac-beta.txt @@ -17,7 +17,6 @@ builtin_pcre=ON builtin_tbb=ON builtin_unuran=ON builtin_vdt=ON -builtin_veccore=ON builtin_xrootd=ON builtin_xxhash=ON builtin_zeromq=ON diff --git a/.github/workflows/root-ci-config/buildconfig/mac14.txt b/.github/workflows/root-ci-config/buildconfig/mac14.txt index 93b1254abc168..09f04347d5e25 100644 --- a/.github/workflows/root-ci-config/buildconfig/mac14.txt +++ b/.github/workflows/root-ci-config/buildconfig/mac14.txt @@ -18,9 +18,7 @@ builtin_openssl=ON builtin_pcre=ON builtin_tbb=ON builtin_unuran=ON -builtin_vc=ON builtin_vdt=ON -builtin_veccore=ON builtin_xrootd=ON builtin_xxhash=ON builtin_zeromq=ON diff --git a/.github/workflows/root-ci-config/buildconfig/mac15.txt b/.github/workflows/root-ci-config/buildconfig/mac15.txt index 374943b3459d4..8f60c0781132e 100644 --- a/.github/workflows/root-ci-config/buildconfig/mac15.txt +++ b/.github/workflows/root-ci-config/buildconfig/mac15.txt @@ -16,7 +16,6 @@ builtin_pcre=ON builtin_tbb=ON builtin_unuran=ON builtin_vdt=ON -builtin_veccore=ON builtin_xrootd=ON builtin_xxhash=ON builtin_zeromq=ON diff --git a/.github/workflows/root-ci-config/buildconfig/mac26.txt b/.github/workflows/root-ci-config/buildconfig/mac26.txt index cf6e5fca5057b..40282e1d60a3c 100644 --- a/.github/workflows/root-ci-config/buildconfig/mac26.txt +++ b/.github/workflows/root-ci-config/buildconfig/mac26.txt @@ -17,7 +17,6 @@ builtin_pcre=ON builtin_tbb=ON builtin_unuran=ON builtin_vdt=ON -builtin_veccore=ON builtin_xrootd=ON builtin_xxhash=ON builtin_zeromq=ON diff --git a/README/ReleaseNotes/v640/index.md b/README/ReleaseNotes/v640/index.md index 7e8e363b8979e..54dfa0a072fdb 100644 --- a/README/ReleaseNotes/v640/index.md +++ b/README/ReleaseNotes/v640/index.md @@ -71,6 +71,20 @@ This change affects the following classes: `TFile`, `TMapFile`, `TMemFile`, `TD ## Math +### Migration from Vc to `std::simd` + +ROOT has migrated its internal SIMD usage from **Vc** to `std::simd` where applicable. + +This change affects: + * The vectorized backend of **TFormula** and **TMath** interfaces that are available via VecCore when building ROOT with `veccore=ON`. + * Users who instantiate the GenVector classes with Vc SIMD types like `Vc::double_v`. If you rely on these types, your code must be update to use `std::simd` instead. + +* On **Windows and Apple silicon platforms**, this change has no practical impact: Vc-based SIMD via VecCore was not supported by ROOT on these platforms, and ROOT will also not try to use `std::simd` now on these platforms. + +* TFormula and TMath attempt to use `std::simd` when compiled with **Clang** (any supported version) or **GCC ≥ 9**. On other compilers or configurations, SIMD support is disabled. This affects in particular the default compiler on RHEL/AlmaLinux 8 (GCC 8.5). + +* As a consequence of this migration, the build options **vc** and **builtin_vc** are now deprecated and ignored. Their usage will cause CMake configuration errors starting from ROOT 6.42. + ## RooFit ## RDataFrame diff --git a/cmake/modules/RootBuildOptions.cmake b/cmake/modules/RootBuildOptions.cmake index 9add71c9aa9fc..5d722e2e2f1e0 100644 --- a/cmake/modules/RootBuildOptions.cmake +++ b/cmake/modules/RootBuildOptions.cmake @@ -107,9 +107,7 @@ ROOT_BUILD_OPTION(builtin_pcre OFF "Build bundled copy of PCRE") ROOT_BUILD_OPTION(builtin_png OFF "Build bundled copy of libpng") ROOT_BUILD_OPTION(builtin_tbb OFF "Build TBB internally (requires network)") ROOT_BUILD_OPTION(builtin_unuran OFF "Build bundled copy of unuran") -ROOT_BUILD_OPTION(builtin_vc OFF "Build Vc internally (requires network)") ROOT_BUILD_OPTION(builtin_vdt OFF "Build VDT internally (requires network)") -ROOT_BUILD_OPTION(builtin_veccore OFF "Build VecCore internally (requires network)") ROOT_BUILD_OPTION(builtin_xrootd OFF "Build XRootD internally (requires network)") ROOT_BUILD_OPTION(builtin_xxhash OFF "Build bundled copy of xxHash") ROOT_BUILD_OPTION(builtin_zeromq OFF "Build ZeroMQ internally (requires network)") @@ -180,9 +178,7 @@ ROOT_BUILD_OPTION(unfold OFF "Enable the unfold package [GPL]") ROOT_BUILD_OPTION(unuran OFF "Enable support for UNURAN (package for generating non-uniform random numbers) [GPL]") ROOT_BUILD_OPTION(uring OFF "Enable support for io_uring (requires liburing and Linux kernel >= 5.1)") ROOT_BUILD_OPTION(use_gsl_cblas ON "Use the CBLAS library from GSL instead of finding a more optimized BLAS library automatically with FindBLAS (the GSL CBLAS is less performant but more portable)") -ROOT_BUILD_OPTION(vc OFF "Enable support for Vc (SIMD Vector Classes for C++)") ROOT_BUILD_OPTION(vdt ON "Enable support for VDT (fast and vectorisable mathematical functions)") -ROOT_BUILD_OPTION(veccore OFF "Enable support for VecCore SIMD abstraction library") ROOT_BUILD_OPTION(vecgeom OFF "Enable support for VecGeom vectorized geometry library") ROOT_BUILD_OPTION(webgui ON "Build Web-based UI components of ROOT") ROOT_BUILD_OPTION(win_broken_tests OFF "Enable broken tests on Windows") @@ -253,9 +249,7 @@ if(all) set(tmva-pymva_defvalue ON) set(tmva-rmva_defvalue ON) set(unuran_defvalue ON) - set(vc_defvalue ON) set(vdt_defvalue ON) - set(veccore_defvalue ON) set(vecgeom_defvalue ON) set(x11_defvalue ON) set(xml_defvalue ON) @@ -295,9 +289,7 @@ if(builtin_all) set(builtin_png_defvalue ON) set(builtin_tbb_defvalue ON) # set(builtin_unuran_defvalue ON) (GPL) - set(builtin_vc_defvalue ON) set(builtin_vdt_defvalue ON) - set(builtin_veccore_defvalue ON) set(builtin_xrootd_defvalue ON) set(builtin_xxhash_defvalue ON) set(builtin_zeromq_defvalue ON) @@ -415,6 +407,14 @@ if(DEFINED rpath) "") # empty line at the end to make the deprecation message more visible endif() +foreach(opt vc builtin_vc veccore builtin_veccore) + if(${opt}) + message(DEPRECATION ">>> Option '${opt}' is deprecated and ignored." + "ROOT now uses std::experimental::simd for the vectorized TFormula and TMath classes when available (on Linux when compiling with C++20 or higher)." + "Using this option will result in configuration errors in ROOT 6.42.") + endif() +endforeach() + foreach(opt minuit2_mpi) if(${opt}) message(WARNING "The option '${opt}' can only be used to minimise thread-safe functions in Minuit2. It cannot be used for Histogram/Graph fitting and for RooFit. If you want to use Minuit2 with MPI support, it is better to build Minuit2 as a standalone library.") diff --git a/cmake/modules/RootConfiguration.cmake b/cmake/modules/RootConfiguration.cmake index 4cb5183bb8e75..783875bfeb2fc 100644 --- a/cmake/modules/RootConfiguration.cmake +++ b/cmake/modules/RootConfiguration.cmake @@ -365,20 +365,16 @@ if(cocoa) else() set(hascocoa undef) endif() -if(vc) - set(hasvc define) -else() - set(hasvc undef) -endif() if(vdt) set(hasvdt define) else() set(hasvdt undef) endif() -if(veccore) - set(hasveccore define) +# Assume std::experimental::simd is available on Linux if we build with C++20 on Linux +if(WIN32 OR APPLE OR CMAKE_CXX_STANDARD LESS 20) + set(hasstdsimd undef) else() - set(hasveccore undef) + set(hasstdsimd define) endif() if(dataframe) set(hasdataframe define) diff --git a/cmake/modules/SearchInstalledSoftware.cmake b/cmake/modules/SearchInstalledSoftware.cmake index a6dedfbfaf3f8..c15c034adba6e 100644 --- a/cmake/modules/SearchInstalledSoftware.cmake +++ b/cmake/modules/SearchInstalledSoftware.cmake @@ -1365,194 +1365,6 @@ if(builtin_tbb) set(TBB_TARGET TBB) endif() -#---Check for Vc--------------------------------------------------------------------- -if(builtin_vc) - unset(Vc_FOUND) - unset(Vc_FOUND CACHE) - set(vc ON CACHE BOOL "Enabled because builtin_vc requested (${vc_description})" FORCE) -elseif(vc) - if(fail-on-missing) - find_package(Vc 1.4.4 CONFIG QUIET REQUIRED) - else() - find_package(Vc 1.4.4 CONFIG QUIET) - if(NOT Vc_FOUND) - message(STATUS "Vc library not found, support for it disabled.") - message(STATUS "Please enable the option 'builtin_vc' to build Vc internally.") - set(vc OFF CACHE BOOL "Disabled because Vc not found (${vc_description})" FORCE) - endif() - endif() - if(Vc_FOUND) - set_property(DIRECTORY APPEND PROPERTY INCLUDE_DIRECTORIES ${Vc_INCLUDE_DIR}) - BUILD_ROOT_INCLUDE_PATH("${Vc_INCLUDE_DIR}") - endif() -endif() - -if(vc AND NOT Vc_FOUND) - ROOT_CHECK_CONNECTION_AND_DISABLE_OPTION("vc") -endif() - -if(vc AND NOT Vc_FOUND) - set(Vc_VERSION "1.4.4") - set(Vc_PROJECT "Vc-${Vc_VERSION}") - set(Vc_SRC_URI "${lcgpackages}/${Vc_PROJECT}.tar.gz") - set(Vc_DESTDIR "${CMAKE_BINARY_DIR}/externals") - set(Vc_ROOTDIR "${Vc_DESTDIR}/${CMAKE_INSTALL_PREFIX}") - set(Vc_LIBNAME "${CMAKE_STATIC_LIBRARY_PREFIX}Vc${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(Vc_LIBRARY "${Vc_ROOTDIR}/lib/${Vc_LIBNAME}") - - ExternalProject_Add(VC - URL ${Vc_SRC_URI} - URL_HASH SHA256=5933108196be44c41613884cd56305df320263981fe6a49e648aebb3354d57f3 - BUILD_IN_SOURCE 0 - BUILD_BYPRODUCTS ${Vc_LIBRARY} - LOG_DOWNLOAD 1 LOG_CONFIGURE 1 LOG_BUILD 1 LOG_INSTALL 1 LOG_OUTPUT_ON_FAILURE 1 - CMAKE_ARGS -G ${CMAKE_GENERATOR} - -DCMAKE_POLICY_VERSION_MINIMUM=3.5 - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_CXX_FLAGS=${ROOT_EXTERNAL_CXX_FLAGS} - -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX} - INSTALL_COMMAND env DESTDIR=${Vc_DESTDIR} ${CMAKE_COMMAND} --build . --target install - TIMEOUT 600 - ) - - set(VC_TARGET Vc) - set(Vc_LIBRARIES Vc) - set(Vc_INCLUDE_DIR ${Vc_ROOTDIR}/include) - set(Vc_CMAKE_MODULES_DIR ${Vc_ROOTDIR}/lib/cmake/Vc) - - add_library(VcExt STATIC IMPORTED) - set_property(TARGET VcExt PROPERTY IMPORTED_LOCATION ${Vc_LIBRARY}) - add_dependencies(VcExt VC) - - add_library(Vc INTERFACE) - target_include_directories(Vc SYSTEM BEFORE INTERFACE $) - target_link_libraries(Vc INTERFACE VcExt) - - find_package_handle_standard_args(Vc - FOUND_VAR Vc_FOUND - REQUIRED_VARS Vc_INCLUDE_DIR Vc_LIBRARIES Vc_CMAKE_MODULES_DIR - VERSION_VAR Vc_VERSION) - - # FIXME: This is a workaround to let ROOT find the headers at runtime if - # they are in the build directory. This is necessary until we decide how to - # treat externals with headers used by ROOT - if(NOT EXISTS ${CMAKE_BINARY_DIR}/include/Vc) - if (NOT EXISTS ${CMAKE_BINARY_DIR}/include) - execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_BINARY_DIR}/include) - endif() - execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink - ${Vc_INCLUDE_DIR}/Vc ${CMAKE_BINARY_DIR}/include/Vc) - endif() - # end of workaround - - install(DIRECTORY ${Vc_ROOTDIR}/ DESTINATION ".") -endif() - -if(Vc_FOUND) - # Missing from VcConfig.cmake - set(Vc_INCLUDE_DIRS ${Vc_INCLUDE_DIR}) -endif() - -#---Check for VecCore-------------------------------------------------------------------- -if(builtin_veccore) - unset(VecCore_FOUND) - unset(VecCore_FOUND CACHE) - set(veccore ON CACHE BOOL "Enabled because builtin_veccore requested (${veccore_description})" FORCE) -elseif(veccore) - if(vc) - set(VecCore_COMPONENTS Vc) - endif() - if(fail-on-missing) - find_package(VecCore 0.4.2 CONFIG QUIET REQUIRED COMPONENTS ${VecCore_COMPONENTS}) - else() - find_package(VecCore 0.4.2 CONFIG QUIET COMPONENTS ${VecCore_COMPONENTS}) - if(NOT VecCore_FOUND) - ROOT_CHECK_CONNECTION("veccore=OFF") - if(NO_CONNECTION) - message(STATUS "VecCore not found and no internet connection, disabling the 'veccore' option") - set(veccore OFF CACHE BOOL "Disabled because not found and No internet connection" FORCE) - else() - message(STATUS "VecCore not found, switching on 'builtin_veccore' option.") - set(builtin_veccore ON CACHE BOOL "Enabled because veccore requested and not found externally (${builtin_veccore_description})" FORCE) - endif() - endif() - endif() - if(VecCore_FOUND) - set_property(DIRECTORY APPEND PROPERTY INCLUDE_DIRECTORIES ${VecCore_INCLUDE_DIRS}) - endif() -endif() - -if(builtin_veccore) - ROOT_CHECK_CONNECTION_AND_DISABLE_OPTION("builtin_veccore") -endif() - -if(builtin_veccore) - set(VecCore_VERSION "0.8.2") - set(VecCore_PROJECT "VecCore-${VecCore_VERSION}") - set(VecCore_SRC_URI "${lcgpackages}/${VecCore_PROJECT}.tar.gz") - set(VecCore_DESTDIR "${CMAKE_BINARY_DIR}/externals") - set(VecCore_ROOTDIR "${VecCore_DESTDIR}/${CMAKE_INSTALL_PREFIX}") - - ExternalProject_Add(VECCORE - URL ${VecCore_SRC_URI} - URL_HASH SHA256=1268bca92acf00acd9775f1e79a2da7b1d902733d17e283e0dd5e02c41ac9666 - BUILD_IN_SOURCE 0 - LOG_DOWNLOAD 1 LOG_CONFIGURE 1 LOG_BUILD 1 LOG_INSTALL 1 LOG_OUTPUT_ON_FAILURE 1 - CMAKE_ARGS -G ${CMAKE_GENERATOR} - -DBUILD_TESTING=OFF - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_CXX_FLAGS=${ROOT_EXTERNAL_CXX_FLAGS} - -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX} - INSTALL_COMMAND env DESTDIR=${VecCore_DESTDIR} ${CMAKE_COMMAND} --build . --target install - TIMEOUT 600 - ) - - set(VECCORE_TARGET VecCore) - set(VecCore_LIBRARIES VecCore) - list(APPEND VecCore_INCLUDE_DIRS ${VecCore_ROOTDIR}/include) - - add_library(VecCore INTERFACE) - target_include_directories(VecCore SYSTEM INTERFACE $) - add_dependencies(VecCore VECCORE) - - if (Vc_FOUND) - set(VecCore_Vc_FOUND True) - set(VecCore_Vc_DEFINITIONS -DVECCORE_ENABLE_VC) - set(VecCore_Vc_INCLUDE_DIR ${Vc_INCLUDE_DIR}) - set(VecCore_Vc_LIBRARIES ${Vc_LIBRARIES}) - - set(VecCore_DEFINITIONS ${VecCore_Vc_DEFINITIONS}) - list(APPEND VecCore_INCLUDE_DIRS ${VecCore_Vc_INCLUDE_DIR}) - set(VecCore_LIBRARIES ${VecCore_LIBRARIES} ${Vc_LIBRARIES}) - target_link_libraries(VecCore INTERFACE ${Vc_LIBRARIES}) - endif() - - find_package_handle_standard_args(VecCore - FOUND_VAR VecCore_FOUND - REQUIRED_VARS VecCore_INCLUDE_DIRS VecCore_LIBRARIES - VERSION_VAR VecCore_VERSION) - - # FIXME: This is a workaround to let ROOT find the headers at runtime if - # they are in the build directory. This is necessary until we decide how to - # treat externals with headers used by ROOT - if(NOT EXISTS ${CMAKE_BINARY_DIR}/include/VecCore) - if (NOT EXISTS ${CMAKE_BINARY_DIR}/include) - execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_BINARY_DIR}/include) - endif() - execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink - ${VecCore_ROOTDIR}/include/VecCore ${CMAKE_BINARY_DIR}/include/VecCore) - endif() - # end of workaround - - install(DIRECTORY ${VecCore_ROOTDIR}/ DESTINATION ".") -endif() - if(builtin_vdt) ROOT_CHECK_CONNECTION_AND_DISABLE_OPTION("builtin_vdt") endif() @@ -1626,12 +1438,6 @@ endif() if (vecgeom) message(STATUS "Looking for VecGeom") find_package(VecGeom 1.2 CONFIG) - if(builtin_veccore) - message(WARNING "ROOT must be built against the VecCore installation that was used to build VecGeom; builtin_veccore cannot be used. Option VecGeom will be disabled.") - set(vecgeom OFF CACHE BOOL "Disabled because non-builtin VecGeom specified but its VecCore cannot be found" FORCE) - elseif(builtin_veccore AND fail-on-missing) - message(SEND_ERROR "ROOT must be built against the VecCore installation that was used to build VecGeom; builtin_veccore cannot be used. Ensure that builtin_veccore option is OFF.") - endif() if(NOT VecGeom_FOUND ) if(fail-on-missing) message(SEND_ERROR "VecGeom not found. Ensure that the installation of VecGeom is in the CMAKE_PREFIX_PATH") diff --git a/config/RConfigure.in b/config/RConfigure.in index a4b0165d10485..583ff78a081e9 100644 --- a/config/RConfigure.in +++ b/config/RConfigure.in @@ -37,9 +37,8 @@ #@hasxft@ R__HAS_XFT /**/ #@hasclad@ R__HAS_CLAD /**/ #@hascocoa@ R__HAS_COCOA /**/ -#@hasvc@ R__HAS_VC /**/ #@hasvdt@ R__HAS_VDT /**/ -#@hasveccore@ R__HAS_VECCORE /**/ +#@hasstdsimd@ R__HAS_STD_SIMD /**/ #@usecxxmodules@ R__USE_CXXMODULES /**/ #@uselibc++@ R__USE_LIBCXX /**/ #@has_found_attribute_always_inline@ R__HAS_ATTRIBUTE_ALWAYS_INLINE /**/ @@ -54,12 +53,6 @@ #@use_less_includes@ R__LESS_INCLUDES /**/ #define R__HARDWARE_INTERFERENCE_SIZE @hardwareinterferencesize@ /*Determined at CMake configure to be stable across all TUs*/ -#if defined(R__HAS_VECCORE) && defined(R__HAS_VC) -#ifndef VECCORE_ENABLE_VC -#define VECCORE_ENABLE_VC -#endif -#endif - #@uselz4@ R__HAS_DEFAULT_LZ4 /**/ #@usezlib@ R__HAS_DEFAULT_ZLIB /**/ #@uselzma@ R__HAS_DEFAULT_LZMA /**/ diff --git a/core/clingutils/CMakeLists.txt b/core/clingutils/CMakeLists.txt index 6d2d893d5a8c1..df49af2198f91 100644 --- a/core/clingutils/CMakeLists.txt +++ b/core/clingutils/CMakeLists.txt @@ -116,9 +116,6 @@ set(clinginclude ${CMAKE_SOURCE_DIR}/interpreter/cling/include) set(custom_modulemaps) if (runtime_cxxmodules) set(custom_modulemaps boost.modulemap tinyxml2.modulemap cuda.modulemap module.modulemap.build) - if(vc) - set(custom_modulemaps ${custom_modulemaps} vc.modulemap) - endif() # We need to override the default modulemap because instead of producing a # single std.pcm, produces hundreds of pcms. This changed with MacOSX14.4.sdk diff --git a/geom/vecgeom/CMakeLists.txt b/geom/vecgeom/CMakeLists.txt index 2e94e13078bf0..9fd8078d7e008 100644 --- a/geom/vecgeom/CMakeLists.txt +++ b/geom/vecgeom/CMakeLists.txt @@ -10,9 +10,6 @@ ############################################################################ include_directories(AFTER SYSTEM ${VECGEOM_INCLUDE_DIRS}) -if ( Vc_FOUND ) - include_directories( AFTER SYSTEM ${Vc_INCLUDE_DIRS}) -endif() ROOT_STANDARD_LIBRARY_PACKAGE(ConverterVG HEADERS diff --git a/hist/hist/inc/Math/WrappedMultiTF1.h b/hist/hist/inc/Math/WrappedMultiTF1.h index b7160e5806f29..9e97764b8e178 100644 --- a/hist/hist/inc/Math/WrappedMultiTF1.h +++ b/hist/hist/inc/Math/WrappedMultiTF1.h @@ -373,11 +373,7 @@ namespace ROOT { // case of polynomial function (no parameter dependency) (case for dim = 1) assert(fDim == 1); if (ipar == 0) return 1.0; -#ifdef R__HAS_VECCORE - return vecCore::math::Pow(x[0], static_cast(ipar)); -#else - return std::pow(x[0], static_cast(ipar)); -#endif + return pow(x[0], static_cast(ipar)); } else { // case of general linear function (built in TFormula with ++ ) return GeneralLinearFunctionDerivation::DoParameterDerivative(this, x, ipar); diff --git a/hist/hist/inc/TF1.h b/hist/hist/inc/TF1.h index e1e6a774ee9d2..0248dcedb8c26 100644 --- a/hist/hist/inc/TF1.h +++ b/hist/hist/inc/TF1.h @@ -704,7 +704,7 @@ class TF1 : public TNamed, public TAttLine, public TAttFill, public TAttMarker { template T EvalParTempl(const T *data, const Double_t *params = nullptr); -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD inline double EvalParVec(const Double_t *data, const Double_t *params); #endif @@ -718,11 +718,13 @@ namespace ROOT { void TF1Builder::Build(TF1 *f, Func func) { // check if vector interface is supported by Func +#ifdef R__HAS_STD_SIMD if constexpr(std::is_invocable_r_v) { - // if ROOT was not built with veccore and vc, Double_v is just an alias for the scalar double - f->fType = std::is_same::value ? TF1::EFType::kTemplScalar : TF1::EFType::kTemplVec; + f->fType = TF1::EFType::kTemplVec; f->fFunctor.reset(new TF1::TF1FunctorPointerImpl(ROOT::Math::ParamFunctorTempl(func))); - } else { + } else +#endif + { f->fType = TF1::EFType::kTemplScalar; f->fFunctor.reset(new TF1::TF1FunctorPointerImpl(ROOT::Math::ParamFunctorTempl(func))); } @@ -734,11 +736,13 @@ namespace ROOT { void TF1Builder::Build(TF1 *f, Func *func) { // check if vector interface is supported by Func +#ifdef R__HAS_STD_SIMD if constexpr(std::is_invocable_r_v) { - // if ROOT was not built with veccore and vc, Double_v is just an alias for the scalar double - f->fType = std::is_same::value ? TF1::EFType::kTemplScalar : TF1::EFType::kTemplVec; + f->fType = TF1::EFType::kTemplVec; f->fFunctor.reset(new TF1::TF1FunctorPointerImpl(ROOT::Math::ParamFunctorTempl(func))); - } else { + } else +#endif + { f->fType = TF1::EFType::kTemplScalar; f->fFunctor.reset(new TF1::TF1FunctorPointerImpl(ROOT::Math::ParamFunctorTempl(func))); } @@ -823,7 +827,7 @@ inline T TF1::EvalParTempl(const T *data, const Double_t *params) return TMath::SignalingNaN(); } -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD // Internal to TF1. Evaluates Vectorized TF1 on data of type Double_v inline double TF1::EvalParVec(const Double_t *data, const Double_t *params) { @@ -841,7 +845,7 @@ inline double TF1::EvalParVec(const Double_t *data, const Double_t *params) // res = GetSave(x); return TMath::SignalingNaN(); } - return vecCore::Get(res, 0); + return res[0]; } #endif diff --git a/hist/hist/inc/TF12.h b/hist/hist/inc/TF12.h index 39575355cba37..d1d3cec8738f0 100644 --- a/hist/hist/inc/TF12.h +++ b/hist/hist/inc/TF12.h @@ -39,7 +39,7 @@ class TF12 : public TF1 { Double_t Eval(Double_t x, Double_t y=0, Double_t z=0, Double_t t=0) const override; Double_t EvalPar(const Double_t *x, const Double_t *params=nullptr) override; -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD using TF1::Eval; // to not hide the vectorized version using TF1::EvalPar; // to not hide the vectorized version #endif diff --git a/hist/hist/inc/TFormula.h b/hist/hist/inc/TFormula.h index 1ef8aac324ecf..10de476226f69 100644 --- a/hist/hist/inc/TFormula.h +++ b/hist/hist/inc/TFormula.h @@ -123,7 +123,6 @@ class TFormula : public TNamed static Bool_t IsDefaultVariableName(const TString &name); void ReplaceAllNames(TString &formula, std::map &substitutions); void FillParametrizedFunctions(std::map, std::pair> &functions); - void FillVecFunctionsShurtCuts(); void ReInitializeEvalMethod(); std::string GetGradientFuncName() const { return std::string(GetUniqueFuncName().Data()) + "_grad_1"; @@ -168,7 +167,7 @@ class TFormula : public TNamed void SetPredefinedParamNames(); Double_t DoEval(const Double_t * x, const Double_t * p = nullptr) const; -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD ROOT::Double_v DoEvalVec(const ROOT::Double_v *x, const Double_t *p = nullptr) const; #endif @@ -245,7 +244,7 @@ class TFormula : public TNamed T EvalPar(const T *x, const Double_t *params = nullptr) const { return EvalParVec(x, params); } -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD ROOT::Double_v EvalParVec(const ROOT::Double_v *x, const Double_t *params = nullptr) const; #endif TString GetExpFormula(Option_t *option = "") const; diff --git a/hist/hist/src/HFitImpl.cxx b/hist/hist/src/HFitImpl.cxx index f1a46467b8aed..7134fae5fee51 100644 --- a/hist/hist/src/HFitImpl.cxx +++ b/hist/hist/src/HFitImpl.cxx @@ -239,7 +239,7 @@ TFitResultPtr HFit::Fit(FitObject * h1, TF1 *f1 , Foption_t & fitOption , const // if option grad is specified use gradient if ( (linear || fitOption.Gradient) ) fitter->SetFunction(ROOT::Math::WrappedMultiTF1(*f1)); -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD else if(f1->IsVectorized()) fitter->SetFunction(static_cast &>(ROOT::Math::WrappedMultiTF1Templ(*f1))); #endif diff --git a/hist/hist/src/TF1.cxx b/hist/hist/src/TF1.cxx index 90e3347efba0c..e0aabc8e50801 100644 --- a/hist/hist/src/TF1.cxx +++ b/hist/hist/src/TF1.cxx @@ -1508,7 +1508,7 @@ Double_t TF1::EvalPar(const Double_t *x, const Double_t *params) return result; } -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD if (fType == EFType::kTemplVec) { if (fFunctor) { if (params) result = EvalParVec(x, params); diff --git a/hist/hist/src/TFormula.cxx b/hist/hist/src/TFormula.cxx index 11cfbb2746e2a..d9ae3c6aa9bdb 100644 --- a/hist/hist/src/TFormula.cxx +++ b/hist/hist/src/TFormula.cxx @@ -56,9 +56,9 @@ std::string doubleToString(double val) // the native SIMD width used in compiled ROOT. std::string vectorizedArgType() { -#ifdef VECCORE_ENABLE_VC +#ifdef R__HAS_STD_SIMD auto n = ROOT::Double_v::size(); - return "Vc::fixed_size_simd"; + return "std::experimental::resize_simd_t<" + std::to_string(n) + ", ROOT::Double_v>"; #else // For other possible VecCore backends, we assume using the same type is fine. return "ROOT::Double_v"; @@ -517,7 +517,7 @@ TFormula::TFormula(const char *name, const char *formula, bool addToGlobList, bo fNumber = 0; fLambdaPtr = nullptr; fVectorized = vectorize; -#ifndef R__HAS_VECCORE +#ifndef R__HAS_STD_SIMD fVectorized = false; #endif @@ -971,42 +971,9 @@ void TFormula::FillDefaults() for (auto con : defconsts) { fConsts[con.first] = con.second; } - if (fVectorized) { - FillVecFunctionsShurtCuts(); - } else { - for (auto fun : funShortcuts) { - fFunctionsShortcuts[fun.first] = fun.second; - } - } -} - -//////////////////////////////////////////////////////////////////////////////// -/// Fill the shortcuts for vectorized functions -/// We will replace for example sin with vecCore::Mat::Sin -/// - -void TFormula::FillVecFunctionsShurtCuts() { -#ifdef R__HAS_VECCORE - const pair vecFunShortcuts[] = - { {"sin","vecCore::math::Sin" }, - {"cos","vecCore::math::Cos" }, {"exp","vecCore::math::Exp"}, {"log","vecCore::math::Log"}, {"log10","vecCore::math::Log10"}, - {"tan","vecCore::math::Tan"}, - //{"sinh","vecCore::math::Sinh"}, {"cosh","vecCore::math::Cosh"},{"tanh","vecCore::math::Tanh"}, - {"asin","vecCore::math::ASin"}, - {"acos","TMath::Pi()/2-vecCore::math::ASin"}, - {"atan","vecCore::math::ATan"}, - {"atan2","vecCore::math::ATan2"}, {"sqrt","vecCore::math::Sqrt"}, - {"ceil","vecCore::math::Ceil"}, {"floor","vecCore::math::Floor"}, {"pow","vecCore::math::Pow"}, - {"cbrt","vecCore::math::Cbrt"},{"abs","vecCore::math::Abs"}, - {"min","vecCore::math::Min"},{"max","vecCore::math::Max"},{"sign","vecCore::math::Sign" } - //{"sq","TMath::Sq"}, {"binomial","TMath::Binomial"} // this last two functions will not work in vectorized mode - }; - // replace in the data member maps fFunctionsShortcuts - for (auto fun : vecFunShortcuts) { + for (auto fun : funShortcuts) { fFunctionsShortcuts[fun.first] = fun.second; } -#endif - // do nothing in case Veccore is not enabled } //////////////////////////////////////////////////////////////////////////////// @@ -3145,7 +3112,7 @@ void TFormula::ReplaceParamName(TString & formula, const TString & oldName, cons //////////////////////////////////////////////////////////////////////////////// void TFormula::SetVectorized(Bool_t vectorized) { -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD if (fNdim == 0) { Info("SetVectorized","Cannot vectorized a function of zero dimension"); return; @@ -3164,13 +3131,12 @@ void TFormula::SetVectorized(Bool_t vectorized) fMethod.reset(); - FillVecFunctionsShurtCuts(); // to replace with the right vectorized signature (e.g. sin -> vecCore::math::Sin) PreProcessFormula(fFormula); PrepareFormula(fFormula); } #else if (vectorized) - Warning("SetVectorized", "Cannot set vectorized -- try building with option -Dbuiltin_veccore=On"); + Warning("SetVectorized", "Cannot set vectorized -- try building with C++20 on Linux"); #endif } @@ -3180,11 +3146,11 @@ Double_t TFormula::EvalPar(const Double_t *x,const Double_t *params) const if (!fVectorized) return DoEval(x, params); -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD if (fNdim == 0 || !x) { ROOT::Double_v ret = DoEvalVec(nullptr, params); - return vecCore::Get( ret, 0 ); + return ret[0]; } // otherwise, regular Double_t inputs on a vectorized function @@ -3200,7 +3166,7 @@ Double_t TFormula::EvalPar(const Double_t *x,const Double_t *params) const xvec[i] = x[i]; ROOT::Double_v ans = DoEvalVec(xvec.data(), params); - return vecCore::Get(ans, 0); + return ans[0]; } // allocating a vector is much slower (we do only for dim > 4) std::vector xvec(fNdim); @@ -3208,12 +3174,12 @@ Double_t TFormula::EvalPar(const Double_t *x,const Double_t *params) const xvec[i] = x[i]; ROOT::Double_v ans = DoEvalVec(xvec.data(), params); - return vecCore::Get(ans, 0); + return ans[0]; #else // this should never happen, because fVectorized can only be set true with - // R__HAS_VECCORE, but just in case: - Error("EvalPar", "Formula is vectorized (even though VECCORE is disabled!)"); + // R__HAS_STD_SIMD, but just in case: + Error("EvalPar", "Formula is vectorized (even though vectorizaton is not supported!)"); return TMath::QuietNaN(); #endif } @@ -3423,7 +3389,7 @@ void TFormula::HessianPar(const Double_t *x, Double_t *result) { } //////////////////////////////////////////////////////////////////////////////// -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD // ROOT::Double_v TFormula::Eval(ROOT::Double_v x, ROOT::Double_v y, ROOT::Double_v z, ROOT::Double_v t) const // { // ROOT::Double_v xxx[] = {x, y, z, t}; @@ -3443,16 +3409,16 @@ ROOT::Double_v TFormula::EvalParVec(const ROOT::Double_v *x, const Double_t *par if (gDebug) Info("EvalPar", "Function is not vectorized - converting ROOT::Double_v into Double_t and back"); - const int vecSize = vecCore::VectorSize(); + const int vecSize = ROOT::Double_v::size(); std::vector xscalars(vecSize*fNdim); for (int i = 0; i < vecSize; i++) for (int j = 0; j < fNdim; j++) - xscalars[i*fNdim+j] = vecCore::Get(x[j],i); + xscalars[i * fNdim + j] = x[j][i]; ROOT::Double_v answers(0.); for (int i = 0; i < vecSize; i++) - vecCore::Set(answers, i, DoEval(&xscalars[i*fNdim], params)); + answers[i] = DoEval(&xscalars[i * fNdim], params); return answers; } @@ -3519,7 +3485,7 @@ Double_t TFormula::DoEval(const double * x, const double * params) const //////////////////////////////////////////////////////////////////////////////// // Copied from DoEval, but this is the vectorized version -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD ROOT::Double_v TFormula::DoEvalVec(const ROOT::Double_v *x, const double *params) const { if (!fReadyToExecute) { @@ -3563,8 +3529,7 @@ ROOT::Double_v TFormula::DoEvalVec(const ROOT::Double_v *x, const double *params } return result; } -#endif // R__HAS_VECCORE - +#endif // R__HAS_STD_SIMD ////////////////////////////////////////////////////////////////////////////// /// Re-initialize eval method diff --git a/interpreter/cling/lib/Interpreter/CMakeLists.txt b/interpreter/cling/lib/Interpreter/CMakeLists.txt index a45992c49661f..cf23baa1e1adc 100644 --- a/interpreter/cling/lib/Interpreter/CMakeLists.txt +++ b/interpreter/cling/lib/Interpreter/CMakeLists.txt @@ -58,10 +58,6 @@ set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} ) -if (vc) - set(cling_vc_support ON) -endif() - # clingInterpreter depends on Options.inc to be tablegen-ed # (target ClangDriverOptions) from in-tree builds. set(CLING_DEPENDS ClingDriverOptions) @@ -375,10 +371,6 @@ add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/cling-compiledata.h set_property(SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/CIFactory.cpp APPEND PROPERTY OBJECT_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/cling-compiledata.h) -if(cling_vc_support) - set_property(SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/CIFactory.cpp - APPEND PROPERTY COMPILE_DEFINITIONS "CLING_SUPPORT_VC") -endif() # If LLVM is external, but Clang is builtin, we must use some files # from patched (builtin) version of LLVM diff --git a/math/genvector/inc/Math/GenVector/Plane3D.h b/math/genvector/inc/Math/GenVector/Plane3D.h index cb46c32ac8a9e..a0592994716c3 100644 --- a/math/genvector/inc/Math/GenVector/Plane3D.h +++ b/math/genvector/inc/Math/GenVector/Plane3D.h @@ -30,6 +30,23 @@ namespace Math { namespace Impl { +template +void PrintValue(std::ostream &os, const T &v) +{ + if constexpr (std::is_arithmetic_v) { + os << v; + } else { + // assume std::simd for non-arithmetic types + os << "["; + for (std::size_t i = 0; i < T::size(); ++i) { + if (i) + os << ", "; + os << v[i]; + } + os << "]"; + } +} + //_______________________________________________________________________________ /** Class describing a geometrical plane in 3 dimensions. @@ -295,8 +312,17 @@ class Plane3D { template std::ostream &operator<<(std::ostream &os, const Plane3D &p) { - os << "\n" - << p.Normal().X() << " " << p.Normal().Y() << " " << p.Normal().Z() << " " << p.HesseDistance() << "\n"; + os << "\n"; + + PrintValue(os, p.Normal().X()); + os << " "; + PrintValue(os, p.Normal().Y()); + os << " "; + PrintValue(os, p.Normal().Z()); + os << " "; + PrintValue(os, p.HesseDistance()); + + os << "\n"; return os; } diff --git a/math/mathcore/CMakeLists.txt b/math/mathcore/CMakeLists.txt index e907b0d6fcd5a..b21afc12da87d 100644 --- a/math/mathcore/CMakeLists.txt +++ b/math/mathcore/CMakeLists.txt @@ -105,14 +105,6 @@ if(runtime_cxxmodules) # 'abs' shadow declaration (from stl math.h) is broken. # FIXME: Revise after a llvm upgrade or reproduce it outside rootcling. list(REMOVE_ITEM HEADERS "Math/Math.h") - - if(vc) - # We do not link against libVc.a thus it makes no sense to check for - # version compatibility between libraries and header files. This fixes - # ROOT-11002 where upon building the modules.idx we run the static ctor - # runLibraryAbiCheck which fails to find the corresponding symbol. - set(dictoptions "-m" "Vc" "-mByproduct" "Vc" "-D" "Vc_NO_VERSION_CHECK") - endif(vc) endif() ROOT_ADD_C_FLAG(_flags -Wno-strict-overflow) # Avoid what it seems a compiler false positive warning @@ -123,11 +115,6 @@ if(imt) set(MATHCORE_DEPENDENCIES Imt) endif() -if(veccore) - set(MATHCORE_BUILTINS VECCORE) - set(MATHCORE_LIBRARIES ${VecCore_LIBRARIES}) -endif() - ROOT_STANDARD_LIBRARY_PACKAGE(MathCore HEADERS ${HEADERS} @@ -190,30 +177,14 @@ ROOT_STANDARD_LIBRARY_PACKAGE(MathCore src/TStatistic.cxx src/UnBinData.cxx src/Util.cxx - src/VectorizedTMath.cxx - LIBRARIES - ${MATHCORE_LIBRARIES} DICTIONARY_OPTIONS -writeEmptyRootPCM ${dictoptions} DEPENDENCIES Core ${MATHCORE_DEPENDENCIES} - BUILTINS - ${MATHCORE_BUILTINS} ) -target_include_directories(MathCore PRIVATE ${Vc_INCLUDE_DIR}) -target_include_directories(MathCore PRIVATE ${VecCore_INCLUDE_DIRS}) - -list(APPEND math_incl ${Vc_INCLUDE_DIR}) -list(APPEND math_incl ${VecCore_INCLUDE_DIRS}) - -foreach(incl ${math_incl}) - target_include_directories(MathCore PUBLIC $) -endforeach() - -target_compile_definitions(MathCore INTERFACE ${VecCore_DEFINITIONS}) target_link_libraries(MathCore PRIVATE ${CMAKE_THREAD_LIBS_INIT}) ROOT_ADD_TEST_SUBDIRECTORY(test) diff --git a/math/mathcore/inc/Fit/FitData.h b/math/mathcore/inc/Fit/FitData.h index 9a403fcda26f3..70339d2ce5802 100644 --- a/math/mathcore/inc/Fit/FitData.h +++ b/math/mathcore/inc/Fit/FitData.h @@ -350,7 +350,7 @@ namespace ROOT { fWrapped = false; } -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD /** * Compute the number that should be added to dataSize in order to have a * multiple of SIMD vector size. @@ -358,14 +358,14 @@ namespace ROOT { static unsigned VectorPadding(unsigned dataSize) { unsigned padding = 0; - unsigned modP = (dataSize) % vecCore::VectorSize(); + unsigned modP = (dataSize) % ROOT::Double_v::size(); if (modP > 0) - padding = vecCore::VectorSize() - modP; + padding = ROOT::Double_v::size() - modP; return padding; } #else /** - * If VecCore is not defined, there is no vectorization available and the SIMD vector + * If std::experimental::simd is not available, there is no vectorization available and the SIMD vector * size will always be one. Then, as every number is a multiple of SIMD vector size, the * padding will always be zero. */ diff --git a/math/mathcore/inc/Fit/FitUtil.h b/math/mathcore/inc/Fit/FitUtil.h index 0d32d7e18d794..0955091f29f54 100644 --- a/math/mathcore/inc/Fit/FitUtil.h +++ b/math/mathcore/inc/Fit/FitUtil.h @@ -36,22 +36,9 @@ //#define DEBUG_FITUTIL -#ifdef R__HAS_VECCORE -namespace vecCore { -template -vecCore::Mask Int2Mask(unsigned i) -{ - T x; - for (unsigned j = 0; j < vecCore::VectorSize(); j++) - vecCore::Set(x, j, j); - return vecCore::Mask(x < T(i)); -} -} -#endif - namespace ROOT { - namespace Fit { +namespace Fit { /** namespace defining utility free functions using in Fit for evaluating the various fit method @@ -61,6 +48,64 @@ namespace ROOT { */ namespace FitUtil { +namespace Detail { + +template +T NumericMax() +{ + return T{std::numeric_limits::max()}; +} + +template <> +inline double NumericMax() +{ + return std::numeric_limits::max(); +} + +template <> +inline float NumericMax() +{ + return std::numeric_limits::max(); +} + +template +auto Int2Mask(unsigned i) +{ + T x; + for (unsigned j = 0; j < T::size(); j++) { + x[j] = j; + } + return x < T(i); +} + +template +void Load(V &v, S const *ptr) +{ + for (size_t i = 0; i < V::size(); ++i) + v[i] = ptr[i]; +} + +template +auto ReduceAdd(const T &v) +{ + typename T::value_type result(0); + for (size_t i = 0; i < T::size(); ++i) { + result += v[i]; + } + return result; +} + +template +bool MaskEmpty(T mask) +{ + for (size_t i = 0; i < T::size(); ++i) + if (mask[i]) + return false; + return true; +} + +} // namespace Detail + typedef ROOT::Math::IParamMultiFunction IModelFunction; typedef ROOT::Math::IParamMultiGradFunction IGradModelFunction; @@ -226,7 +271,7 @@ namespace FitUtil { return (*f)(x, p); } -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD #if __clang_major__ > 16 #pragma clang diagnostic push @@ -235,23 +280,15 @@ namespace FitUtil { inline double ExecFunc(const IModelFunctionTempl *f, const double *x, const double *p) const { - // Figure out the size of the SIMD vectors. - constexpr static int vecSize = sizeof(ROOT::Double_v) / sizeof(double); - double xBuffer[vecSize]; ROOT::Double_v xx[fDim]; for (unsigned int i = 0; i < fDim; ++i) { - // The Load() function reads multiple values from the pointed-to - // memory into xx. This is why we have to copy the input values from - // the x array into a zero-padded buffer to read from. Otherwise, - // Load() would access the x array out of bounds. - *xBuffer = x[i]; - for(int j = 1; j < vecSize; ++j) { - xBuffer[j] = 0.0; + xx[i][0] = x[i]; + for (std::size_t j = 1; j < ROOT::Double_v::size(); ++j) { + xx[i][j] = 0.0; } - vecCore::Load(xx[i], xBuffer); } auto res = (*f)(xx, p); - return vecCore::Get(res, 0); + return res[0]; } #if __clang_major__ > 16 @@ -316,7 +353,7 @@ namespace FitUtil { ::ROOT::EExecutionPolicy executionPolicy = ::ROOT::EExecutionPolicy::kSequential, unsigned nChunks = 0); - // #ifdef R__HAS_VECCORE + // #ifdef R__HAS_STD_SIMD // template ::value)>> // void EvaluateLogLGradient(const IModelFunctionTempl &, const UnBinData &, const double *, double // *, unsigned int & ) {} @@ -373,7 +410,7 @@ namespace FitUtil { template struct Evaluate { -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD static double EvalChi2(const IModelFunctionTempl &func, const BinData &data, const double *p, unsigned int &nPoints, ::ROOT::EExecutionPolicy executionPolicy, unsigned nChunks = 0) { @@ -404,16 +441,16 @@ namespace FitUtil { double maxResValue = std::numeric_limits::max() / n; std::vector ones{1., 1., 1., 1.}; - auto vecSize = vecCore::VectorSize(); + auto vecSize = T::size(); auto mapFunction = [&](unsigned int i) { // in case of no error in y invError=1 is returned T x1, y, invErrorVec; - vecCore::Load(x1, data.GetCoordComponent(i * vecSize, 0)); - vecCore::Load(y, data.ValuePtr(i * vecSize)); + Detail::Load(x1, data.GetCoordComponent(i * vecSize, 0)); + Detail::Load(y, data.ValuePtr(i * vecSize)); const auto invError = data.ErrorPtr(i * vecSize); auto invErrorptr = (invError != nullptr) ? invError : &ones.front(); - vecCore::Load(invErrorVec, invErrorptr); + Detail::Load(invErrorVec, invErrorptr); const T *x; std::vector xc; @@ -421,7 +458,7 @@ namespace FitUtil { xc.resize(data.NDim()); xc[0] = x1; for (unsigned int j = 1; j < data.NDim(); ++j) - vecCore::Load(xc[j], data.GetCoordComponent(i * vecSize, j)); + Detail::Load(xc[j], data.GetCoordComponent(i * vecSize, j)); x = xc.data(); } else { x = &x1; @@ -440,9 +477,7 @@ namespace FitUtil { // avoid infinity or nan in chi2 values due to wrong function values - auto m = vecCore::Mask_v(chi2 > maxResValue); - - vecCore::MaskedAssign(chi2, m, maxResValue); + where(chi2 > maxResValue, chi2) = maxResValue; return chi2; }; @@ -478,10 +513,9 @@ namespace FitUtil { // Last SIMD vector of elements (if padding needed) if (data.Size() % vecSize != 0) - vecCore::MaskedAssign(res, vecCore::Int2Mask(data.Size() % vecSize), - res + mapFunction(data.Size() / vecSize)); + where(Detail::Int2Mask(data.Size() % vecSize), res) = res + mapFunction(data.Size() / vecSize); - return vecCore::ReduceAdd(res); + return Detail::ReduceAdd(res); } static double EvalLogL(const IModelFunctionTempl &func, const UnBinData &data, const double *const p, @@ -506,13 +540,13 @@ namespace FitUtil { if (!normalizeFunc) { if (data.NDim() == 1) { T x; - vecCore::Load(x, data.GetCoordComponent(0, 0)); + Detail::Load(x, data.GetCoordComponent(0, 0)); func( &x, p); } else { std::vector x(data.NDim()); for (unsigned int j = 0; j < data.NDim(); ++j) - vecCore::Load(x[j], data.GetCoordComponent(0, j)); + Detail::Load(x[j], data.GetCoordComponent(0, j)); func( x.data(), p); } } @@ -537,9 +571,9 @@ namespace FitUtil { data.Range().GetRange(&xmin[0], &xmax[0]); // check if function is zero at +- inf T xmin_v, xmax_v; - vecCore::Load(xmin_v, xmin.data()); - vecCore::Load(xmax_v, xmax.data()); - if (vecCore::ReduceAdd(func(&xmin_v, p)) != 0 || vecCore::ReduceAdd(func(&xmax_v, p)) != 0) { + Detail::Load(xmin_v, xmin.data()); + Detail::Load(xmax_v, xmax.data()); + if (Detail::ReduceAdd(func(&xmin_v, p)) != 0 || Detail::ReduceAdd(func(&xmax_v, p)) != 0) { MATH_ERROR_MSG("FitUtil::EvaluateLogLikelihood", "A range has not been set and the function is not zero at +/- inf"); return 0; } @@ -549,7 +583,7 @@ namespace FitUtil { // needed to compute effective global weight in case of extended likelihood - auto vecSize = vecCore::VectorSize(); + auto vecSize = T::size(); unsigned int numVectors = n / vecSize; auto mapFunction = [ &, p](const unsigned i) { @@ -560,7 +594,7 @@ namespace FitUtil { (void)p; /* avoid unused lambda capture warning if PARAMCACHE is disabled */ T x1; - vecCore::Load(x1, data.GetCoordComponent(i * vecSize, 0)); + Detail::Load(x1, data.GetCoordComponent(i * vecSize, 0)); const T *x = nullptr; unsigned int ndim = data.NDim(); std::vector xc; @@ -568,7 +602,7 @@ namespace FitUtil { xc.resize(ndim); xc[0] = x1; for (unsigned int j = 1; j < ndim; ++j) - vecCore::Load(xc[j], data.GetCoordComponent(i * vecSize, j)); + Detail::Load(xc[j], data.GetCoordComponent(i * vecSize, j)); x = xc.data(); } else { x = &x1; @@ -596,7 +630,7 @@ namespace FitUtil { if (data.WeightsPtr(i) == nullptr) weight = 1; else - vecCore::Load(weight, data.WeightsPtr(i*vecSize)); + Detail::Load(weight, data.WeightsPtr(i * vecSize)); logval *= weight; if (iWeight == 2) { logval *= weight; // use square of weights in likelihood @@ -660,17 +694,17 @@ namespace FitUtil { if (remainingPoints > 0) { auto remainingPointsContribution = mapFunction(numVectors); // Add the contribution from the valid remaining points and store the result in the output variable - auto remainingMask = vecCore::Int2Mask(remainingPoints); - vecCore::MaskedAssign(logl_v, remainingMask, logl_v + remainingPointsContribution.logvalue); - vecCore::MaskedAssign(sumW_v, remainingMask, sumW_v + remainingPointsContribution.weight); - vecCore::MaskedAssign(sumW2_v, remainingMask, sumW2_v + remainingPointsContribution.weight2); + auto remainingMask = Detail::Int2Mask(remainingPoints); + where(remainingMask, logl_v) = logl_v + remainingPointsContribution.logvalue; + where(remainingMask, sumW_v) = sumW_v + remainingPointsContribution.weight; + where(remainingMask, sumW2_v) = sumW2_v + remainingPointsContribution.weight2; } //reduce vector type to double. - double logl = vecCore::ReduceAdd(logl_v); - double sumW = vecCore::ReduceAdd(sumW_v); - double sumW2 = vecCore::ReduceAdd(sumW2_v); + double logl = Detail::ReduceAdd(logl_v); + double sumW = Detail::ReduceAdd(sumW_v); + double sumW2 = Detail::ReduceAdd(sumW2_v); if (extended) { // add Poisson extended term @@ -695,9 +729,9 @@ namespace FitUtil { data.Range().GetRange(&xmin[0], &xmax[0]); // check if function is zero at +- inf T xmin_v, xmax_v; - vecCore::Load(xmin_v, xmin.data()); - vecCore::Load(xmax_v, xmax.data()); - if (vecCore::ReduceAdd(func(&xmin_v, p)) != 0 || vecCore::ReduceAdd(func(&xmax_v, p)) != 0) { + Detail::Load(xmin_v, xmin.data()); + Detail::Load(xmax_v, xmax.data()); + if (Detail::ReduceAdd(func(&xmin_v, p)) != 0 || Detail::ReduceAdd(func(&xmax_v, p)) != 0) { MATH_ERROR_MSG("FitUtil::EvaluateLogLikelihood", "A range has not been set and the function is not zero at +/- inf"); return 0; } @@ -756,7 +790,7 @@ namespace FitUtil { #ifdef USE_PARAMCACHE (const_cast &>(func)).SetParameters(p); #endif - auto vecSize = vecCore::VectorSize(); + auto vecSize = T::size(); // get fit option and check case of using integral of bins const DataOptions &fitOpt = data.Opt(); if (fitOpt.fExpErrors || fitOpt.fIntegral) @@ -766,13 +800,13 @@ namespace FitUtil { auto mapFunction = [&](unsigned int i) { T y; - vecCore::Load(y, data.ValuePtr(i * vecSize)); + Detail::Load(y, data.ValuePtr(i * vecSize)); T fval{}; if (data.NDim() > 1) { std::vector x(data.NDim()); for (unsigned int j = 0; j < data.NDim(); ++j) - vecCore::Load(x[j], data.GetCoordComponent(i * vecSize, j)); + Detail::Load(x[j], data.GetCoordComponent(i * vecSize, j)); #ifdef USE_PARAMCACHE fval = func(x.data()); #else @@ -781,7 +815,7 @@ namespace FitUtil { // one -dim case } else { T x; - vecCore::Load(x, data.GetCoordComponent(i * vecSize, 0)); + Detail::Load(x, data.GetCoordComponent(i * vecSize, 0)); #ifdef USE_PARAMCACHE fval = func(&x); #else @@ -791,7 +825,7 @@ namespace FitUtil { // EvalLog protects against 0 values of fval but don't want to add in the -log sum // negative values of fval - vecCore::MaskedAssign(fval, fval < 0.0, 0.0); + where(fval < 0.0, fval) = 0.0; T nloglike{}; // negative loglikelihood @@ -803,14 +837,17 @@ namespace FitUtil { // the saturated model) assert (data.GetErrorType() != ROOT::Fit::BinData::ErrorType::kNoError); T error = 0.0; - vecCore::Load(error, data.ErrorPtr(i * vecSize)); + Detail::Load(error, data.ErrorPtr(i * vecSize)); // for empty bin use the average weight computed from the total data weight - auto m = vecCore::Mask_v(y != 0.0); - auto weight = vecCore::Blend(m,(error * error) / y, T(data.SumOfError2()/ data.SumOfContent()) ); + auto m = y != 0.0; + T weight{}; + where(m, weight) = (error * error) / y; + where(!m, weight) = T(data.SumOfError2() / data.SumOfContent()); if (extended) { nloglike = weight * ( fval - y); } - vecCore::MaskedAssign(nloglike, y != 0, nloglike + weight * y *( ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval)) ); + where(y != 0, nloglike) = + nloglike + weight * y * (ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval)); } else { // standard case no weights or iWeight=1 @@ -819,8 +856,7 @@ namespace FitUtil { // (same formula as in Baker-Cousins paper, page 439 except a factor of 2 if (extended) nloglike = fval - y; - vecCore::MaskedAssign( - nloglike, y > 0, nloglike + y * (ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval))); + where(y > 0, nloglike) = nloglike + y * (ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval)); } return nloglike; @@ -859,10 +895,9 @@ namespace FitUtil { // Last padded SIMD vector of elements if (data.Size() % vecSize != 0) - vecCore::MaskedAssign(res, vecCore::Int2Mask(data.Size() % vecSize), - res + mapFunction(data.Size() / vecSize)); + where(Detail::Int2Mask(data.Size() % vecSize), res) = res + mapFunction(data.Size() / vecSize); - return vecCore::ReduceAdd(res); + return Detail::ReduceAdd(res); } static double EvalChi2Effective(const IModelFunctionTempl &, const BinData &, const double *, unsigned int &) @@ -874,15 +909,15 @@ namespace FitUtil { // Compute a mask to filter out infinite numbers and NaN values. // The argument rval is updated so infinite numbers and NaN values are replaced by // maximum finite values (preserving the original sign). - static vecCore::Mask CheckInfNaNValues(T &rval) + static auto CheckInfNaNValues(T &rval) { - auto mask = rval > -vecCore::NumericLimits::Max() && rval < vecCore::NumericLimits::Max(); + auto mask = rval > -Detail::NumericMax() && rval < Detail::NumericMax(); // Case +inf or nan - vecCore::MaskedAssign(rval, !mask, +vecCore::NumericLimits::Max()); + where(!mask, rval) = +Detail::NumericMax(); // Case -inf - vecCore::MaskedAssign(rval, !mask && rval < 0, -vecCore::NumericLimits::Max()); + where(!mask && rval < 0, rval) = -Detail::NumericMax(); return mask; } @@ -915,12 +950,12 @@ namespace FitUtil { "BinVolume or ExpErrors\n. Aborting operation."); unsigned int npar = func.NPar(); - auto vecSize = vecCore::VectorSize(); + auto vecSize = T::size(); unsigned initialNPoints = data.Size(); unsigned numVectors = initialNPoints / vecSize; // numVectors + 1 because of the padded data (call to mapFunction with i = numVectors after the main loop) - std::vector> validPointsMasks(numVectors + 1); + std::vector validPointsMasks(numVectors + 1); auto mapFunction = [&](const unsigned int i) { // set all vector values to zero @@ -929,14 +964,14 @@ namespace FitUtil { T x1, y, invError; - vecCore::Load(x1, data.GetCoordComponent(i * vecSize, 0)); - vecCore::Load(y, data.ValuePtr(i * vecSize)); + Detail::Load(x1, data.GetCoordComponent(i * vecSize, 0)); + Detail::Load(y, data.ValuePtr(i * vecSize)); const auto invErrorPtr = data.ErrorPtr(i * vecSize); if (invErrorPtr == nullptr) invError = 1; else - vecCore::Load(invError, invErrorPtr); + Detail::Load(invError, invErrorPtr); // TODO: Check error options and invert if needed @@ -952,7 +987,7 @@ namespace FitUtil { xc.resize(ndim); xc[0] = x1; for (unsigned int j = 1; j < ndim; ++j) - vecCore::Load(xc[j], data.GetCoordComponent(i * vecSize, j)); + Detail::Load(xc[j], data.GetCoordComponent(i * vecSize, j)); x = xc.data(); } else { x = &x1; @@ -962,7 +997,7 @@ namespace FitUtil { func.ParameterGradient(x, p, &gradFunc[0]); validPointsMasks[i] = CheckInfNaNValues(fval); - if (vecCore::MaskEmpty(validPointsMasks[i])) { + if (Detail::MaskEmpty(validPointsMasks[i])) { // Return a zero contribution to all partial derivatives on behalf of the current points return pointContributionVec; } @@ -973,13 +1008,13 @@ namespace FitUtil { // eventually add possibility of excluding some points (like singularity) validPointsMasks[i] = CheckInfNaNValues(gradFunc[ipar]); - if (vecCore::MaskEmpty(validPointsMasks[i])) { + if (Detail::MaskEmpty(validPointsMasks[i])) { break; // exit loop on parameters } // calculate derivative point contribution (only for valid points) - vecCore::MaskedAssign(pointContributionVec[ipar], validPointsMasks[i], - -2.0 * (y - fval) * invError * invError * gradFunc[ipar]); + where(validPointsMasks[i], pointContributionVec[ipar]) = + -2.0 * (y - fval) * invError * invError * gradFunc[ipar]; } return pointContributionVec; @@ -1035,26 +1070,31 @@ namespace FitUtil { if (remainingPoints > 0) { auto remainingPointsContribution = mapFunction(numVectors); // Add the contribution from the valid remaining points and store the result in the output variable - auto remainingMask = vecCore::Int2Mask(remainingPoints); + auto remainingMask = Detail::Int2Mask(remainingPoints); for (unsigned int param = 0; param < npar; param++) { - vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]); + where(remainingMask, gVec[param]) = gVec[param] + remainingPointsContribution[param]; } } // reduce final gradient result from T to double for (unsigned int param = 0; param < npar; param++) { - grad[param] = vecCore::ReduceAdd(gVec[param]); + grad[param] = Detail::ReduceAdd(gVec[param]); } // correct the number of points nPoints = initialNPoints; - if (std::any_of(validPointsMasks.begin(), validPointsMasks.end(), - [](vecCore::Mask validPoints) { return !vecCore::MaskFull(validPoints); })) { + if (std::any_of(validPointsMasks.begin(), validPointsMasks.end(), [](auto validPoints) { + // Check if the mask is not full + for (size_t i = 0; i < T::mask_type::size(); ++i) + if (!validPoints[i]) + return true; + return false; + })) { unsigned nRejected = 0; for (const auto &mask : validPointsMasks) { for (unsigned int i = 0; i < vecSize; i++) { - nRejected += !vecCore::Get(mask, i); + nRejected += !mask[i]; } } @@ -1118,7 +1158,7 @@ namespace FitUtil { "BinVolume or ExpErrors\n. Aborting operation."); unsigned int npar = func.NPar(); - auto vecSize = vecCore::VectorSize(); + auto vecSize = T::size(); unsigned initialNPoints = data.Size(); unsigned numVectors = initialNPoints / vecSize; @@ -1129,8 +1169,8 @@ namespace FitUtil { T x1, y; - vecCore::Load(x1, data.GetCoordComponent(i * vecSize, 0)); - vecCore::Load(y, data.ValuePtr(i * vecSize)); + Detail::Load(x1, data.GetCoordComponent(i * vecSize, 0)); + Detail::Load(y, data.ValuePtr(i * vecSize)); T fval = 0; @@ -1142,7 +1182,7 @@ namespace FitUtil { xc.resize(ndim); xc[0] = x1; for (unsigned int j = 1; j < ndim; ++j) - vecCore::Load(xc[j], data.GetCoordComponent(i * vecSize, j)); + Detail::Load(xc[j], data.GetCoordComponent(i * vecSize, j)); x = xc.data(); } else { x = &x1; @@ -1153,18 +1193,21 @@ namespace FitUtil { // correct the gradient for (unsigned int ipar = 0; ipar < npar; ++ipar) { - vecCore::Mask positiveValuesMask = fval > 0; + auto positiveValuesMask = fval > 0; // df/dp * (1. - y/f ) - vecCore::MaskedAssign(pointContributionVec[ipar], positiveValuesMask, gradFunc[ipar] * (1. - y / fval)); + where(positiveValuesMask, pointContributionVec[ipar]) = gradFunc[ipar] * (1. - y / fval); - vecCore::Mask validNegativeValuesMask = !positiveValuesMask && gradFunc[ipar] != 0; + auto validNegativeValuesMask = !positiveValuesMask && gradFunc[ipar] != 0; - if (!vecCore::MaskEmpty(validNegativeValuesMask)) { - const T kdmax1 = vecCore::math::Sqrt(vecCore::NumericLimits::Max()); - const T kdmax2 = vecCore::NumericLimits::Max() / (4 * initialNPoints); + if (!Detail::MaskEmpty(validNegativeValuesMask)) { + const T kdmax1 = sqrt(Detail::NumericMax()); + const T kdmax2 = Detail::NumericMax() / (4 * initialNPoints); T gg = kdmax1 * gradFunc[ipar]; - pointContributionVec[ipar] = -vecCore::Blend(gg > 0, vecCore::math::Min(gg, kdmax2), vecCore::math::Max(gg, -kdmax2)); + auto mask = gg > 0; + where(mask, pointContributionVec[ipar]) = min(gg, kdmax2); + where(!mask, pointContributionVec[ipar]) = max(gg, -kdmax2); + pointContributionVec[ipar] = -pointContributionVec[ipar]; } } @@ -1234,14 +1277,14 @@ namespace FitUtil { if (remainingPoints > 0) { auto remainingPointsContribution = mapFunction(numVectors); // Add the contribution from the valid remaining points and store the result in the output variable - auto remainingMask = vecCore::Int2Mask(remainingPoints); + auto remainingMask = Detail::Int2Mask(remainingPoints); for (unsigned int param = 0; param < npar; param++) { - vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]); + where(remainingMask, gVec[param]) = gVec[param] + remainingPointsContribution[param]; } } // reduce final gradient result from T to double for (unsigned int param = 0; param < npar; param++) { - grad[param] = vecCore::ReduceAdd(gVec[param]); + grad[param] = Detail::ReduceAdd(gVec[param]); } #ifdef DEBUG_FITUTIL @@ -1266,7 +1309,7 @@ namespace FitUtil { unsigned int npar = func.NPar(); - auto vecSize = vecCore::VectorSize(); + auto vecSize = T::size(); unsigned initialNPoints = data.Size(); unsigned numVectors = initialNPoints / vecSize; @@ -1279,15 +1322,15 @@ namespace FitUtil { (const_cast &>(func)).SetParameters(p); - const T kdmax1 = vecCore::math::Sqrt(vecCore::NumericLimits::Max()); - const T kdmax2 = vecCore::NumericLimits::Max() / (4 * initialNPoints); + const T kdmax1 = sqrt(Detail::NumericMax()); + const T kdmax2 = Detail::NumericMax() / (4 * initialNPoints); auto mapFunction = [&](const unsigned int i) { std::vector gradFunc(npar); std::vector pointContributionVec(npar); T x1; - vecCore::Load(x1, data.GetCoordComponent(i * vecSize, 0)); + Detail::Load(x1, data.GetCoordComponent(i * vecSize, 0)); const T *x = nullptr; @@ -1297,7 +1340,7 @@ namespace FitUtil { xc.resize(ndim); xc[0] = x1; for (unsigned int j = 1; j < ndim; ++j) - vecCore::Load(xc[j], data.GetCoordComponent(i * vecSize, j)); + Detail::Load(xc[j], data.GetCoordComponent(i * vecSize, j)); x = xc.data(); } else { x = &x1; @@ -1314,18 +1357,19 @@ namespace FitUtil { } #endif - vecCore::Mask positiveValues = fval > 0; + auto positiveValues = fval > 0; for (unsigned int kpar = 0; kpar < npar; ++kpar) { - if (!vecCore::MaskEmpty(positiveValues)) - vecCore::MaskedAssign(pointContributionVec[kpar], positiveValues, -1. / fval * gradFunc[kpar]); + if (!Detail::MaskEmpty(positiveValues)) { + where(positiveValues, pointContributionVec[kpar]) = -1. / fval * gradFunc[kpar]; + } - vecCore::Mask nonZeroGradientValues = !positiveValues && gradFunc[kpar] != 0; - if (!vecCore::MaskEmpty(nonZeroGradientValues)) { + auto nonZeroGradientValues = !positiveValues && gradFunc[kpar] != 0; + if (!Detail::MaskEmpty(nonZeroGradientValues)) { T gg = kdmax1 * gradFunc[kpar]; - pointContributionVec[kpar] = - vecCore::Blend(nonZeroGradientValues && gg > 0, -vecCore::math::Min(gg, kdmax2), - -vecCore::math::Max(gg, -kdmax2)); + auto mask = nonZeroGradientValues && gg > 0; + where(mask, pointContributionVec[kpar]) = -min(gg, kdmax2); + where(!mask, pointContributionVec[kpar]) = -max(gg, -kdmax2); } // if func derivative is zero term is also zero so do not add in g[kpar] } @@ -1383,14 +1427,14 @@ namespace FitUtil { if (remainingPoints > 0) { auto remainingPointsContribution = mapFunction(numVectors); // Add the contribution from the valid remaining points and store the result in the output variable - auto remainingMask = vecCore::Int2Mask(initialNPoints % vecSize); + auto remainingMask = Detail::Int2Mask(initialNPoints % vecSize); for (unsigned int param = 0; param < npar; param++) { - vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]); + where(remainingMask, gVec[param]) = gVec[param] + remainingPointsContribution[param]; } } // reduce final gradient result from T to double for (unsigned int param = 0; param < npar; param++) { - grad[param] = vecCore::ReduceAdd(gVec[param]); + grad[param] = Detail::ReduceAdd(gVec[param]); } #ifdef DEBUG_FITUTIL @@ -1481,15 +1525,10 @@ namespace FitUtil { } }; -} // end namespace FitUtil + } // end namespace FitUtil } // end namespace Fit -} // end namespace ROOT - -#if defined (R__HAS_VECCORE) && defined(R__HAS_VC) -//Fixes alignment for structures of SIMD structures -Vc_DECLARE_ALLOCATOR(ROOT::Fit::FitUtil::LikelihoodAux); -#endif + } // end namespace ROOT #endif /* ROOT_Fit_FitUtil */ diff --git a/math/mathcore/inc/Fit/Fitter.h b/math/mathcore/inc/Fit/Fitter.h index 59889413badbc..6718643dc3004 100644 --- a/math/mathcore/inc/Fit/Fitter.h +++ b/math/mathcore/inc/Fit/Fitter.h @@ -82,7 +82,7 @@ class Fitter { typedef ROOT::Math::IParamMultiFunction IModelFunction; template using IModelFunctionTempl = ROOT::Math::IParamMultiFunctionTempl; -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD typedef ROOT::Math::IParametricFunctionMultiDimTempl IModelFunction_v; typedef ROOT::Math::IParamMultiGradFunctionTempl IGradModelFunction_v; #else @@ -372,7 +372,7 @@ class Fitter { /** Set the fitted function (model function) from a vectorized parametric function interface */ -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD template ::value)>> void SetFunction(const IModelFunction_v &func, bool useGradient = false); @@ -572,8 +572,7 @@ class Fitter { const ROOT::Math::IMultiGenFunction *fExtObjFunction = nullptr; /// void Fitter::SetFunction(const IModelFunction_v &func, bool useGradient) { diff --git a/math/mathcore/inc/Math/Math.h b/math/mathcore/inc/Math/Math.h index 1e31ec59fa5fd..fa78300ff1385 100644 --- a/math/mathcore/inc/Math/Math.h +++ b/math/mathcore/inc/Math/Math.h @@ -54,29 +54,6 @@ See the \ref Math "Math Libraries" page for a detailed description. */ namespace Math { -// Enable Vc/VecCore template instantiations to replace std math functions. -// -// Vc declares `std::sqrt(Vc-type)`. To use this for Vc-`SCALAR`s, the call -// to `sqrt()` must only be resolved at the template instantiation time, when -// the Vc headers are guaranteed to be included, and thus its `sqrt()` -// overloads have been declared. -// The trick is to keep sqrt() dependent (on its argument type) by making it -// an unqualified name. The `std::` of `std::sqrt()` makes it a qualified -// name, so the code here has to use `sqrt()`, not `std::sqrt()`. To still -// find `std::sqrt()` we pull `std::sqrt()` into the surrounding namespace. -// -// We don't want to use 'using namespace std' because it would pollute the including headers. -using std::atan2; -using std::cos; -using std::cosh; -using std::exp; -using std::floor; -using std::log; -using std::pow; -using std::sin; -using std::sinh; -using std::sqrt; -using std::tan; /** Mathematical constants @@ -132,7 +109,7 @@ inline double expm1( double x) { #endif } - } // end namespace Math +} // end namespace Math } // end namespace ROOT diff --git a/math/mathcore/inc/Math/Types.h b/math/mathcore/inc/Math/Types.h index 95d71a78c7619..5953db4eb3e2a 100644 --- a/math/mathcore/inc/Math/Types.h +++ b/math/mathcore/inc/Math/Types.h @@ -3,61 +3,31 @@ #include "RConfigure.h" -#ifdef R__HAS_VECCORE - -#if defined(R__HAS_VC) - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wall" -#pragma GCC diagnostic ignored "-Wunused-parameter" -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#if (__cplusplus >= 202002L) // only for C++20 -#pragma GCC diagnostic ignored "-Wdeprecated-enum-enum-conversion" -#endif +#include "RtypesCore.h" -#ifdef __clang__ -#pragma clang diagnostic ignored "-Wconditional-uninitialized" -#pragma clang diagnostic ignored "-Wdeprecated-copy" -#endif +#ifdef R__HAS_STD_SIMD -#include -#pragma GCC diagnostic pop -#endif - -#include +#include namespace ROOT { namespace Internal { - using ScalarBackend = vecCore::backend::Scalar; -#ifdef VECCORE_ENABLE_VC - using VectorBackend = vecCore::backend::VcVector; -#else - using VectorBackend = vecCore::backend::Scalar; -#endif -} - using Float_v = typename Internal::VectorBackend::Float_v; - using Double_v = typename Internal::VectorBackend::Double_v; - using Int_v = typename Internal::VectorBackend::Int_v; - using Int32_v = typename Internal::VectorBackend::Int32_v; - using UInt_v = typename Internal::VectorBackend::UInt_v; - using UInt32_v = typename Internal::VectorBackend::UInt32_v; -} -#else // R__HAS_VECCORE +template +using SIMDTag = std::experimental::simd_abi::native; -// We do not have explicit vectorisation support enabled. Fall back to regular ROOT types. +} // namespace Internal -#include "RtypesCore.h" +// FIXME: Should we introduce Int32_t and UInt32_t in RtypesCore.h? +using Float_v = std::experimental::simd>; +using Double_v = std::experimental::simd>; +using Int_v = std::experimental::simd>; +using Int32_v = std::experimental::simd>; +using UInt_v = std::experimental::simd>; +using UInt32_v = std::experimental::simd>; + +} // namespace ROOT -namespace ROOT { - using Float_v = Float_t; - using Double_v = Double_t; - using Int_v = Int_t; - using Int32_v = Int_t; // FIXME: Should we introduce Int32_t in RtypesCore.h? - using UInt_v = UInt_t; - using UInt32_v = UInt_t; // FIXME: Should we introduce UInt32_t in RtypesCore.h? -} #endif #endif // ROOT_Math_VecTypes diff --git a/math/mathcore/inc/Math/Util.h b/math/mathcore/inc/Math/Util.h index c04797a1ab57c..46b4c39c5bc59 100644 --- a/math/mathcore/inc/Math/Util.h +++ b/math/mathcore/inc/Math/Util.h @@ -22,9 +22,8 @@ #include #include - // This can be protected against by defining ROOT_Math_VecTypes -// This is only used for the R__HAS_VECCORE define +// This is only used for the R__HAS_STD_SIMD define // and a single VecCore function in EvalLog #ifndef ROOT_Math_VecTypes #include "Types.h" @@ -78,12 +77,17 @@ class TimingScope { template inline T EvalLog(T x) { static const T epsilon = T(2.0 * std::numeric_limits::min()); -#ifdef R__HAS_VECCORE - T logval = vecCore::Blend(x <= epsilon, x / epsilon + std::log(epsilon) - T(1.0), std::log(x)); -#else - T logval = x <= epsilon ? x / epsilon + std::log(epsilon) - T(1.0) : std::log(x); -#endif - return logval; + + if constexpr (std::is_arithmetic_v) { + return x <= epsilon ? x / epsilon + std::log(epsilon) - T(1.0) : std::log(x); + } else { + // assume std::simd for non-arithmetic types + T logval{}; + auto mask = x <= epsilon; + where(mask, logval) = x / epsilon + log(epsilon) - T(1.0); + where(!mask, logval) = log(x); + return logval; + } } } // end namespace Util diff --git a/math/mathcore/inc/VectorizedTMath.h b/math/mathcore/inc/VectorizedTMath.h index de95214c86194..610dc33aacc2d 100644 --- a/math/mathcore/inc/VectorizedTMath.h +++ b/math/mathcore/inc/VectorizedTMath.h @@ -5,29 +5,248 @@ #include "Math/Types.h" #include "TMath.h" -#if defined(R__HAS_VECCORE) && defined(R__HAS_VC) +#include + +#ifdef R__HAS_STD_SIMD namespace TMath { -::ROOT::Double_v Log2(::ROOT::Double_v &x); -::ROOT::Double_v BreitWigner(::ROOT::Double_v &x, Double_t mean = 0, Double_t gamma = 1); -::ROOT::Double_v Gaus(::ROOT::Double_v &x, Double_t mean = 0, Double_t sigma = 1, Bool_t norm = kFALSE); -::ROOT::Double_v LaplaceDist(::ROOT::Double_v &x, Double_t alpha = 0, Double_t beta = 1); -::ROOT::Double_v LaplaceDistI(::ROOT::Double_v &x, Double_t alpha = 0, Double_t beta = 1); -::ROOT::Double_v Freq(::ROOT::Double_v &x); -::ROOT::Double_v BesselI0_Split_More(::ROOT::Double_v &ax); -::ROOT::Double_v BesselI0_Split_Less(::ROOT::Double_v &x); -::ROOT::Double_v BesselI0(::ROOT::Double_v &x); -::ROOT::Double_v BesselI1_Split_More(::ROOT::Double_v &ax, ::ROOT::Double_v &x); -::ROOT::Double_v BesselI1_Split_Less(::ROOT::Double_v &x); -::ROOT::Double_v BesselI1(::ROOT::Double_v &x); -::ROOT::Double_v BesselJ0_Split1_More(::ROOT::Double_v &ax); -::ROOT::Double_v BesselJ0_Split1_Less(::ROOT::Double_v &x); -::ROOT::Double_v BesselJ0(::ROOT::Double_v &x); -::ROOT::Double_v BesselJ1_Split1_More(::ROOT::Double_v &ax, ::ROOT::Double_v &x); -::ROOT::Double_v BesselJ1_Split1_Less(::ROOT::Double_v &x); -::ROOT::Double_v BesselJ1(::ROOT::Double_v &x); +inline ::ROOT::Double_v Log2(::ROOT::Double_v &x) +{ + return log2(x); +} + +/// Calculate a Breit Wigner function with mean and gamma. +inline ::ROOT::Double_v BreitWigner(::ROOT::Double_v &x, Double_t mean = 0, Double_t gamma = 1) +{ + return 0.5 * M_1_PI * (gamma / (0.25 * gamma * gamma + (x - mean) * (x - mean))); +} + +/// Calculate a gaussian function with mean and sigma. +/// If norm=kTRUE (default is kFALSE) the result is divided +/// by sqrt(2*Pi)*sigma. +inline ::ROOT::Double_v Gaus(::ROOT::Double_v &x, Double_t mean = 0, Double_t sigma = 1, Bool_t norm = false) +{ + if (sigma == 0) + return 1.e30; + + auto inv_sigma = 1.0 / ::ROOT::Double_v(sigma); + auto arg = (x - ::ROOT::Double_v(mean)) * inv_sigma; + + // For those entries of |arg| > 39 result is zero in double precision + ::ROOT::Double_v out{}; + where(abs(arg) < 39.0, out) = exp(::ROOT::Double_v(-0.5) * arg * arg); + + if (norm) + out *= 0.3989422804014327 * inv_sigma; // 1/sqrt(2*Pi)=0.3989422804014327 + return out; +} + +/// Computes the probability density function of Laplace distribution +/// at point x, with location parameter alpha and shape parameter beta. +/// By default, alpha=0, beta=1 +/// This distribution is known under different names, most common is +/// double exponential distribution, but it also appears as +/// the two-tailed exponential or the bilateral exponential distribution +inline ::ROOT::Double_v LaplaceDist(::ROOT::Double_v &x, Double_t alpha = 0, Double_t beta = 1) +{ + auto beta_v_inv = ::ROOT::Double_v(1.0 / beta); + auto out = exp(-abs((x - alpha) * beta_v_inv)); + out *= 0.5 * beta_v_inv; + return out; +} + +/// Computes the distribution function of Laplace distribution +/// at point x, with location parameter alpha and shape parameter beta. +/// By default, alpha=0, beta=1 +/// This distribution is known under different names, most common is +/// double exponential distribution, but it also appears as +/// the two-tailed exponential or the bilateral exponential distribution +inline ::ROOT::Double_v LaplaceDistI(::ROOT::Double_v &x, Double_t alpha = 0, Double_t beta = 1) +{ + auto alpha_v = ::ROOT::Double_v(alpha); + auto beta_v_inv = ::ROOT::Double_v(1.0) / ::ROOT::Double_v(beta); + auto mask = x <= alpha_v; + ::ROOT::Double_v out{}; + where(mask, out) = 0.5 * exp(-abs((x - alpha_v) * beta_v_inv)); + where(!mask, out) = 1 - 0.5 * exp(-abs((x - alpha_v) * beta_v_inv)); + return out; +} + +/// Computation of the normal frequency function freq(x). +/// Freq(x) = (1/sqrt(2pi)) Integral(exp(-t^2/2))dt between -infinity and x. +/// +/// Translated from CERNLIB C300 by Rene Brun. +inline ::ROOT::Double_v Freq(::ROOT::Double_v &x) +{ + Double_t c1 = 0.56418958354775629; + Double_t w2 = 1.41421356237309505; + + Double_t p10 = 2.4266795523053175e+2, q10 = 2.1505887586986120e+2, p11 = 2.1979261618294152e+1, + q11 = 9.1164905404514901e+1, p12 = 6.9963834886191355e+0, q12 = 1.5082797630407787e+1, + p13 = -3.5609843701815385e-2; + + Double_t p20 = 3.00459261020161601e+2, q20 = 3.00459260956983293e+2, p21 = 4.51918953711872942e+2, + q21 = 7.90950925327898027e+2, p22 = 3.39320816734343687e+2, q22 = 9.31354094850609621e+2, + p23 = 1.52989285046940404e+2, q23 = 6.38980264465631167e+2, p24 = 4.31622272220567353e+1, + q24 = 2.77585444743987643e+2, p25 = 7.21175825088309366e+0, q25 = 7.70001529352294730e+1, + p26 = 5.64195517478973971e-1, q26 = 1.27827273196294235e+1, p27 = -1.36864857382716707e-7; + + Double_t p30 = -2.99610707703542174e-3, q30 = 1.06209230528467918e-2, p31 = -4.94730910623250734e-2, + q31 = 1.91308926107829841e-1, p32 = -2.26956593539686930e-1, q32 = 1.05167510706793207e+0, + p33 = -2.78661308609647788e-1, q33 = 1.98733201817135256e+0, p34 = -2.23192459734184686e-2, q34 = 1; + + auto v = abs(x) / w2; + + ::ROOT::Double_v result{}; + + auto mask1 = v < 0.5; + auto mask2 = !mask1 && v < 4.0; + auto mask3 = !(mask1 || mask2); + + auto v2 = v * v; + auto v3 = v2 * v; + auto v4 = v3 * v; + auto v5 = v4 * v; + auto v6 = v5 * v; + auto v7 = v6 * v; + auto v8 = v7 * v; + + where(mask1, result) = v * (p10 + p11 * v2 + p12 * v4 + p13 * v6) / (q10 + q11 * v2 + q12 * v4 + v6); + where(mask2, result) = + 1.0 - (p20 + p21 * v + p22 * v2 + p23 * v3 + p24 * v4 + p25 * v5 + p26 * v6 + p27 * v7) / + (exp(v2) * (q20 + q21 * v + q22 * v2 + q23 * v3 + q24 * v4 + q25 * v5 + q26 * v6 + v7)); + where(mask3, result) = 1.0 - (c1 + (p30 * v8 + p31 * v6 + p32 * v4 + p33 * v2 + p34) / + ((q30 * v8 + q31 * v6 + q32 * v4 + q33 * v2 + q34) * v2)) / + (v * exp(v2)); + + auto out = 0.5 * (1 - result); + where(x > 0, out) = 0.5 + 0.5 * result; + return out; +} + +/// Vectorized implementation of Bessel function I_0(x) for a vector x. +inline ::ROOT::Double_v BesselI0_Split_More(::ROOT::Double_v &ax) +{ + auto y = 3.75 / ax; + return (exp(ax) / sqrt(ax)) * + (0.39894228 + + y * (1.328592e-2 + + y * (2.25319e-3 + + y * (-1.57565e-3 + + y * (9.16281e-3 + + y * (-2.057706e-2 + y * (2.635537e-2 + y * (-1.647633e-2 + y * 3.92377e-3)))))))); +} + +inline ::ROOT::Double_v BesselI0_Split_Less(::ROOT::Double_v &x) +{ + auto y = x * x * 0.071111111; + + return 1.0 + + y * (3.5156229 + y * (3.0899424 + y * (1.2067492 + y * (0.2659732 + y * (3.60768e-2 + y * 4.5813e-3))))); +} + +inline ::ROOT::Double_v BesselI0(::ROOT::Double_v &x) +{ + auto ax = abs(x); + + auto out = BesselI0_Split_More(ax); + where(ax <= 3.75, out) = BesselI0_Split_Less(x); + return out; +} + +/// Vectorized implementation of modified Bessel function I_1(x) for a vector x. +inline ::ROOT::Double_v BesselI1_Split_More(::ROOT::Double_v &ax, ::ROOT::Double_v &x) +{ + auto y = 3.75 / ax; + auto result = (exp(ax) / sqrt(ax)) * + (0.39894228 + + y * (-3.988024e-2 + + y * (-3.62018e-3 + + y * (1.63801e-3 + + y * (-1.031555e-2 + + y * (2.282967e-2 + y * (-2.895312e-2 + y * (1.787654e-2 + y * -4.20059e-3)))))))); + where(x < 0, result) = -result; + return result; +} + +inline ::ROOT::Double_v BesselI1_Split_Less(::ROOT::Double_v &x) +{ + auto y = x * x * 0.071111111; + + return x * (0.5 + y * (0.87890594 + + y * (0.51498869 + y * (0.15084934 + y * (2.658733e-2 + y * (3.01532e-3 + y * 3.2411e-4)))))); +} + +inline ::ROOT::Double_v BesselI1(::ROOT::Double_v &x) +{ + auto ax = abs(x); + + auto out = BesselI1_Split_More(ax, x); + where(ax <= 3.75, out) = BesselI1_Split_Less(x); + return out; +} + +/// Vectorized implementation of Bessel function J0(x) for a vector x. +inline ::ROOT::Double_v BesselJ0_Split1_More(::ROOT::Double_v &ax) +{ + auto z = 8 / ax; + auto y = z * z; + auto xx = ax - 0.785398164; + auto result1 = 1 + y * (-0.1098628627e-2 + y * (0.2734510407e-4 + y * (-0.2073370639e-5 + y * 0.2093887211e-6))); + auto result2 = + -0.1562499995e-1 + y * (0.1430488765e-3 + y * (-0.6911147651e-5 + y * (0.7621095161e-6 - y * 0.934935152e-7))); + return sqrt(0.636619772 / ax) * (cos(xx) * result1 - z * sin(xx) * result2); +} + +inline ::ROOT::Double_v BesselJ0_Split1_Less(::ROOT::Double_v &x) +{ + auto y = x * x; + return (57568490574.0 + + y * (-13362590354.0 + y * (651619640.7 + y * (-11214424.18 + y * (77392.33017 + y * -184.9052456))))) / + (57568490411.0 + y * (1029532985.0 + y * (9494680.718 + y * (59272.64853 + y * (267.8532712 + y))))); +} + +inline ::ROOT::Double_v BesselJ0(::ROOT::Double_v &x) +{ + auto ax = abs(x); + auto out = BesselJ0_Split1_More(ax); + where(ax < 8, out) = BesselJ0_Split1_Less(x); + return out; +} + +/// Vectorized implementation of Bessel function J1(x) for a vector x. +inline ::ROOT::Double_v BesselJ1_Split1_More(::ROOT::Double_v &ax, ::ROOT::Double_v &x) +{ + auto z = 8 / ax; + auto y = z * z; + auto xx = ax - 2.356194491; + auto result1 = 1 + y * (0.183105e-2 + y * (-0.3516396496e-4 + y * (0.2457520174e-5 + y * -0.240337019e-6))); + auto result2 = + 0.04687499995 + y * (-0.2002690873e-3 + y * (0.8449199096e-5 + y * (-0.88228987e-6 - y * 0.105787412e-6))); + auto result = sqrt(0.636619772 / ax) * (cos(xx) * result1 - z * sin(xx) * result2); + where(x < 0, result) = -result; + return result; +} + +inline ::ROOT::Double_v BesselJ1_Split1_Less(::ROOT::Double_v &x) +{ + auto y = x * x; + return x * + (72362614232.0 + + y * (-7895059235.0 + y * (242396853.1 + y * (-2972611.439 + y * (15704.48260 + y * -30.16036606))))) / + (144725228442.0 + y * (2300535178.0 + y * (18583304.74 + y * (99447.43394 + y * (376.9991397 + y))))); +} + +inline ::ROOT::Double_v BesselJ1(::ROOT::Double_v &x) +{ + auto ax = abs(x); + auto out = BesselJ1_Split1_More(ax, x); + where(ax < 8, out) = BesselJ1_Split1_Less(x); + return out; +} + } // namespace TMath -#endif // VECCORE and VC exist check +#endif // R__HAS_STD_SIMD #endif diff --git a/math/mathcore/src/TMath.cxx b/math/mathcore/src/TMath.cxx index 67ce138a7e1e1..e08ebb007bb6b 100644 --- a/math/mathcore/src/TMath.cxx +++ b/math/mathcore/src/TMath.cxx @@ -3276,19 +3276,3 @@ void TMath::KNNDensity(std::span observations, std::span -template ROOT::Double_v vecCore::math::Sin(const ROOT::Double_v & x); -template ROOT::Double_v vecCore::math::Cos(const ROOT::Double_v & x); -template ROOT::Double_v vecCore::math::ASin(const ROOT::Double_v & x); -template ROOT::Double_v vecCore::math::ATan(const ROOT::Double_v & x); -template ROOT::Double_v vecCore::math::ATan2(const ROOT::Double_v & x,const ROOT::Double_v & y); -// missing in veccore -// template ROOT::Double_v vecCore::math::ACos(const ROOT::Double_v & x); -// template ROOT::Double_v vecCore::math::Sinh(const ROOT::Double_v & x); -// template ROOT::Double_v vecCore::math::Cosh(const ROOT::Double_v & x); -// template ROOT::Double_v vecCore::math::Tanh(const ROOT::Double_v & x); -// template ROOT::Double_v vecCore::math::Cbrt(const ROOT::Double_v & x); -#endif diff --git a/math/mathcore/src/VectorizedTMath.cxx b/math/mathcore/src/VectorizedTMath.cxx deleted file mode 100644 index bd3bc6b5fa8f4..0000000000000 --- a/math/mathcore/src/VectorizedTMath.cxx +++ /dev/null @@ -1,250 +0,0 @@ -#include "VectorizedTMath.h" - -#if defined(R__HAS_VECCORE) && defined(R__HAS_VC) - -namespace TMath { -//////////////////////////////////////////////////////////////////////////////// -::ROOT::Double_v Log2(::ROOT::Double_v &x) -{ - return vecCore::math::Log2(x); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Calculate a Breit Wigner function with mean and gamma. -::ROOT::Double_v BreitWigner(::ROOT::Double_v &x, Double_t mean, Double_t gamma) -{ - return 0.5 * M_1_PI * (gamma / (0.25 * gamma * gamma + (x - mean) * (x - mean))); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Calculate a gaussian function with mean and sigma. -/// If norm=kTRUE (default is kFALSE) the result is divided -/// by sqrt(2*Pi)*sigma. -::ROOT::Double_v Gaus(::ROOT::Double_v &x, Double_t mean, Double_t sigma, Bool_t norm) -{ - if (sigma == 0) - return ::ROOT::Double_v(1.e30); - - ::ROOT::Double_v inv_sigma = 1.0 / ::ROOT::Double_v(sigma); - ::ROOT::Double_v arg = (x - ::ROOT::Double_v(mean)) * inv_sigma; - - // For those entries of |arg| > 39 result is zero in double precision - ::ROOT::Double_v out = - vecCore::Blend<::ROOT::Double_v>(vecCore::math::Abs(arg) < ::ROOT::Double_v(39.0), - vecCore::math::Exp(::ROOT::Double_v(-0.5) * arg * arg), ::ROOT::Double_v(0.0)); - if (norm) - out *= 0.3989422804014327 * inv_sigma; // 1/sqrt(2*Pi)=0.3989422804014327 - return out; -} - -//////////////////////////////////////////////////////////////////////////////// -/// Computes the probability density function of Laplace distribution -/// at point x, with location parameter alpha and shape parameter beta. -/// By default, alpha=0, beta=1 -/// This distribution is known under different names, most common is -/// double exponential distribution, but it also appears as -/// the two-tailed exponential or the bilateral exponential distribution -::ROOT::Double_v LaplaceDist(::ROOT::Double_v &x, Double_t alpha, Double_t beta) -{ - ::ROOT::Double_v beta_v_inv = ::ROOT::Double_v(1.0) / ::ROOT::Double_v(beta); - ::ROOT::Double_v out = vecCore::math::Exp(-vecCore::math::Abs((x - ::ROOT::Double_v(alpha)) * beta_v_inv)); - out *= ::ROOT::Double_v(0.5) * beta_v_inv; - return out; -} - -//////////////////////////////////////////////////////////////////////////////// -/// Computes the distribution function of Laplace distribution -/// at point x, with location parameter alpha and shape parameter beta. -/// By default, alpha=0, beta=1 -/// This distribution is known under different names, most common is -/// double exponential distribution, but it also appears as -/// the two-tailed exponential or the bilateral exponential distribution -::ROOT::Double_v LaplaceDistI(::ROOT::Double_v &x, Double_t alpha, Double_t beta) -{ - ::ROOT::Double_v alpha_v = ::ROOT::Double_v(alpha); - ::ROOT::Double_v beta_v_inv = ::ROOT::Double_v(1.0) / ::ROOT::Double_v(beta); - return vecCore::Blend<::ROOT::Double_v>( - x <= alpha_v, 0.5 * vecCore::math::Exp(-vecCore::math::Abs((x - alpha_v) * beta_v_inv)), - 1 - 0.5 * vecCore::math::Exp(-vecCore::math::Abs((x - alpha_v) * beta_v_inv))); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Computation of the normal frequency function freq(x). -/// Freq(x) = (1/sqrt(2pi)) Integral(exp(-t^2/2))dt between -infinity and x. -/// -/// Translated from CERNLIB C300 by Rene Brun. -::ROOT::Double_v Freq(::ROOT::Double_v &x) -{ - Double_t c1 = 0.56418958354775629; - Double_t w2 = 1.41421356237309505; - - Double_t p10 = 2.4266795523053175e+2, q10 = 2.1505887586986120e+2, p11 = 2.1979261618294152e+1, - q11 = 9.1164905404514901e+1, p12 = 6.9963834886191355e+0, q12 = 1.5082797630407787e+1, - p13 = -3.5609843701815385e-2; - - Double_t p20 = 3.00459261020161601e+2, q20 = 3.00459260956983293e+2, p21 = 4.51918953711872942e+2, - q21 = 7.90950925327898027e+2, p22 = 3.39320816734343687e+2, q22 = 9.31354094850609621e+2, - p23 = 1.52989285046940404e+2, q23 = 6.38980264465631167e+2, p24 = 4.31622272220567353e+1, - q24 = 2.77585444743987643e+2, p25 = 7.21175825088309366e+0, q25 = 7.70001529352294730e+1, - p26 = 5.64195517478973971e-1, q26 = 1.27827273196294235e+1, p27 = -1.36864857382716707e-7; - - Double_t p30 = -2.99610707703542174e-3, q30 = 1.06209230528467918e-2, p31 = -4.94730910623250734e-2, - q31 = 1.91308926107829841e-1, p32 = -2.26956593539686930e-1, q32 = 1.05167510706793207e+0, - p33 = -2.78661308609647788e-1, q33 = 1.98733201817135256e+0, p34 = -2.23192459734184686e-2, q34 = 1; - - ::ROOT::Double_v v = vecCore::math::Abs(x) / w2; - - ::ROOT::Double_v result; - - vecCore::Mask<::ROOT::Double_v> mask1 = v < ::ROOT::Double_v(0.5); - vecCore::Mask<::ROOT::Double_v> mask2 = !mask1 && v < ::ROOT::Double_v(4.0); - vecCore::Mask<::ROOT::Double_v> mask3 = !(mask1 || mask2); - - ::ROOT::Double_v v2 = v * v; - ::ROOT::Double_v v3 = v2 * v; - ::ROOT::Double_v v4 = v3 * v; - ::ROOT::Double_v v5 = v4 * v; - ::ROOT::Double_v v6 = v5 * v; - ::ROOT::Double_v v7 = v6 * v; - ::ROOT::Double_v v8 = v7 * v; - - vecCore::MaskedAssign<::ROOT::Double_v>( - result, mask1, v * (p10 + p11 * v2 + p12 * v4 + p13 * v6) / (q10 + q11 * v2 + q12 * v4 + v6)); - vecCore::MaskedAssign<::ROOT::Double_v>( - result, mask2, - ::ROOT::Double_v(1.0) - - (p20 + p21 * v + p22 * v2 + p23 * v3 + p24 * v4 + p25 * v5 + p26 * v6 + p27 * v7) / - (vecCore::math::Exp(v2) * (q20 + q21 * v + q22 * v2 + q23 * v3 + q24 * v4 + q25 * v5 + q26 * v6 + v7))); - vecCore::MaskedAssign<::ROOT::Double_v>(result, mask3, - ::ROOT::Double_v(1.0) - - (c1 + (p30 * v8 + p31 * v6 + p32 * v4 + p33 * v2 + p34) / - ((q30 * v8 + q31 * v6 + q32 * v4 + q33 * v2 + q34) * v2)) / - (v * vecCore::math::Exp(v2))); - - return vecCore::Blend<::ROOT::Double_v>(x > 0, ::ROOT::Double_v(0.5) + ::ROOT::Double_v(0.5) * result, - ::ROOT::Double_v(0.5) * (::ROOT::Double_v(1) - result)); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Vectorized implementation of Bessel function I_0(x) for a vector x. -::ROOT::Double_v BesselI0_Split_More(::ROOT::Double_v &ax) -{ - ::ROOT::Double_v y = 3.75 / ax; - return (vecCore::math::Exp(ax) / vecCore::math::Sqrt(ax)) * - (0.39894228 + - y * (1.328592e-2 + - y * (2.25319e-3 + - y * (-1.57565e-3 + - y * (9.16281e-3 + - y * (-2.057706e-2 + y * (2.635537e-2 + y * (-1.647633e-2 + y * 3.92377e-3)))))))); -} - -::ROOT::Double_v BesselI0_Split_Less(::ROOT::Double_v &x) -{ - ::ROOT::Double_v y = x * x * 0.071111111; - - return 1.0 + - y * (3.5156229 + y * (3.0899424 + y * (1.2067492 + y * (0.2659732 + y * (3.60768e-2 + y * 4.5813e-3))))); -} - -::ROOT::Double_v BesselI0(::ROOT::Double_v &x) -{ - ::ROOT::Double_v ax = vecCore::math::Abs(x); - - return vecCore::Blend<::ROOT::Double_v>(ax <= 3.75, BesselI0_Split_Less(x), BesselI0_Split_More(ax)); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Vectorized implementation of modified Bessel function I_1(x) for a vector x. -::ROOT::Double_v BesselI1_Split_More(::ROOT::Double_v &ax, ::ROOT::Double_v &x) -{ - ::ROOT::Double_v y = 3.75 / ax; - ::ROOT::Double_v result = - (vecCore::math::Exp(ax) / vecCore::math::Sqrt(ax)) * - (0.39894228 + y * (-3.988024e-2 + - y * (-3.62018e-3 + - y * (1.63801e-3 + y * (-1.031555e-2 + - y * (2.282967e-2 + y * (-2.895312e-2 + - y * (1.787654e-2 + y * -4.20059e-3)))))))); - return vecCore::Blend<::ROOT::Double_v>(x < 0, ::ROOT::Double_v(-1.0) * result, result); -} - -::ROOT::Double_v BesselI1_Split_Less(::ROOT::Double_v &x) -{ - ::ROOT::Double_v y = x * x * 0.071111111; - - return x * (0.5 + y * (0.87890594 + - y * (0.51498869 + y * (0.15084934 + y * (2.658733e-2 + y * (3.01532e-3 + y * 3.2411e-4)))))); -} - -::ROOT::Double_v BesselI1(::ROOT::Double_v &x) -{ - ::ROOT::Double_v ax = vecCore::math::Abs(x); - - return vecCore::Blend<::ROOT::Double_v>(ax <= 3.75, BesselI1_Split_Less(x), BesselI1_Split_More(ax, x)); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Vectorized implementation of Bessel function J0(x) for a vector x. -::ROOT::Double_v BesselJ0_Split1_More(::ROOT::Double_v &ax) -{ - ::ROOT::Double_v z = 8 / ax; - ::ROOT::Double_v y = z * z; - ::ROOT::Double_v xx = ax - 0.785398164; - ::ROOT::Double_v result1 = - 1 + y * (-0.1098628627e-2 + y * (0.2734510407e-4 + y * (-0.2073370639e-5 + y * 0.2093887211e-6))); - ::ROOT::Double_v result2 = - -0.1562499995e-1 + y * (0.1430488765e-3 + y * (-0.6911147651e-5 + y * (0.7621095161e-6 - y * 0.934935152e-7))); - return vecCore::math::Sqrt(0.636619772 / ax) * - (vecCore::math::Cos(xx) * result1 - z * vecCore::math::Sin(xx) * result2); -} - -::ROOT::Double_v BesselJ0_Split1_Less(::ROOT::Double_v &x) -{ - ::ROOT::Double_v y = x * x; - return (57568490574.0 + - y * (-13362590354.0 + y * (651619640.7 + y * (-11214424.18 + y * (77392.33017 + y * -184.9052456))))) / - (57568490411.0 + y * (1029532985.0 + y * (9494680.718 + y * (59272.64853 + y * (267.8532712 + y))))); -} - -::ROOT::Double_v BesselJ0(::ROOT::Double_v &x) -{ - ::ROOT::Double_v ax = vecCore::math::Abs(x); - return vecCore::Blend<::ROOT::Double_v>(ax < 8, BesselJ0_Split1_Less(x), BesselJ0_Split1_More(ax)); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Vectorized implementation of Bessel function J1(x) for a vector x. -::ROOT::Double_v BesselJ1_Split1_More(::ROOT::Double_v &ax, ::ROOT::Double_v &x) -{ - ::ROOT::Double_v z = 8 / ax; - ::ROOT::Double_v y = z * z; - ::ROOT::Double_v xx = ax - 2.356194491; - ::ROOT::Double_v result1 = - 1 + y * (0.183105e-2 + y * (-0.3516396496e-4 + y * (0.2457520174e-5 + y * -0.240337019e-6))); - ::ROOT::Double_v result2 = - 0.04687499995 + y * (-0.2002690873e-3 + y * (0.8449199096e-5 + y * (-0.88228987e-6 - y * 0.105787412e-6))); - ::ROOT::Double_v result = - vecCore::math::Sqrt(0.636619772 / ax) * (vecCore::math::Cos(xx) * result1 - z * vecCore::math::Sin(xx) * result2); - vecCore::MaskedAssign<::ROOT::Double_v>(result, x < 0, -result); - return result; -} - -::ROOT::Double_v BesselJ1_Split1_Less(::ROOT::Double_v &x) -{ - ::ROOT::Double_v y = x * x; - return x * - (72362614232.0 + - y * (-7895059235.0 + y * (242396853.1 + y * (-2972611.439 + y * (15704.48260 + y * -30.16036606))))) / - (144725228442.0 + y * (2300535178.0 + y * (18583304.74 + y * (99447.43394 + y * (376.9991397 + y))))); -} - -::ROOT::Double_v BesselJ1(::ROOT::Double_v &x) -{ - ::ROOT::Double_v ax = vecCore::math::Abs(x); - return vecCore::Blend<::ROOT::Double_v>(ax < 8, BesselJ1_Split1_Less(x), BesselJ1_Split1_More(ax, x)); -} - -} // namespace TMath - -#endif diff --git a/math/mathcore/test/CMakeLists.txt b/math/mathcore/test/CMakeLists.txt index 36a6ad3196f90..f2381d113d6db 100644 --- a/math/mathcore/test/CMakeLists.txt +++ b/math/mathcore/test/CMakeLists.txt @@ -75,31 +75,20 @@ ROOT_ADD_GTEST(stressMathCoreUnit stress/testSMatrix.cxx stress/testGenVector.cx stress/TestHelper.cxx LIBRARIES Core MathCore Hist RIO Tree GenVector) -ROOT_ADD_GTEST(GradientUnit testGradient.cxx LIBRARIES Core MathCore Hist ) - ROOT_ADD_GTEST(GradientFittingUnit testGradientFitting.cxx LIBRARIES Core MathCore Hist) - -ROOT_ADD_GTEST(MulmodUnitOpt mulmod_opt.cxx) +ROOT_ADD_GTEST(GradientUnit testGradient.cxx LIBRARIES Core MathCore Hist ) ROOT_ADD_GTEST(MulmodUnitNoInt128 mulmod_noint128.cxx) +ROOT_ADD_GTEST(MulmodUnitOpt mulmod_opt.cxx) ROOT_ADD_GTEST(RanluxLCGUnit ranlux_lcg.cxx) - -ROOT_ADD_GTEST(RanluxppEngineTests RanluxppEngine.cxx - LIBRARIES Core MathCore) - -if(veccore AND vc) - ROOT_ADD_GTEST(VectorizedTMathUnit testVectorizedTMath.cxx - LIBRARIES Core MathCore) -endif() - -ROOT_ADD_GTEST(testRootFinder testRootFinder.cxx LIBRARIES ${Libraries}) - -ROOT_ADD_GTEST(testKahan testKahan.cxx LIBRARIES Core MathCore) +ROOT_ADD_GTEST(RanluxppEngineTests RanluxppEngine.cxx LIBRARIES Core MathCore) +ROOT_ADD_GTEST(VectorizedTMathUnit testVectorizedTMath.cxx LIBRARIES Core MathCore) ROOT_ADD_GTEST(testDelaunay2D testDelaunay2D.cxx LIBRARIES Core MathCore) +ROOT_ADD_GTEST(testFitter testFitter.cxx LIBRARIES Core MathCore) ROOT_ADD_GTEST(testKNNDensity testKNNDensity.cxx LIBRARIES Core MathCore Hist) +ROOT_ADD_GTEST(testKahan testKahan.cxx LIBRARIES Core MathCore) +ROOT_ADD_GTEST(testLFSR testLFSR.cxx LIBRARIES Core MathCore) +ROOT_ADD_GTEST(testRootFinder testRootFinder.cxx LIBRARIES ${Libraries}) if(clad) ROOT_ADD_GTEST(CladDerivatorTests CladDerivatorTests.cxx LIBRARIES Core MathCore) endif() - -ROOT_ADD_GTEST(testFitter testFitter.cxx LIBRARIES Core MathCore) -ROOT_ADD_GTEST(testLFSR testLFSR.cxx LIBRARIES Core MathCore) diff --git a/math/mathcore/test/fit/testBinnedFitExecPolicy.cxx b/math/mathcore/test/fit/testBinnedFitExecPolicy.cxx index 8eb6169b92053..bf609112a7235 100644 --- a/math/mathcore/test/fit/testBinnedFitExecPolicy.cxx +++ b/math/mathcore/test/fit/testBinnedFitExecPolicy.cxx @@ -114,7 +114,7 @@ int main() benchmarkFit(f, h1f, "S L", fit, models[EModel::kPoisson]); #endif -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD TF1 *fvecCore = new TF1("fvCore", func, 100, 200, 4); fit = "Vectorized"; diff --git a/math/mathcore/test/fit/testLogLExecPolicy.cxx b/math/mathcore/test/fit/testLogLExecPolicy.cxx index ddc8733d53bed..09874f6feeddd 100644 --- a/math/mathcore/test/fit/testLogLExecPolicy.cxx +++ b/math/mathcore/test/fit/testLogLExecPolicy.cxx @@ -124,7 +124,7 @@ class TestVector { //wfSeq = new ROOT::Math::WrappedMultiTF1Templ(*fSeq); wfSeq = new Func (p); -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD // not needed but just to test vectorized ctor of TF1 fVec = new TF1("fvCore", Func(p), 100, 200, paramSize); //wfVec = new ROOT::Math::WrappedMultiTF1Templ(*fVec); @@ -198,7 +198,7 @@ class TestVector { return ret; } -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD bool testFitVec() { std::cout << "\n////////////////////////////VECTOR TEST////////////////////////////" << std::endl << std::endl; @@ -249,7 +249,7 @@ class TestVector { private: TF1 *fSeq; ROOT::Math::IParametricFunctionMultiDimTempl *wfSeq; -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD TF1 *fVec; ROOT::Math::IParametricFunctionMultiDimTempl *wfVec; #endif @@ -276,7 +276,7 @@ int main() return -1; } -#if defined(R__USE_IMT) || defined(R__HAS_VECCORE) +#if defined(R__USE_IMT) || defined(R__HAS_STD_SIMD) auto seq = test.GetFitter().Result().MinFcnValue(); #endif @@ -291,7 +291,7 @@ int main() return 1; #endif -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD //Vectorized if (!test.testFitVec()) { Error("testLogLExecPolicy", "Vectorized Fit failed!"); @@ -302,7 +302,7 @@ int main() return 2; #endif -#if defined(R__USE_IMT) && defined(R__HAS_VECCORE) +#if defined(R__USE_IMT) && defined(R__HAS_STD_SIMD) //Multithreaded and vectorized if (!test.testMTFitVec()) { Error("testLogLExecPolicy", "Multithreaded + vectorized Fit failed!"); diff --git a/math/mathcore/test/testGradient.cxx b/math/mathcore/test/testGradient.cxx index bc22be089cd47..878d2c850a1cb 100644 --- a/math/mathcore/test/testGradient.cxx +++ b/math/mathcore/test/testGradient.cxx @@ -72,7 +72,7 @@ using ScalarSerial2D = GradientTestTraits; -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD // Typedefs of GradientTestTraits for vectorial (serial and multithreaded) // scenarios @@ -193,7 +193,7 @@ struct Model2D : public Model { fHistogram->FillRandom(nameTF2.c_str(), 1000); } -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD static T TemplatedGaus(T x, Double_t mean, Double_t sigma, Bool_t norm = false) { if (sigma == 0) @@ -202,13 +202,18 @@ struct Model2D : public Model { T arg = (x - mean) / sigma; // for |arg| > 39 result is zero in double precision - vecCore::Mask_v mask = !(arg < -39.0 || arg > 39.0); + auto mask = !(arg < -39.0 || arg > 39.0); // Initialize the result to 0.0 T res(0.0); // Compute the function only when the arg meets the criteria, using the mask computed before - vecCore::MaskedAssign(res, mask, vecCore::math::Exp(-0.5 * arg * arg)); + if constexpr (std::is_arithmetic_v) { + res = mask ? exp(-0.5 * arg * arg) : 0.0; + } else { + // assume std::simd for non-arithmetic types + where(mask, res) = exp(-0.5 * arg * arg); + } if (!norm) return res; @@ -439,7 +444,7 @@ class LogLikelihoodGradientTest : public ::testing::Test, public LogLikelihoodGr }; // Types used by Google Test to instantiate the tests. -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD # ifdef R__USE_IMT typedef ::testing::Types diff --git a/math/mathcore/test/testGradientFitting.cxx b/math/mathcore/test/testGradientFitting.cxx index f85f826d9fd58..0b9e4677d2e69 100644 --- a/math/mathcore/test/testGradientFitting.cxx +++ b/math/mathcore/test/testGradientFitting.cxx @@ -107,7 +107,7 @@ using ScalarBinned = GradientFittingTestTraits; // Typedefs of GradientTestTraits for vectorial (binned and unbinned) data -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD using VectorialChi2 = GradientFittingTestTraits; using VectorialBinned = GradientFittingTestTraits; using VectorialUnBinned = GradientFittingTestTraits; @@ -260,7 +260,7 @@ class GradientFittingTest : public ::testing::Test { }; // Types used by Google Test to instantiate the tests. -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD typedef ::testing::Types TestTypes; //typedef ::testing::Types TestTypes; diff --git a/math/mathcore/test/testTemplatedGradient.cxx b/math/mathcore/test/testTemplatedGradient.cxx index a0dbbbae7e589..9843ef31695b5 100644 --- a/math/mathcore/test/testTemplatedGradient.cxx +++ b/math/mathcore/test/testTemplatedGradient.cxx @@ -21,9 +21,7 @@ #include #include -#ifdef R__HAS_VECCORE - -#include +#ifdef R__HAS_STD_SIMD using namespace ROOT::Math; using namespace ROOT::Fit::FitUtil; diff --git a/math/mathcore/test/testVectorizedTMath.cxx b/math/mathcore/test/testVectorizedTMath.cxx index c70591fa6e06e..029a0e3fc01ac 100644 --- a/math/mathcore/test/testVectorizedTMath.cxx +++ b/math/mathcore/test/testVectorizedTMath.cxx @@ -3,6 +3,8 @@ #include "VectorizedTMath.h" #include +#ifdef R__HAS_STD_SIMD + #define N 16384 Double_t uniform_random(Double_t a, Double_t b) @@ -10,60 +12,75 @@ Double_t uniform_random(Double_t a, Double_t b) return a + (b - a) * drand48(); } +template +void load_simd(V &v, S const *ptr) +{ + for (size_t i = 0; i < V::size(); ++i) + v[i] = ptr[i]; +} + +template +void store_simd(V const &v, S *ptr) +{ + for (size_t i = 0; i < V::size(); ++i) + ptr[i] = v[i]; +} + class VectorizedTMathTest : public ::testing::Test { protected: VectorizedTMathTest() {} - size_t kVS = vecCore::VectorSize(); - Double_t input_array1[N] __attribute__((aligned(VECCORE_SIMD_ALIGN))); - Double_t input_array2[N] __attribute__((aligned(VECCORE_SIMD_ALIGN))); + size_t kVS = ROOT::Double_v::size(); + Double_t input_array1[N]; + Double_t input_array2[N]; - Double_t output_array[N] __attribute__((aligned(VECCORE_SIMD_ALIGN))); + Double_t output_array[N]; }; #define TEST_VECTORIZED_TMATH_FUNCTION(tmathfunc, a, b) \ TEST_F(VectorizedTMathTest, tmathfunc) \ { \ - int trials = N; \ + int trials = N; \ for (int i = 0; i < trials; i++) \ input_array1[i] = uniform_random(a, b); \ \ - ROOT::Double_v x, y; \ + ROOT::Double_v x{}; \ + ROOT::Double_v y{}; \ for (int j = 0; j < trials; j += kVS) { \ - vecCore::Load(x, &input_array1[j]); \ + load_simd(x, &input_array1[j]); \ y = TMath::tmathfunc(x); \ - vecCore::Store(y, &output_array[j]); \ + store_simd(y, &output_array[j]); \ } \ for (int j = 0; j < trials; j++) { \ Double_t scalar_output = TMath::tmathfunc(input_array1[j]); \ Double_t vec_output = output_array[j]; \ Double_t re = \ (scalar_output == vec_output && scalar_output == 0) ? 0 : (vec_output - scalar_output) / scalar_output; \ - EXPECT_NEAR(0, re, 1e9 * std::numeric_limits::epsilon()); \ + EXPECT_NEAR(0, re, 1e9 * std::numeric_limits::epsilon()); \ } \ } #define TEST_VECTORIZED_TMATH_FUNCTION2(tmathfunc, a, b, c, d) \ TEST_F(VectorizedTMathTest, tmathfunc) \ { \ - int trials = N; \ + int trials = N; \ for (int i = 0; i < trials; i++) { \ input_array1[i] = uniform_random(a, b); \ input_array2[i] = uniform_random(c, d); \ } \ ROOT::Double_v x1, x2, y; \ for (int j = 0; j < trials; j += kVS) { \ - vecCore::Load(x1, &input_array1[j]); \ - vecCore::Load(x2, &input_array2[j]); \ + load_simd(x1, &input_array1[j]); \ + load_simd(x2, &input_array2[j]); \ y = TMath::tmathfunc(x1, x2); \ - vecCore::Store(y, &output_array[j]); \ + store_simd(y, &output_array[j]); \ } \ for (int j = 0; j < trials; j++) { \ Double_t scalar_output = TMath::tmathfunc(input_array1[j], input_array2[j]); \ Double_t vec_output = output_array[j]; \ Double_t re = \ (scalar_output == vec_output && scalar_output == 0) ? 0 : (vec_output - scalar_output) / scalar_output; \ - EXPECT_NEAR(0, re, 1e10 * std::numeric_limits::epsilon()); \ + EXPECT_NEAR(0, re, 1e10 * std::numeric_limits::epsilon()); \ } \ } @@ -99,6 +116,8 @@ TEST_VECTORIZED_TMATH_FUNCTION(BesselI1, FLT_MIN, FLT_MAX_10_EXP); TEST_VECTORIZED_TMATH_FUNCTION_FLT_BESSEL(BesselJ0); TEST_VECTORIZED_TMATH_FUNCTION_FLT_BESSEL(BesselJ1); +#endif // R__HAS_STD_SIMD + int main(int argc, char *argv[]) { ::testing::InitGoogleTest(&argc, argv); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 9298f43d2106d..352e93ddf1e05 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -307,31 +307,9 @@ if(MSVC) endif() ROOT_ADD_TEST(test-TFormulaTests COMMAND TFormulaTests FAILREGEX "FAILED|Error in") -#--Vc basic test----------------------------------------------------------------------------------- -if(vc) - ROOT_EXECUTABLE(testVc testVc.cxx LIBRARIES ${Vc_LIBRARIES} BUILTINS Vc) - target_include_directories(testVc SYSTEM BEFORE PRIVATE ${Vc_INCLUDE_DIRS}) - ROOT_ADD_TEST(test-Vc COMMAND testVc FAILREGEX "FAILED|Error in") -endif() - -#--VecCore basic test------------------------------------------------------------------------------ -if(veccore) - ROOT_EXECUTABLE(test-veccore test-veccore.cxx LIBRARIES ${VecCore_LIBRARIES} BUILTINS VECCORE) - target_include_directories(test-veccore BEFORE PRIVATE ${VecCore_INCLUDE_DIRS}) - target_compile_definitions(test-veccore PRIVATE ${VecCore_DEFINITIONS}) - if(veccore AND vc) - ROOT_ADD_TEST(VecCore COMMAND test-veccore REGEX "Vc") - else() - ROOT_ADD_TEST(VecCore COMMAND test-veccore REGEX "Scalar") - endif() -endif() - -#--Vc GenVector test----------------------------------------------------------------------------------- -if(vc) - ROOT_EXECUTABLE(testGenVectorVc testGenVectorVc.cxx LIBRARIES Physics GenVector ${Vc_LIBRARIES} BUILTINS Vc) - target_include_directories(testGenVectorVc SYSTEM BEFORE PRIVATE ${Vc_INCLUDE_DIRS}) - ROOT_ADD_TEST(test-GenVector-Vc COMMAND testGenVectorVc FAILREGEX "FAILED|Error in") -endif() +#--std::simd GenVector test----------------------------------------------------------------------------------- +ROOT_EXECUTABLE(testGenVectorSimd testGenVectorSimd.cxx LIBRARIES Physics GenVector) +ROOT_ADD_TEST(test-GenVector-Simd COMMAND testGenVectorSimd FAILREGEX "FAILED|Error in") #--g2root------------------------------------------------------------------------------------------ if(TARGET g2root) diff --git a/test/TFormulaVecTests.h b/test/TFormulaVecTests.h index 365d3e9e384ac..d0b0f6870c2e6 100644 --- a/test/TFormulaVecTests.h +++ b/test/TFormulaVecTests.h @@ -28,10 +28,10 @@ bool testVec1D(TF1 * f1, const TString & formula, FreeFunc1D func, double x ) { bool ret = CheckValues(formula, y, y0); // check passing double_v interface -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD ROOT::Double_v vx = x; ROOT::Double_v vy = f1->EvalPar(&vx, nullptr); - double y2 = vecCore::Get(vy,0); + double y2 = vy[0]; ret &= CheckValues(formula+TString("_v"), y2, y0); #endif @@ -64,10 +64,10 @@ bool testVec2D(TF2 * f1, const TString & formula, FreeFunc2D func, double x, dou bool ret = CheckValues(formula,r,r0); // check passing double_v interface -#ifdef R__HAS_VECCORE +#ifdef R__HAS_STD_SIMD ROOT::Double_v vx[2] = { x, y}; ROOT::Double_v vy = f1->EvalPar(vx, nullptr); - double r2 = vecCore::Get(vy,0); + double r2 = vy[0]; ret &= CheckValues(formula+TString("_v"), r2, r0); #endif @@ -103,30 +103,24 @@ bool testVecFormula() { // https://github.com/root-project/root/pull/20769 ok &= testVec1D("3+[0]",constant_function, 3.333); - ok &= testVec1D("3",constant_function, 3.333); -#ifndef R__HAS_VC + ok &= testVec1D("3", constant_function, 3.333); ok &= testVec1D("sin(x)",std::sin,1); - ok &= testVec1D("cos(x)",std::cos,1); -#endif + ok &= testVec1D("cos(x)", std::cos, 1); ok &= testVec1D("exp(x)",std::exp,1); ok &= testVec1D("log(x)",std::log,2); - ok &= testVec1D("log10(x)",TMath::Log10,2); -#ifndef R__HAS_VC + ok &= testVec1D("log10(x)", TMath::Log10, 2); ok &= testVec1D("tan(x)",std::tan,1); ok &= testVec1D("sinh(x)",std::sinh,1); ok &= testVec1D("cosh(x)",std::cosh,1); ok &= testVec1D("tanh(x)",std::tanh,1); ok &= testVec1D("asin(x)",std::asin,.1); - ok &= testVec1D("acos(x)",std::acos,.1); -#endif + ok &= testVec1D("acos(x)", std::acos, .1); ok &= testVec1D("atan(x)",std::atan,.1); ok &= testVec1D("sqrt(x)",std::sqrt,2); ok &= testVec1D("abs(x)",std::abs,-1); - ok &= testVec2D("pow(x,y)",std::pow,2,3); -#ifndef R__HAS_VC + ok &= testVec2D("pow(x,y)", std::pow, 2, 3); ok &= testVec2D("min(x,y)",TMath::Min,2,3); - ok &= testVec2D("max(x,y)",TMath::Max,2,3); -#endif + ok &= testVec2D("max(x,y)", TMath::Max, 2, 3); ok &= testVec2D("atan2(x,y)",TMath::ATan2,2,3); return ok; diff --git a/test/test-veccore.cxx b/test/test-veccore.cxx deleted file mode 100644 index 6a8315b55880c..0000000000000 --- a/test/test-veccore.cxx +++ /dev/null @@ -1,15 +0,0 @@ -#include -#include - -int main(int, char **) -{ - printf("VecCore available backends: Scalar "); -#ifdef VECCORE_ENABLE_UMESIMD - printf("UME::SIMD "); -#endif -#ifdef VECCORE_ENABLE_VC - printf("Vc"); -#endif - printf("\n"); - return 0; -} diff --git a/test/testGenVectorVc.cxx b/test/testGenVectorSimd.cxx similarity index 75% rename from test/testGenVectorVc.cxx rename to test/testGenVectorSimd.cxx index 4c09ee87bdc9a..393335a024b4c 100644 --- a/test/testGenVectorVc.cxx +++ b/test/testGenVectorSimd.cxx @@ -5,12 +5,6 @@ #include "Math/GenVector/Transform3D.h" #include "TStopwatch.h" -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wall" -#pragma GCC diagnostic ignored "-Wunused-parameter" -#include -#pragma GCC diagnostic pop - // STL #include #include @@ -20,7 +14,27 @@ #include #include -template +#ifdef R__HAS_STD_SIMD + +#include + +#define std_simd std::experimental::simd + +template +std::string simd_to_string(const Simd &v) +{ + std::stringstream os{}; + os << "["; + for (std::size_t i = 0; i < Simd::size(); ++i) { + if (i) + os << ", "; + os << v[i]; + } + os << "]"; + return os.str(); +} + +template T relativeError(const T &x, const T &y) { if (x == y) @@ -40,12 +54,12 @@ int compare(double x, double y, double tolerance = 1.0e-12) if (error > tolerance) { int pr = std::cerr.precision(16); - std::cerr << "Error above tolerance:" << std::endl - << " expected = " << x << std::endl - << "true value = " << y << std::endl - << "abs. error = " << std::abs(x-y) << std::endl - << "rel. error = " << error << std::endl - << "tolerance = " << tolerance << std::endl; + std::cerr << "Error above tolerance:" << std::endl + << " expected = " << x << std::endl + << "true value = " << y << std::endl + << "abs. error = " << std::abs(x - y) << std::endl + << "rel. error = " << error << std::endl + << "tolerance = " << tolerance << std::endl; std::cerr.precision(pr); return 1; } @@ -65,26 +79,31 @@ static std::uniform_real_distribution p0(-0.002, 0.002), p1(-0.2, 0.2), template class Data { public: - typedef std::vector> Vector; + typedef std::vector Vector; public: - POINT position; + POINT position; VECTOR direction; - POINT CoC; - PLANE plane; - FTYPE radius{0}; + POINT CoC; + PLANE plane; + FTYPE radius{0}; public: template Data(const INDATA &ind) : position(ind.position.x(), ind.position.y(), ind.position.z()), - direction(ind.direction.x(), ind.direction.y(), ind.direction.z()), CoC(ind.CoC.x(), ind.CoC.y(), ind.CoC.z()), - plane(ind.plane.A(), ind.plane.B(), ind.plane.C(), ind.plane.D()), radius(ind.radius) + direction(ind.direction.x(), ind.direction.y(), ind.direction.z()), + CoC(ind.CoC.x(), ind.CoC.y(), ind.CoC.z()), + plane(ind.plane.A(), ind.plane.B(), ind.plane.C(), ind.plane.D()), + radius(ind.radius) { } Data() - : position(p_x(gen), p_y(gen), p_z(gen)), direction(d_x(gen), d_y(gen), d_z(gen)), - CoC(c_x(gen), c_y(gen), c_z(gen)), plane(p0(gen), p1(gen), p2(gen), p3(gen)), radius(r_rad(gen)) + : position(p_x(gen), p_y(gen), p_z(gen)), + direction(d_x(gen), d_y(gen), d_z(gen)), + CoC(c_x(gen), c_y(gen), c_z(gen)), + plane(p0(gen), p1(gen), p2(gen), p3(gen)), + radius(r_rad(gen)) { } }; @@ -106,12 +125,12 @@ template zero; + const FTYPE a = direction.Mag2(); + const VECTOR delta = position - CoC; + const FTYPE b = two * direction.Dot(delta); + const FTYPE c = delta.Mag2() - radius * radius; + const FTYPE discr = b * b - four * a * c; + const bool OK = discr > zero; if (OK) { const FTYPE dist = half * (std::sqrt(discr) - b) / a; // change position to the intersection point @@ -128,19 +147,19 @@ template ::value && !std::is_arithmetic::value && !std::is_arithmetic::value>::type> -inline typename FTYPE::mask_type reflectSpherical(POINT &position, VECTOR &direction, const POINT &CoC, - const FTYPE radius) +inline typename FTYPE::mask_type +reflectSpherical(POINT &position, VECTOR &direction, const POINT &CoC, const FTYPE radius) { - const FTYPE two(2.0), four(4.0), half(0.5); - const FTYPE a = direction.Mag2(); - const VECTOR delta = position - CoC; - const FTYPE b = two * direction.Dot(delta); - const FTYPE c = delta.Mag2() - radius * radius; - FTYPE discr = b * b - four * a * c; - typename FTYPE::mask_type OK = discr > FTYPE::Zero(); + const FTYPE two(2.0), four(4.0), half(0.5); + const FTYPE a = direction.Mag2(); + const VECTOR delta = position - CoC; + const FTYPE b = two * direction.Dot(delta); + const FTYPE c = delta.Mag2() - radius * radius; + FTYPE discr = b * b - four * a * c; + typename FTYPE::mask_type OK = discr > FTYPE(0); if (any_of(OK)) { // Zero out the negative values in discr, to prevent sqrt(-ve) - discr(!OK) = FTYPE::Zero(); + where(!OK, discr) = FTYPE(0); // compute the distance const FTYPE dist = half * (sqrt(discr) - b) / a; // change position to the intersection point @@ -160,11 +179,11 @@ template ::value>::type> inline typename FTYPE::mask_type reflectPlane(POINT &position, VECTOR &direction, const PLANE &plane) { - const typename POINT::Scalar two(2.0); + const typename POINT::Scalar two(2.0); const typename FTYPE::mask_type OK(true); // Plane normal const VECTOR normal = plane.Normal(); // compute distance to the plane - const FTYPE scalar = direction.Dot(normal); + const FTYPE scalar = direction.Dot(normal); const FTYPE distance = -(plane.Distance(position)) / scalar; // change position to reflection point and update direction position += distance * direction; @@ -209,11 +228,12 @@ int main(int /*argc*/, char ** /*argv*/) // Scalar Types Data, Vector, Plane, double>::Vector scalar_data(nPhotons); - // Vc Types - Data, Vector, Plane, Vc::double_v>::Vector vc_data; + // std::simd Types + Data>, Vector>, Plane>, + std_simd>::Vector vc_data; // Clone the exact random values from the Scalar vector // Note we are making the same number of entries in the container, but each entry is a vector entry - // with Vc::double_t::Size entries. + // with std::simd::size() entries. fill(scalar_data, vc_data); // Loop over the two containers and compare @@ -232,7 +252,7 @@ int main(int /*argc*/, char ** /*argv*/) std::cout << "Position " << sc.position << " " << vc.position << std::endl; std::cout << "Direction " << sc.direction << " " << vc.direction << std::endl; - for (std::size_t j = 0; j < Vc::double_v::Size; ++j) { + for (std::size_t j = 0; j < std_simd::size(); ++j) { ret |= compare(sc.position.x(), vc.position.x()[j]); ret |= compare(sc.position.y(), vc.position.y()[j]); ret |= compare(sc.position.z(), vc.position.z()[j]); @@ -255,19 +275,19 @@ int main(int /*argc*/, char ** /*argv*/) PositionVector sp4(p_x(gen), p_y(gen), p_z(gen)); PositionVector sp5(p_x(gen), p_y(gen), p_z(gen)); PositionVector sp6(p_x(gen), p_y(gen), p_z(gen)); - // clone to Vc versions - PositionVector vp1(sp1.x(), sp1.y(), sp1.z()); - PositionVector vp2(sp2.x(), sp2.y(), sp2.z()); - PositionVector vp3(sp3.x(), sp3.y(), sp3.z()); - PositionVector vp4(sp4.x(), sp4.y(), sp4.z()); - PositionVector vp5(sp5.x(), sp5.y(), sp5.z()); - PositionVector vp6(sp6.x(), sp6.y(), sp6.z()); + // clone to std::simd versions + PositionVector> vp1(sp1.x(), sp1.y(), sp1.z()); + PositionVector> vp2(sp2.x(), sp2.y(), sp2.z()); + PositionVector> vp3(sp3.x(), sp3.y(), sp3.z()); + PositionVector> vp4(sp4.x(), sp4.y(), sp4.z()); + PositionVector> vp5(sp5.x(), sp5.y(), sp5.z()); + PositionVector> vp6(sp6.x(), sp6.y(), sp6.z()); // Make transformations from points // note warnings about axis not having the same angles expected here... // point is to check scalar and vector versions do the same thing const ROOT::Math::Impl::Transform3D st(sp1, sp2, sp3, sp4, sp5, sp6); - const ROOT::Math::Impl::Transform3D vt(vp1, vp2, vp3, vp4, vp5, vp6); + const ROOT::Math::Impl::Transform3D> vt(vp1, vp2, vp3, vp4, vp5, vp6); // transform the vectors const auto sv = st * sc.direction; @@ -283,7 +303,7 @@ int main(int /*argc*/, char ** /*argv*/) const auto vv_i = vt_i * vv; std::cout << "Transformed Back Direction " << sc.direction << " " << sv_i << " " << vv_i << std::endl; - for (std::size_t j = 0; j < Vc::double_v::Size; ++j) { + for (std::size_t j = 0; j < std_simd::size(); ++j) { ret |= compare(sv.x(), vv.x()[j]); ret |= compare(sv.y(), vv.y()[j]); ret |= compare(sv.z(), vv.z()[j]); @@ -300,7 +320,7 @@ int main(int /*argc*/, char ** /*argv*/) const double a(p0(gen)), b(p1(gen)), c(p2(gen)), d(p3(gen)); Plane sc_plane(a, b, c, d); // make a vector plane - Plane vc_plane(a, b, c, d); + Plane> vc_plane(a, b, c, d); // transform the planes const auto new_sc_plane = st * sc_plane; @@ -312,7 +332,7 @@ int main(int /*argc*/, char ** /*argv*/) const auto vc_plane_i = vt_i * new_vc_plane; std::cout << "Transformed Back plane " << sc_plane_i << " " << vc_plane_i << std::endl; - for (std::size_t j = 0; j < Vc::double_v::Size; ++j) { + for (std::size_t j = 0; j < std_simd::size(); ++j) { ret |= compare(vc_plane.A()[j], vc_plane_i.A()[j]); ret |= compare(vc_plane.B()[j], vc_plane_i.B()[j]); ret |= compare(vc_plane.C()[j], vc_plane_i.C()[j]); @@ -334,8 +354,8 @@ int main(int /*argc*/, char ** /*argv*/) // scalar data Data, Vector, Plane, double>::Vector scalar_data(nPhotons); // vector data with total equal number of photons (including vectorised size) - Data, Vector, Plane, Vc::double_v>::Vector vc_data( - nPhotons / Vc::double_v::Size); + Data>, Vector>, Plane>, + std_simd>::Vector vc_data(nPhotons / std_simd::size()); TStopwatch t; @@ -355,7 +375,7 @@ int main(int /*argc*/, char ** /*argv*/) } } - // time the Vc implementation + // time the std::simd implementation for (unsigned int i = 0; i < nTests; ++i) { t.Start(); for (auto &vc : vc_data) { @@ -370,13 +390,13 @@ int main(int /*argc*/, char ** /*argv*/) } std::cout << "Scalar best time = " << best_time_scalar << std::endl; - std::cout << "Vectorised Vc best time = " << best_time_vector << std::endl; - std::cout << "Vectorised Vc SIMD size = " << Vc::double_v::Size << std::endl; - std::cout << "Vectorised Vc speedup = " << best_time_scalar / best_time_vector << std::endl; + std::cout << "Vectorised std::simd best time = " << best_time_vector << std::endl; + std::cout << "Vectorised std::simd SIMD size = " << std_simd::size() << std::endl; + std::cout << "Vectorised std::simd speedup = " << best_time_scalar / best_time_vector << std::endl; - // assert that the vector time is roughly Vc::double_v::Size times smaller than the scalar time + // assert that the vector time is roughly std_simd::size() times smaller than the scalar time // allow 25% for 'safety' - // if (std::fabs((best_time_vector * Vc::double_v::Size) - best_time_scalar) > 0.25 * best_time_scalar) { + // if (std::fabs((best_time_vector * std_simd::size()) - best_time_scalar) > 0.25 * best_time_scalar) { // ++ret; // } } @@ -387,3 +407,13 @@ int main(int /*argc*/, char ** /*argv*/) std::cout << "test OK " << std::endl; return ret; } + +#else + +int main(int /*argc*/, char ** /*argv*/) +{ + std::cout << "test did nothing because std::experimental::simd is not available" << std::endl; + return 0; +} + +#endif // R__HAS_STD_SIMD diff --git a/test/testVc.cxx b/test/testVc.cxx deleted file mode 100644 index 3b8886a76408d..0000000000000 --- a/test/testVc.cxx +++ /dev/null @@ -1,94 +0,0 @@ -/* This file is part of the Vc project - Copyright (C) 2009-2010 Matthias Kretz - - Permission to use, copy, modify, and distribute this software - and its documentation for any purpose and without fee is hereby - granted, provided that the above copyright notice appear in all - copies and that both that the copyright notice and this - permission notice and warranty disclaimer appear in supporting - documentation, and that the name of the author not be used in - advertising or publicity pertaining to distribution of the - software without specific, written prior permission. - - The author disclaim all warranties with regard to this - software, including all implied warranties of merchantability - and fitness. In no event shall the author be liable for any - special, indirect or consequential damages or any damages - whatsoever resulting from loss of use, data or profits, whether - in an action of contract, negligence or other tortious action, - arising out of or in connection with the use or performance of - this software. - -*/ - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wconditional-uninitialized" -#endif - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wall" -#pragma GCC diagnostic ignored "-Wunused-parameter" -#include -#include -#pragma GCC diagnostic pop - -#ifdef __clang__ -#pragma clang diagnostic pop -#endif - -#include -#include - -template class Matrix; -template std::ostream &operator<<(std::ostream &, const Matrix &); - -template -class Matrix -{ - friend std::ostream &operator<< <>(std::ostream &, const Matrix &); - private: - typedef Vc::Vector V; - Vc::Memory m_mem; - public: - Matrix &operator=(const T &val) { - V vec(val); - for (unsigned int i = 0; i < m_mem.vectorsCount(); ++i) { - m_mem.vector(i) = vec; - } - return *this; - } - - Matrix &operator+=(const Matrix &rhs) { - for (unsigned int i = 0; i < m_mem.vectorsCount(); ++i) { - V v1(m_mem.vector(i)); - v1 += V(rhs.m_mem.vector(i)); - m_mem.vector(i) = v1; - } - return *this; - } -}; - -template -std::ostream &operator<<(std::ostream &out, const Matrix &m) -{ - for (unsigned int i = 0; i < Size; ++i) { - std::cout << "[" << std::setw(6) << m.m_mem[i * Size]; - for (unsigned int j = 1; j < Size; ++j) { - std::cout << std::setw(6) << m.m_mem[i * Size + j]; - } - std::cout << " ]\n"; - } - return out; -} - -int main() -{ - Matrix m1; - m1 = 1.f; - Matrix m2; - m2 = 2.f; - m1 += m2; - std::cout << m1 << std::endl; - return 0; -}