Skip to content

Commit

Permalink
Intel(R) oneAPI Collective Communications Library (oneCCL) 2021.1-beta08
Browse files Browse the repository at this point in the history
  • Loading branch information
sazanovd committed Aug 7, 2020
1 parent 4bbbbb4 commit 7348045
Show file tree
Hide file tree
Showing 484 changed files with 30,154 additions and 27,535 deletions.
24 changes: 14 additions & 10 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,10 @@ set(ICC_BFP16_AVX512BF_MIN_SUPPORTED "19.1.0")
set(CLANG_BFP16_MIN_SUPPORTED "9.0.0")
set(CLANG_BFP16_AVX512BF_MIN_SUPPORTED "10.0.0")

if (${CMAKE_C_COMPILER_ID} STREQUAL "Intel"
OR (${CMAKE_C_COMPILER_ID} STREQUAL "Clang"
if (${CMAKE_C_COMPILER_ID} STREQUAL "Intel"
OR (${CMAKE_C_COMPILER_ID} STREQUAL "Clang"
AND NOT ${CMAKE_C_COMPILER_VERSION} VERSION_LESS ${CLANG_BFP16_MIN_SUPPORTED})
OR (${CMAKE_C_COMPILER_ID} STREQUAL "GNU"
OR (${CMAKE_C_COMPILER_ID} STREQUAL "GNU"
AND NOT ${CMAKE_C_COMPILER_VERSION} VERSION_LESS ${GCC_BFP16_MIN_SUPPORTED})
)
add_definitions(-DCCL_BFP16_COMPILER)
Expand All @@ -176,11 +176,11 @@ else()
message(STATUS "BFP16 compiler: no")
endif()

if ((${CMAKE_C_COMPILER_ID} STREQUAL "Intel"
if ((${CMAKE_C_COMPILER_ID} STREQUAL "Intel"
AND NOT ${CMAKE_C_COMPILER_VERSION} VERSION_LESS ${ICC_BFP16_AVX512BF_MIN_SUPPORTED})
OR (${CMAKE_C_COMPILER_ID} STREQUAL "Clang"
OR (${CMAKE_C_COMPILER_ID} STREQUAL "Clang"
AND NOT ${CMAKE_C_COMPILER_VERSION} VERSION_LESS ${CLANG_BFP16_AVX512BF_MIN_SUPPORTED})
OR (${CMAKE_C_COMPILER_ID} STREQUAL "GNU"
OR (${CMAKE_C_COMPILER_ID} STREQUAL "GNU"
AND NOT ${CMAKE_C_COMPILER_VERSION} VERSION_LESS ${GCC_BFP16_AVX512BF_MIN_SUPPORTED})
)
add_definitions(-DCCL_BFP16_AVX512BF_COMPILER)
Expand Down Expand Up @@ -211,7 +211,7 @@ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")

if (MULTI_GPU_SUPPORT)
message(FATAL_ERROR "MULTI GPU IS NOT SUPPORTED YET")
message(FATAL_ERROR "MULTI GPU IS NOT SUPPORTED YET")
endif(MULTI_GPU_SUPPORT)

#include other CMakeLists
Expand All @@ -220,16 +220,20 @@ add_subdirectory(examples/cpu)

if (CCL_ENABLE_SYCL)
if (${COMPUTE_RUNTIME_TARGET_NAME} STREQUAL "Intel::SYCL")
set (CCL_ENABLCE_SYCL_CHECK_CONTRACT "#if defined(__cplusplus)\n#if !defined(__clang__) || __clang_major__ < 9 || !defined(CL_SYCL_LANGUAGE_VERSION)\n#error This version of CCL configured only for DPC++ compiler\n#endif\n#endif")
set (CCL_ENABLCE_SYCL_CHECK_CONTRACT "#if defined(__cplusplus)\n#if !defined(__clang__) || __clang_major__ < 9 || !defined(CL_SYCL_LANGUAGE_VERSION)\n#error This version of CCL configured only for oneAPI DPC++ Compiler\n#endif\n#endif")
else(${COMPUTE_RUNTIME_TARGET_NAME} STREQUAL "Codeplay::ComputeCpp")
set (CCL_ENABLCE_SYCL_CHECK_CONTRACT "#if defined(__cplusplus)\n#if !defined(__clang__) || __clang_major__ < 6\n#error This version of CCL configured only for DPC++ compiler\n#endif\n#endif")
set (CCL_ENABLCE_SYCL_CHECK_CONTRACT "#if defined(__cplusplus)\n#if !defined(__clang__) || __clang_major__ < 6\n#error This version of CCL configured only for oneAPI DPC++ Compiler\n#endif\n#endif")
endif()
add_subdirectory(examples/sycl)
endif()

add_subdirectory(examples/benchmark)
add_subdirectory(examples/common)
add_subdirectory(tests/functional)
# The Level Zero examples and the unit tests are added to the build only when
# MULTI_GPU_SUPPORT is enabled.
# NOTE(review): an earlier check in this file stops configuration with
# FATAL_ERROR whenever MULTI_GPU_SUPPORT is set, so this branch looks
# unreachable for now - confirm that is intended.
if(MULTI_GPU_SUPPORT)
add_subdirectory(examples/level_zero)
add_subdirectory(tests/unit)
endif()

#generate & install vars.sh
configure_file(cmake/vars.sh.in ${CMAKE_CURRENT_BINARY_DIR}/vars.sh @ONLY)
Expand All @@ -243,7 +247,7 @@ install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/third-party-programs.txt DESTINATIO
install(PROGRAMS ${PROJECT_SOURCE_DIR}/LICENSE DESTINATION ${CCL_INSTALL_LICENSE})

set(CCL_MAJOR_VERSION "0")
set(CCL_MINOR_VERSION "7")
set(CCL_MINOR_VERSION "8")
set(CCL_UPDATE_VERSION "0")
set(CCL_PRODUCT_STATUS "beta")
string(TIMESTAMP CCL_PRODUCT_BUILD_DATE "%Y-%m-%dT %H:%M:%SZ")
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ Use the command:
```
$ source <install_dir>/env/setvars.sh
$ cd <install_dir>/examples
$ mpirun -n 2 ./common/benchmark
$ mpirun -n 2 ./benchmark/benchmark
```
### Setting workers affinity
There are two ways to set worker thread affinity - explicit and automatic
Expand Down
13 changes: 6 additions & 7 deletions cmake/FindIntelSYCL.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,15 @@ else()
endif()

set(OPENCLROOT "${dpcpp_root_hints}/include/sycl/CL/")
find_package(OpenCL)
if(OpenCL_FOUND)
set(COMPUTE_RUNTIME_NAME OpenCL::OpenCL)
endif()

find_package(L0)
if(LevelZero_FOUND)
set(COMPUTE_RUNTIME_NAME ze_loader)
# Level Zero is looked up only for multi-GPU builds; REQUIRED turns a missing
# L0 package into a configure error instead of a silent fallback.
if(MULTI_GPU_SUPPORT)
find_package(L0 REQUIRED)
# NOTE(review): the package is requested as "L0" but the result is tested via
# LevelZero_FOUND - presumably FindL0.cmake reports under that name; confirm.
if(LevelZero_FOUND)
# Select the Level Zero loader as the compute runtime.
set(COMPUTE_RUNTIME_NAME ze_loader)
endif()
endif()


# Report when COMPUTE_RUNTIME_NAME is still unset at this point. message()
# is called without a mode keyword, so this only prints a notice and does not
# stop configuration.
if (NOT COMPUTE_RUNTIME_NAME)
message("Not OpenCL or L0")
endif()
Expand Down
6 changes: 3 additions & 3 deletions cmake/FindL0.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
list(APPEND l0_root_hints
${ZE_ROOT}
$ENV{ZE_ROOT})

set(original_cmake_prefix_path ${CMAKE_PREFIX_PATH})
if(NOT l0_root_hints)
set(l0_root_hints "/usr")
Expand All @@ -33,8 +33,8 @@ if(NOT TARGET ze_loader)
PATHS
ENV ZE_ROOT
${l0_root_hints}
PATH_SUFFIXES
lib
PATH_SUFFIXES
lib
local/lib
lib/level_zero/
local/lib/level_zero
Expand Down
4 changes: 4 additions & 0 deletions cmake/FindOpenCL.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ find_path(OpenCL_INCLUDE_DIR
PATH_SUFFIXES
include
OpenCL/common/inc
sycl
sycl/CL
include/sycl
include/sycl/CL
"AMD APP/include")

_FIND_OPENCL_VERSION()
Expand Down
6 changes: 4 additions & 2 deletions cmake/ccl
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,12 @@ set oneapiroot "[file dirname "$componentroot"]"

# On load print component name and version being loaded
if { [ module-info mode load ] } {
puts stderr "Loading $modulefilename"
puts stderr "Loading $modulefilename"
}

# On remove print component name and version being removed
if { [ module-info mode ] == "unload" || [ module-info mode ] == "remove" } {
puts stderr "Removing $modulefilename"
puts stderr "Removing $modulefilename"
}


Expand All @@ -55,3 +55,5 @@ setenv CCL_ROOT "$topdir"
setenv CCL_ATL_TRANSPORT_PATH "$topdir/lib"

prepend-path LD_LIBRARY_PATH "$topdir/lib"
prepend-path LIBRARY_PATH "$topdir/lib"
prepend-path CPATH "$topdir/include"
36 changes: 18 additions & 18 deletions cmake/helpers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -43,36 +43,36 @@ endfunction(get_vcs_properties)


function(activate_compute_runtime MODULES_PATH COMPUTE_RUNTIME)

string( TOLOWER "${COMPUTE_RUNTIME}" COMPUTE_RUNTIME)
set(CCL_ENABLE_SYCL_V 0 PARENT_SCOPE)
message("Search Compute Runtime by MODULES_PATH: ${MODULES_PATH}")
list(APPEND CMAKE_MODULE_PATH "${MODULES_PATH}")

if(COMPUTE_RUNTIME STREQUAL "computecpp")
message ("COMPUTE_RUNTIME=${COMPUTE_RUNTIME} requested. Using ComputeCpp provider")
SET (COMPUTE_RUNTIME_LOAD_MODULE "ComputeCpp"
CACHE STRING
SET (COMPUTE_RUNTIME_LOAD_MODULE "ComputeCpp"
CACHE STRING
"COMPUTE_RUNTIME=${COMPUTE_RUNTIME} requested. Using ComputeCpp provider")
find_package(${COMPUTE_RUNTIME_LOAD_MODULE} REQUIRED)

set (CCL_ENABLE_SYCL_V 1 PARENT_SCOPE)

# remember compilation flags, because flag required for OBJECTS target
# but if we use `target_link_libraries`, then these flags applied to all compiler options
# for c & cxx. But we need special flags for cxx only
# So set it manually
set (COMPUTE_RUNTIME_CXXFLAGS_LOCAL "${COMPUTE_RUNTIME_CXXFLAGS_LOCAL} ${COMPUTECPP_FLAGS}")

# remember current target for `target_link_libraries` in ccl
set (COMPUTE_RUNTIME_TARGET_NAME Codeplay::ComputeCpp)
set (COMPUTE_RUNTIME_TARGET_NAME Codeplay::ComputeCpp PARENT_SCOPE)
endif()

if(COMPUTE_RUNTIME STREQUAL "dpcpp")
message ("COMPUTE_RUNTIME=${COMPUTE_RUNTIME} requested. Using DPC++ provider")
SET (COMPUTE_RUNTIME_LOAD_MODULE "IntelSYCL"
CACHE STRING
SET (COMPUTE_RUNTIME_LOAD_MODULE "IntelSYCL"
CACHE STRING
"COMPUTE_RUNTIME=${COMPUTE_RUNTIME} requested. Using DPC++ provider")
find_package(${COMPUTE_RUNTIME_LOAD_MODULE} REQUIRED)

Expand All @@ -83,21 +83,21 @@ function(activate_compute_runtime MODULES_PATH COMPUTE_RUNTIME)
# for c & cxx. But we need special flags for cxx only
# So set it manually
set (COMPUTE_RUNTIME_CXXFLAGS_LOCAL "${COMPUTE_RUNTIME_CXXFLAGS_LOCAL} ${INTEL_SYCL_FLAGS}")

# remember current target for `target_link_libraries` in ccl
set (COMPUTE_RUNTIME_TARGET_NAME Intel::SYCL)
set (COMPUTE_RUNTIME_TARGET_NAME Intel::SYCL PARENT_SCOPE)
endif()

if(COMPUTE_RUNTIME STREQUAL "l0")
SET (COMPUTE_RUNTIME_LOAD_MODULE "L0"
CACHE STRING
SET (COMPUTE_RUNTIME_LOAD_MODULE "L0"
CACHE STRING
"COMPUTE_RUNTIME=${COMPUTE_RUNTIME} requested")
find_package(${COMPUTE_RUNTIME_LOAD_MODULE} REQUIRED)

# No compiler flags
# No compiler flags
set (COMPUTE_RUNTIME_CXXFLAGS_LOCAL "")

# remember current target for `target_link_libraries` in ccl
set (COMPUTE_RUNTIME_TARGET_NAME ze_loader)
set (COMPUTE_RUNTIME_TARGET_NAME ze_loader PARENT_SCOPE)
Expand All @@ -106,11 +106,11 @@ function(activate_compute_runtime MODULES_PATH COMPUTE_RUNTIME)
# extract target properties
get_target_property(COMPUTE_RUNTIME_INCLUDE_DIRS_LOCAL
${COMPUTE_RUNTIME_TARGET_NAME} INTERFACE_INCLUDE_DIRECTORIES)
get_target_property(COMPUTE_RUNTIME_LIBRARIES_LOCAL
get_target_property(COMPUTE_RUNTIME_LIBRARIES_LOCAL
${COMPUTE_RUNTIME_TARGET_NAME} INTERFACE_LINK_LIBRARIES)

# set output variables in the parent scope:
# Only `COMPUTE_RUNTIME_FLAGS` is actually required, because the other flags are derived from
# Only `COMPUTE_RUNTIME_FLAGS` is actually required, because the other flags are derived from
# 'target_link_libraries'.
# For simplicity, set all variables
set(COMPUTE_RUNTIME_FLAGS ${COMPUTE_RUNTIME_CXXFLAGS_LOCAL} PARENT_SCOPE)
Expand Down
13 changes: 5 additions & 8 deletions cmake/vars.sh.in
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,10 @@
#

WORK_DIR="$(cd "$( dirname "${BASH_SOURCE[0]}" )" > /dev/null && pwd)"
export CCL_ROOT="$(cd ${WORK_DIR}/../; pwd -P)"

if [ -z "${LD_LIBRARY_PATH}" ]
then
LD_LIBRARY_PATH="${CCL_ROOT}/lib"; export LD_LIBRARY_PATH
else
LD_LIBRARY_PATH="${CCL_ROOT}/lib:${LD_LIBRARY_PATH}"; export LD_LIBRARY_PATH
fi
export CCL_ROOT="$(cd ${WORK_DIR}/../; pwd -P)"
export CCL_ATL_TRANSPORT_PATH="${CCL_ROOT}/lib"

CCL_ATL_TRANSPORT_PATH="${CCL_ROOT}/lib"; export CCL_ATL_TRANSPORT_PATH
# Prepend the oneCCL include/lib directories to the compiler, linker and
# loader search paths.
# ${VAR:+:${VAR}} appends the previous value only when VAR is set AND
# non-empty. The plain ${VAR+...} form would emit a trailing ':' for a
# set-but-empty variable, and an empty path element is interpreted as the
# current working directory by the compiler and the dynamic loader.
# Values are quoted so a CCL_ROOT containing spaces is preserved intact.
export CPATH="${CCL_ROOT}/include/${CPATH:+:${CPATH}}"
export LIBRARY_PATH="${CCL_ROOT}/lib/${LIBRARY_PATH:+:${LIBRARY_PATH}}"
export LD_LIBRARY_PATH="${CCL_ROOT}/lib/${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
39 changes: 21 additions & 18 deletions doc/rst/source/sparse_collectives.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,28 @@ The ultimate goal of |product_short| is to provide a common API for sparse colle

The ``sparse_allreduce`` function has the following parameters:

- ``send_ind_buf`` - a buffer of indices with ``send_ind_count`` elements of ``index_dtype``
- ``send_ind_count`` - the number of ``send_ind_buf`` elements of type ``index_dtype``
- ``send_val_buf`` - a buffer of values with ``send_val_count`` elements of ``value_dtype``
- ``send_val_count`` - the number of ``send_val_buf`` elements of type ``value_dtype``
- ``recv_ind_buf`` - a buffer to store reduced indices (ignored for now)
- ``recv_ind_count`` - the number of reduced indices (ignored for now)
- ``recv_val_buf`` - a buffer to store reduced values (ignored for now)
- ``recv_val_count`` - the number of reduced values (ignored for now)
- ``index_dtype`` - index type of elements in ``send_ind_buf`` and ``recv_ind_buf`` buffers
- ``value_dtype`` - data type of elements in ``send_val_buf`` and ``recv_val_buf`` buffers
- ``reduction`` - the type of reduction operation to be applied
- ``attributes`` - attributes that customize operation
- returns ``ccl::request`` object to track the progress of the operation

For ``sparse_allreduce``, a completion callback is required to get the results.
- ``send_ind_buf`` - a buffer of indices with ``send_ind_count`` elements of ``index_dtype``
- ``send_ind_count`` - the number of ``send_ind_buf`` elements of type ``index_dtype``
- ``send_val_buf`` - a buffer of values with ``send_val_count`` elements of ``value_dtype``
- ``send_val_count`` - the number of ``send_val_buf`` elements of type ``value_dtype``
- ``recv_ind_buf`` - a buffer to store reduced indices (ignored for now)
- ``recv_ind_count`` - the number of reduced indices (ignored for now)
- ``recv_val_buf`` - a buffer to store reduced values (ignored for now)
- ``recv_val_count`` - the number of reduced values (ignored for now)
- ``index_dtype`` - index type of elements in ``send_ind_buf`` and ``recv_ind_buf`` buffers
- ``value_dtype`` - data type of elements in ``send_val_buf`` and ``recv_val_buf`` buffers
- ``reduction`` - the type of reduction operation to be applied
- ``attributes`` - attributes that customize operation
- returns ``ccl::request`` object to track the progress of the operation

For ``sparse_allreduce``, a completion callback or an allocation callback is required.

Use the following :ref:`Collective Call Attributes` fields:

- ``sparse_allreduce_completion_fn`` - a completion callback function pointer (must not be set to ``NULL``)
- ``sparse_allreduce_completion_ctx`` - a user context pointer of type ``void*``
- ``sparse_allreduce_completion_fn`` - a completion callback function pointer
- ``sparse_allreduce_alloc_fn`` - an allocation callback function pointer
- ``sparse_allreduce_fn_ctx`` - a user context pointer of type ``void*``
- ``sparse_coalesce_mode`` - a coalesce mode

Here is an example of a function definition for ``sparse_allreduce`` completion callback:

Expand All @@ -53,4 +56,4 @@ Here is an example of a function definition for ``sparse_allreduce`` completion
return ccl_status_success;
}

For more details, refer to `this example <https://github.com/oneapi-src/oneCCL/blob/master/examples/cpu/sparse_test_algo.hpp>`_
For more details, refer to `this example <https://github.com/oneapi-src/oneCCL/blob/master/examples/cpu/sparse_allreduce.cpp>`_.
16 changes: 12 additions & 4 deletions doc/rst/source/spec/collective_call_attributes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,31 @@ Collective Call Attributes
ccl_prologue_fn_t prologue_fn;
ccl_epilogue_fn_t epilogue_fn;
ccl_reduction_fn_t reduction_fn;
/* Sparse allreduce collective related fields */
ccl_sparse_allreduce_completion_fn_t sparse_allreduce_completion_fn;
/* User context for saving sparse_allreduce results */
const void* sparse_allreduce_completion_ctx;

/* Priority for collective operation */
size_t priority;

/* Blocking/non-blocking */
int synchronous;

/* Persistent/non-persistent */
int to_cache;

/* Treat buffer as vector/regular - applicable for allgatherv only */
int vector_buf;

/**
* Id of the operation. If specified, new communicator is created and collective
* operations with the same @b match_id are executed in the same order.
*/
const char* match_id;

/* Sparse allreduce specific */
ccl_sparse_allreduce_completion_fn_t sparse_allreduce_completion_fn;
ccl_sparse_allreduce_alloc_fn_t sparse_allreduce_alloc_fn;
const void* sparse_allreduce_fn_ctx;
ccl_sparse_coalesce_mode_t sparse_coalesce_mode;

} ccl_coll_attr_t;

``ccl_coll_attr_t`` (``ccl::coll_attr`` in C++ version of API) is an extendable structure that serves as a modifier of communication primitive behaviour.
Expand Down
16 changes: 7 additions & 9 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,11 @@ set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} ${CXX_COMP
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang" AND ${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
set(CMAKE_CLANG_FLAGS "-fsycl")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -lsycl")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_CLANG_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CLANG_FLAGS}")
# When a compute runtime is requested, resolve it and feed its compiler flags,
# include directories and libraries into the settings used by the examples.
# NOTE(review): activate_compute_runtime is not defined in this file, and the
# only visible include of cmake/helpers.cmake appears further down - confirm
# the function is already defined when this block runs.
if (COMPUTE_RUNTIME)
# The first argument is a single quoted ';'-separated list of module search paths.
activate_compute_runtime("${CCL_ROOT}/lib;${PROJECT_SOURCE_DIR}/cmake" ${COMPUTE_RUNTIME})
# Runtime-specific flags are appended to the C++ flags only (not to the C flags).
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPUTE_RUNTIME_FLAGS}")
# presumably COMPUTE_RUNTIME_INCLUDE_DIRS / COMPUTE_RUNTIME_LIBRARIES are
# exported by the call above - verify against helpers.cmake.
list(APPEND CCL_INC_DIRS ${COMPUTE_RUNTIME_INCLUDE_DIRS})
list(APPEND EXTERNAL_LIBS ${COMPUTE_RUNTIME_LIBRARIES})
endif()

set(GCC_BFP16_MIN_SUPPORTED "4.9.0")
Expand All @@ -84,13 +84,11 @@ if (CCL_BFP16_COMPILER)
endif()
endif()


include_directories(${CCL_ROOT}/include/${CCL_CONFIGURATION})
link_directories(${CCL_ROOT}/lib/${CCL_CONFIGURATION})

include(cmake/helpers.cmake)
include_directories(include)

link_directories(${CCL_ROOT}/lib/${CCL_CONFIGURATION})

add_subdirectory(cpu)
if (${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
add_subdirectory(sycl)
Expand Down
Loading

0 comments on commit 7348045

Please sign in to comment.