diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index 39f92b435b48c..84cbb352de8bb 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -315,13 +315,25 @@ if("native_cpu" IN_LIST SYCL_ENABLE_BACKENDS) endif() # Include NativeCPU UR adapter path to enable finding header file with state struct. # libsycl-nativecpu_utils is only needed as BC file by NativeCPU. - # Todo: add versions for other targets (for cross-compilation) - compile_lib(libsycl-nativecpu_utils - FILETYPE bc - SRC nativecpu_utils.cpp - DEPENDENCIES ${itt_obj_deps} - EXTRA_OPTS -I ${NATIVE_CPU_DIR} -fsycl-targets=native_cpu -fsycl-device-only - -fsycl-device-obj=llvmir) + add_custom_command( + OUTPUT ${bc_binary_dir}/nativecpu_utils.bc + COMMAND ${clang_exe} ${compile_opts} ${bc_device_compile_opts} -fsycl-targets=native_cpu + -I ${NATIVE_CPU_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/nativecpu_utils.cpp + -o ${bc_binary_dir}/nativecpu_utils.bc + MAIN_DEPENDENCY nativecpu_utils.cpp + DEPENDS ${sycl-compiler_deps} + VERBATIM) + add_custom_target(nativecpu_utils-bc DEPENDS ${bc_binary_dir}/nativecpu_utils.bc) + process_bc(libsycl-nativecpu_utils.bc + LIB_TGT libsycl-nativecpu_utils + IN_FILE ${bc_binary_dir}/nativecpu_utils.bc + OUT_DIR ${bc_binary_dir}) + add_custom_target(libsycl-nativecpu_utils-bc DEPENDS ${bc_binary_dir}/libsycl-nativecpu_utils.bc) + add_dependencies(libsycldevice-bc libsycl-nativecpu_utils-bc) + install(FILES ${bc_binary_dir}/libsycl-nativecpu_utils.bc + DESTINATION ${install_dest_bc} + COMPONENT libsycldevice) endif() # Add all device libraries for each filetype except for the Intel math function diff --git a/libdevice/nativecpu_utils.cpp b/libdevice/nativecpu_utils.cpp index 01e3b13bcb9c6..51ef68cfada96 100644 --- a/libdevice/nativecpu_utils.cpp +++ b/libdevice/nativecpu_utils.cpp @@ -29,6 +29,11 @@ using __nativecpu_state = native_cpu::state; #define DEVICE_EXTERNAL_C DEVICE_EXTERN_C __attribute__((always_inline)) #define DEVICE_EXTERNAL SYCL_EXTERNAL __attribute__((always_inline)) +// Several functions are used implicitly by WorkItemLoopsPass and +// PrepareSYCLNativeCPUPass and need to be marked as used to prevent them being +// removed early. +#define USED __attribute__((used)) + #define OCL_LOCAL __attribute__((opencl_local)) #define OCL_GLOBAL __attribute__((opencl_global)) #define OCL_PRIVATE __attribute__((opencl_private)) @@ -354,7 +359,7 @@ using MakeGlobalType = typename sycl::detail::DecoratedType< T, sycl::access::address_space::global_space>::type; #define DefStateSetWithType(name, field, type) \ - DEVICE_EXTERNAL_C void __dpcpp_nativecpu_##name( \ + DEVICE_EXTERNAL_C USED void __dpcpp_nativecpu_##name( \ type value, MakeGlobalType<__nativecpu_state> *s) { \ s->field = value; \ } \ @@ -366,7 +371,7 @@ DefStateSetWithType(set_sub_group_id, SubGroup_id, uint32_t); DefStateSetWithType(set_max_sub_group_size, SubGroup_size, uint32_t); #define DefineStateGetWithType(name, field, type) \ - DEVICE_EXTERNAL_C GET_PROPS type __dpcpp_nativecpu_##name( \ + DEVICE_EXTERNAL_C GET_PROPS USED type __dpcpp_nativecpu_##name( \ MakeGlobalType *s) { \ return s->field; \ } \ @@ -382,7 +387,7 @@ DefineStateGet_U32(get_max_sub_group_size, SubGroup_size); DefineStateGet_U32(get_num_sub_groups, NumSubGroups); #define DefineStateGetWithType2(name, field, rtype, ptype) \ - DEVICE_EXTERNAL_C GET_PROPS rtype __dpcpp_nativecpu_##name( \ + DEVICE_EXTERNAL_C GET_PROPS USED rtype __dpcpp_nativecpu_##name( \ ptype dim, MakeGlobalType *s) { \ return s->field[dim]; \ } \ @@ -400,9 +405,9 @@ DefineStateGet_U64(get_num_groups, MNumGroups); DefineStateGet_U64(get_wg_size, MWorkGroup_size); DefineStateGet_U64(get_wg_id, MWorkGroup_id); -DEVICE_EXTERNAL_C -void __dpcpp_nativecpu_set_local_id(uint32_t dim, uint64_t value, - MakeGlobalType<__nativecpu_state> *s) { +DEVICE_EXTERNAL_C USED void +__dpcpp_nativecpu_set_local_id(uint32_t dim, uint64_t value, + MakeGlobalType<__nativecpu_state> *s) { s->MLocal_id[dim] = value; s->MGlobal_id[dim] = s->MWorkGroup_size[dim] * s->MWorkGroup_id[dim] + s->MLocal_id[dim] + s->MGlobalOffset[dim];