Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL][NativeCPU] Process nativecpu_utils with prepare_builtins #17850

Open
wants to merge 1 commit into
base: sycl
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 19 additions & 7 deletions libdevice/cmake/modules/SYCLLibdevice.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -315,13 +315,25 @@ if("native_cpu" IN_LIST SYCL_ENABLE_BACKENDS)
endif()
# Include NativeCPU UR adapter path to enable finding header file with state struct.
# libsycl-nativecpu_utils is only needed as BC file by NativeCPU.
# Todo: add versions for other targets (for cross-compilation)
compile_lib(libsycl-nativecpu_utils
FILETYPE bc
SRC nativecpu_utils.cpp
DEPENDENCIES ${itt_obj_deps}
EXTRA_OPTS -I ${NATIVE_CPU_DIR} -fsycl-targets=native_cpu -fsycl-device-only
-fsycl-device-obj=llvmir)
add_custom_command(
OUTPUT ${bc_binary_dir}/nativecpu_utils.bc
COMMAND ${clang_exe} ${compile_opts} ${bc_device_compile_opts} -fsycl-targets=native_cpu
-I ${NATIVE_CPU_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/nativecpu_utils.cpp
-o ${bc_binary_dir}/nativecpu_utils.bc
MAIN_DEPENDENCY nativecpu_utils.cpp
DEPENDS ${sycl-compiler_deps}
VERBATIM)
add_custom_target(nativecpu_utils-bc DEPENDS ${bc_binary_dir}/nativecpu_utils.bc)
process_bc(libsycl-nativecpu_utils.bc
LIB_TGT libsycl-nativecpu_utils
IN_FILE ${bc_binary_dir}/nativecpu_utils.bc
OUT_DIR ${bc_binary_dir})
add_custom_target(libsycl-nativecpu_utils-bc DEPENDS ${bc_binary_dir}/libsycl-nativecpu_utils.bc)
add_dependencies(libsycldevice-bc libsycl-nativecpu_utils-bc)
install(FILES ${bc_binary_dir}/libsycl-nativecpu_utils.bc
DESTINATION ${install_dest_bc}
COMPONENT libsycldevice)
endif()

# Add all device libraries for each filetype except for the Intel math function
Expand Down
17 changes: 11 additions & 6 deletions libdevice/nativecpu_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ using __nativecpu_state = native_cpu::state;
#define DEVICE_EXTERNAL_C DEVICE_EXTERN_C __attribute__((always_inline))
#define DEVICE_EXTERNAL SYCL_EXTERNAL __attribute__((always_inline))

// Several functions are used implicitly by WorkItemLoopsPass and
// PrepareSYCLNativeCPUPass and need to be marked as used to prevent them being
// removed early.
#define USED __attribute__((used))

#define OCL_LOCAL __attribute__((opencl_local))
#define OCL_GLOBAL __attribute__((opencl_global))
#define OCL_PRIVATE __attribute__((opencl_private))
Expand Down Expand Up @@ -354,7 +359,7 @@ using MakeGlobalType = typename sycl::detail::DecoratedType<
T, sycl::access::address_space::global_space>::type;

#define DefStateSetWithType(name, field, type) \
DEVICE_EXTERNAL_C void __dpcpp_nativecpu_##name( \
DEVICE_EXTERNAL_C USED void __dpcpp_nativecpu_##name( \
type value, MakeGlobalType<__nativecpu_state> *s) { \
s->field = value; \
} \
Expand All @@ -366,7 +371,7 @@ DefStateSetWithType(set_sub_group_id, SubGroup_id, uint32_t);
DefStateSetWithType(set_max_sub_group_size, SubGroup_size, uint32_t);

#define DefineStateGetWithType(name, field, type) \
DEVICE_EXTERNAL_C GET_PROPS type __dpcpp_nativecpu_##name( \
DEVICE_EXTERNAL_C GET_PROPS USED type __dpcpp_nativecpu_##name( \
MakeGlobalType<const __nativecpu_state> *s) { \
return s->field; \
} \
Expand All @@ -382,7 +387,7 @@ DefineStateGet_U32(get_max_sub_group_size, SubGroup_size);
DefineStateGet_U32(get_num_sub_groups, NumSubGroups);

#define DefineStateGetWithType2(name, field, rtype, ptype) \
DEVICE_EXTERNAL_C GET_PROPS rtype __dpcpp_nativecpu_##name( \
DEVICE_EXTERNAL_C GET_PROPS USED rtype __dpcpp_nativecpu_##name( \
ptype dim, MakeGlobalType<const __nativecpu_state> *s) { \
return s->field[dim]; \
} \
Expand All @@ -400,9 +405,9 @@ DefineStateGet_U64(get_num_groups, MNumGroups);
DefineStateGet_U64(get_wg_size, MWorkGroup_size);
DefineStateGet_U64(get_wg_id, MWorkGroup_id);

DEVICE_EXTERNAL_C
void __dpcpp_nativecpu_set_local_id(uint32_t dim, uint64_t value,
MakeGlobalType<__nativecpu_state> *s) {
DEVICE_EXTERNAL_C USED void
__dpcpp_nativecpu_set_local_id(uint32_t dim, uint64_t value,
MakeGlobalType<__nativecpu_state> *s) {
s->MLocal_id[dim] = value;
s->MGlobal_id[dim] = s->MWorkGroup_size[dim] * s->MWorkGroup_id[dim] +
s->MLocal_id[dim] + s->MGlobalOffset[dim];
Expand Down