Skip to content

Commit 3ce9b0c

Browse files
committed
[SYCL][NativeCPU] Process nativecpu_utils with prepare_builtins
This ensures that functions have the right linkage. Several functions are marked as used to prevent them from being removed as dead code before the work item loop pass runs.
1 parent 48a08bf commit 3ce9b0c

File tree

2 files changed

+30
-13
lines changed

2 files changed

+30
-13
lines changed

libdevice/cmake/modules/SYCLLibdevice.cmake

+19-7
Original file line numberDiff line numberDiff line change
@@ -315,13 +315,25 @@ if("native_cpu" IN_LIST SYCL_ENABLE_BACKENDS)
315315
endif()
316316
# Include NativeCPU UR adapter path to enable finding header file with state struct.
317317
# libsycl-nativecpu_utils is only needed as BC file by NativeCPU.
318-
# Todo: add versions for other targets (for cross-compilation)
319-
compile_lib(libsycl-nativecpu_utils
320-
FILETYPE bc
321-
SRC nativecpu_utils.cpp
322-
DEPENDENCIES ${itt_obj_deps}
323-
EXTRA_OPTS -I ${NATIVE_CPU_DIR} -fsycl-targets=native_cpu -fsycl-device-only
324-
-fsycl-device-obj=llvmir)
318+
add_custom_command(
319+
OUTPUT ${bc_binary_dir}/nativecpu_utils.bc
320+
COMMAND ${clang_exe} ${compile_opts} ${bc_device_compile_opts} -fsycl-targets=native_cpu
321+
-I ${NATIVE_CPU_DIR}
322+
${CMAKE_CURRENT_SOURCE_DIR}/nativecpu_utils.cpp
323+
-o ${bc_binary_dir}/nativecpu_utils.bc
324+
MAIN_DEPENDENCY nativecpu_utils.cpp
325+
DEPENDS ${sycl-compiler_deps}
326+
VERBATIM)
327+
add_custom_target(nativecpu_utils-bc DEPENDS ${bc_binary_dir}/nativecpu_utils.bc)
328+
process_bc(libsycl-nativecpu_utils.bc
329+
LIB_TGT libsycl-nativecpu_utils
330+
IN_FILE ${bc_binary_dir}/nativecpu_utils.bc
331+
OUT_DIR ${bc_binary_dir})
332+
add_custom_target(libsycl-nativecpu_utils-bc DEPENDS ${bc_binary_dir}/libsycl-nativecpu_utils.bc)
333+
add_dependencies(libsycldevice-bc libsycl-nativecpu_utils-bc)
334+
install(FILES ${bc_binary_dir}/libsycl-nativecpu_utils.bc
335+
DESTINATION ${install_dest_bc}
336+
COMPONENT libsycldevice)
325337
endif()
326338

327339
# Add all device libraries for each filetype except for the Intel math function

libdevice/nativecpu_utils.cpp

+11-6
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@ using __nativecpu_state = native_cpu::state;
2929
#define DEVICE_EXTERNAL_C DEVICE_EXTERN_C __attribute__((always_inline))
3030
#define DEVICE_EXTERNAL SYCL_EXTERNAL __attribute__((always_inline))
3131

32+
// Several functions are used implicitly by WorkItemLoopsPass and
33+
// PrepareSYCLNativeCPUPass and need to be marked as used to prevent them being
34+
// removed early.
35+
#define USED __attribute__((used))
36+
3237
#define OCL_LOCAL __attribute__((opencl_local))
3338
#define OCL_GLOBAL __attribute__((opencl_global))
3439
#define OCL_PRIVATE __attribute__((opencl_private))
@@ -354,7 +359,7 @@ using MakeGlobalType = typename sycl::detail::DecoratedType<
354359
T, sycl::access::address_space::global_space>::type;
355360

356361
#define DefStateSetWithType(name, field, type) \
357-
DEVICE_EXTERNAL_C void __dpcpp_nativecpu_##name( \
362+
DEVICE_EXTERNAL_C USED void __dpcpp_nativecpu_##name( \
358363
type value, MakeGlobalType<__nativecpu_state> *s) { \
359364
s->field = value; \
360365
} \
@@ -366,7 +371,7 @@ DefStateSetWithType(set_sub_group_id, SubGroup_id, uint32_t);
366371
DefStateSetWithType(set_max_sub_group_size, SubGroup_size, uint32_t);
367372

368373
#define DefineStateGetWithType(name, field, type) \
369-
DEVICE_EXTERNAL_C GET_PROPS type __dpcpp_nativecpu_##name( \
374+
DEVICE_EXTERNAL_C GET_PROPS USED type __dpcpp_nativecpu_##name( \
370375
MakeGlobalType<const __nativecpu_state> *s) { \
371376
return s->field; \
372377
} \
@@ -382,7 +387,7 @@ DefineStateGet_U32(get_max_sub_group_size, SubGroup_size);
382387
DefineStateGet_U32(get_num_sub_groups, NumSubGroups);
383388

384389
#define DefineStateGetWithType2(name, field, rtype, ptype) \
385-
DEVICE_EXTERNAL_C GET_PROPS rtype __dpcpp_nativecpu_##name( \
390+
DEVICE_EXTERNAL_C GET_PROPS USED rtype __dpcpp_nativecpu_##name( \
386391
ptype dim, MakeGlobalType<const __nativecpu_state> *s) { \
387392
return s->field[dim]; \
388393
} \
@@ -400,9 +405,9 @@ DefineStateGet_U64(get_num_groups, MNumGroups);
400405
DefineStateGet_U64(get_wg_size, MWorkGroup_size);
401406
DefineStateGet_U64(get_wg_id, MWorkGroup_id);
402407

403-
DEVICE_EXTERNAL_C
404-
void __dpcpp_nativecpu_set_local_id(uint32_t dim, uint64_t value,
405-
MakeGlobalType<__nativecpu_state> *s) {
408+
DEVICE_EXTERNAL_C USED void
409+
__dpcpp_nativecpu_set_local_id(uint32_t dim, uint64_t value,
410+
MakeGlobalType<__nativecpu_state> *s) {
406411
s->MLocal_id[dim] = value;
407412
s->MGlobal_id[dim] = s->MWorkGroup_size[dim] * s->MWorkGroup_id[dim] +
408413
s->MLocal_id[dim] + s->MGlobalOffset[dim];

0 commit comments

Comments
 (0)