Skip to content

Commit 4ae8003

Browse files
committed
temp
1 parent 728c3f1 commit 4ae8003

File tree

12 files changed

+1877
-124
lines changed

12 files changed

+1877
-124
lines changed

CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ include(${TORCH_XPU_OPS_ROOT}/cmake/SYCL.cmake)
3939
include(${TORCH_XPU_OPS_ROOT}/cmake/ONEMKL.cmake)
4040
include(${TORCH_XPU_OPS_ROOT}/cmake/BuildFlags.cmake)
4141

42+
set_build_flags()
43+
4244
# -- [ Re-generate the macros file for https://github.com/pytorch/pytorch/pull/147161
4345
macro(update_caffe2_macros_file)
4446
configure_file(
@@ -56,6 +58,11 @@ if(USE_XCCL)
5658
endif()
5759
endif()
5860

61+
# CUTLASS support is enabled by default but can be switched off with
# -DUSE_CUTLASS=OFF. When enabled, cmake/CUTLASS.cmake is included to set up
# the CUTLASS-SYCL sources and the related helper macros/variables.
option(USE_CUTLASS "Enable CUTLASS-SYCL based kernels" ON)
if(USE_CUTLASS)
  include(${TORCH_XPU_OPS_ROOT}/cmake/CUTLASS.cmake)
endif()
65+
5966
if(BUILD_TEST)
6067
add_subdirectory(${TORCH_XPU_OPS_ROOT}/test/sycl ${CMAKE_BINARY_DIR}/test_sycl)
6168
endif()

cmake/BuildFlags.cmake

Lines changed: 138 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -23,129 +23,152 @@ function(CHECK_SYCL_FLAG FLAG VARIABLE_NAME)
2323
endfunction()
2424

2525
# Support GCC on Linux and MSVC on Windows at the moment.
26-
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
27-
# # -- Host flags (SYCL_CXX_FLAGS)
28-
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
29-
list(APPEND SYCL_HOST_FLAGS /std:c++17)
30-
list(APPEND SYCL_HOST_FLAGS /MD)
31-
list(APPEND SYCL_HOST_FLAGS /EHsc) # exception handling
32-
# SYCL headers warnings
33-
list(APPEND SYCL_HOST_FLAGS /wd4996) # allow usage of deprecated functions
34-
list(APPEND SYCL_HOST_FLAGS /wd4018) # allow signed and unsigned comparison
35-
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
36-
list(APPEND SYCL_HOST_FLAGS -fPIC)
37-
list(APPEND SYCL_HOST_FLAGS -std=c++17)
38-
list(APPEND SYCL_HOST_FLAGS -Wunused-variable)
39-
# SYCL headers warnings
40-
list(APPEND SYCL_HOST_FLAGS -Wno-deprecated-declarations)
41-
list(APPEND SYCL_HOST_FLAGS -Wno-deprecated)
42-
list(APPEND SYCL_HOST_FLAGS -Wno-attributes)
43-
list(APPEND SYCL_HOST_FLAGS -Wno-sign-compare)
44-
endif()
26+
macro(set_build_flags)
27+
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
28+
set(SYCL_HOST_FLAGS)
29+
set(SYCL_KERNEL_OPTIONS)
30+
set(SYCL_COMPILE_FLAGS ${SYCL_FLAGS})
31+
set(SYCL_DEVICE_LINK_FLAGS ${SYCL_LINK_FLAGS})
32+
set(SYCL_OFFLINE_COMPILER_AOT_OPTIONS)
33+
set(SYCL_OFFLINE_COMPILER_CG_OPTIONS)
34+
set(SYCL_OFFLINE_COMPILER_FLAGS)
4535

46-
if(CMAKE_BUILD_TYPE MATCHES Debug)
47-
list(APPEND SYCL_HOST_FLAGS -g -fno-omit-frame-pointer -O0)
48-
elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
49-
list(APPEND SYCL_HOST_FLAGS -g -O2)
50-
endif()
51-
if(USE_PER_OPERATOR_HEADERS)
52-
list(APPEND SYCL_HOST_FLAGS -DAT_PER_OPERATOR_HEADERS)
53-
endif()
54-
list(APPEND SYCL_HOST_FLAGS -D__INTEL_LLVM_COMPILER_VERSION=${__INTEL_LLVM_COMPILER})
55-
# -- Kernel flags (SYCL_KERNEL_OPTIONS)
56-
# The fast-math will be enabled by default in SYCL compiler.
57-
# Refer to [https://clang.llvm.org/docs/UsersManual.html#cmdoption-fno-fast-math]
58-
# 1. We enable below flags here to be warn about NaN and Infinity,
59-
# which will be hidden by fast-math by default.
60-
# 2. The associative-math in fast-math allows floating point
61-
# operations to be reassociated, which will lead to non-deterministic
62-
# results compared with CUDA backend.
63-
# 3. The approx-func allows certain math function calls (such as log, sqrt, pow, etc)
64-
# to be replaced with an approximately equivalent set of instructions or
65-
# alternative math function calls, which have great errors.
66-
#
67-
# PSEUDO of separate compilation with DPCPP compiler.
68-
# 1. Kernel source compilation:
69-
# icpx -fsycl -fsycl-target=${SYCL_TARGETS_OPTION} ${SYCL_FLAGS} -fsycl-host-compiler=gcc -fsycl-host-compiler-options='${CMAKE_HOST_FLAGS}' kernel.cpp -o kernel.o
70-
# 2. Device code linkage:
71-
# icpx -fsycl -fsycl-target=${SYCL_TARGETS_OPTION} -fsycl-link ${SYCL_DEVICE_LINK_FLAGS} -Xs '${SYCL_OFFLINE_COMPILER_FLAGS}' kernel.o -o device-code.o
72-
# 3. Host only source compilation:
73-
# gcc ${CMAKE_HOST_FLAGS} host.cpp -o host.o
74-
# 4. Linkage:
75-
# gcc -shared host.o kernel.o device-code.o -o libxxx.so
76-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fno-sycl-unnamed-lambda)
77-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -sycl-std=2020)
78-
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
79-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} /fp:strict)
80-
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
81-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fhonor-nans)
82-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fhonor-infinities)
83-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fno-associative-math)
84-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fno-approx-func)
85-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -Wno-absolute-value)
86-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -no-ftz)
87-
endif()
36+
if(REPLACE_FLAGS_FOR_CUTLASS)
37+
set(CPP_STD c++20)
38+
else()
39+
set(CPP_STD c++17)
40+
endif()
41+
# # -- Host flags (SYCL_CXX_FLAGS)
42+
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
43+
list(APPEND SYCL_HOST_FLAGS /std:${CPP_STD})
44+
list(APPEND SYCL_HOST_FLAGS /MD)
45+
list(APPEND SYCL_HOST_FLAGS /EHsc) # exception handling
46+
# SYCL headers warnings
47+
list(APPEND SYCL_HOST_FLAGS /wd4996) # allow usage of deprecated functions
48+
list(APPEND SYCL_HOST_FLAGS /wd4018) # allow signed and unsigned comparison
49+
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
50+
list(APPEND SYCL_HOST_FLAGS -fPIC)
51+
list(APPEND SYCL_HOST_FLAGS -std=${CPP_STD})
52+
list(APPEND SYCL_HOST_FLAGS -Wunused-variable)
53+
# SYCL headers warnings
54+
list(APPEND SYCL_HOST_FLAGS -Wno-deprecated-declarations)
55+
list(APPEND SYCL_HOST_FLAGS -Wno-deprecated)
56+
list(APPEND SYCL_HOST_FLAGS -Wno-attributes)
57+
list(APPEND SYCL_HOST_FLAGS -Wno-sign-compare)
58+
endif()
8859

89-
if(CMAKE_BUILD_TYPE MATCHES Debug)
90-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -g -O0 -Rno-debug-disables-optimization)
91-
elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
92-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -gline-tables-only -O2)
93-
endif()
60+
if(CMAKE_BUILD_TYPE MATCHES Debug)
61+
list(APPEND SYCL_HOST_FLAGS -g -fno-omit-frame-pointer -O0)
62+
elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
63+
list(APPEND SYCL_HOST_FLAGS -g -O2)
64+
endif()
65+
if(USE_PER_OPERATOR_HEADERS)
66+
list(APPEND SYCL_HOST_FLAGS -DAT_PER_OPERATOR_HEADERS)
67+
endif()
68+
list(APPEND SYCL_HOST_FLAGS -D__INTEL_LLVM_COMPILER_VERSION=${__INTEL_LLVM_COMPILER})
69+
# -- Kernel flags (SYCL_KERNEL_OPTIONS)
70+
# The fast-math will be enabled by default in SYCL compiler.
71+
# Refer to [https://clang.llvm.org/docs/UsersManual.html#cmdoption-fno-fast-math]
72+
# 1. We enable the flags below in order to be warned about NaN and Infinity,
73+
# which will be hidden by fast-math by default.
74+
# 2. The associative-math in fast-math allows floating point
75+
# operations to be reassociated, which will lead to non-deterministic
76+
# results compared with CUDA backend.
77+
# 3. The approx-func allows certain math function calls (such as log, sqrt, pow, etc)
78+
# to be replaced with an approximately equivalent set of instructions or
79+
# alternative math function calls, which can introduce large errors.
80+
#
81+
# PSEUDO of separate compilation with DPCPP compiler.
82+
# 1. Kernel source compilation:
83+
# icpx -fsycl -fsycl-target=${SYCL_TARGETS_OPTION} ${SYCL_KERNEL_OPTIONS} -fsycl-host-compiler=gcc -fsycl-host-compiler-options='${CMAKE_HOST_FLAGS}' kernel.cpp -o kernel.o
84+
# 2. Device code linkage:
85+
# icpx -fsycl -fsycl-target=${SYCL_TARGETS_OPTION} -fsycl-link ${SYCL_DEVICE_LINK_FLAGS} -Xs '${SYCL_OFFLINE_COMPILER_FLAGS}' kernel.o -o device-code.o
86+
# 3. Host only source compilation:
87+
# gcc ${CMAKE_HOST_FLAGS} host.cpp -o host.o
88+
# 4. Linkage:
89+
# gcc -shared host.o kernel.o device-code.o -o libxxx.so
90+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fno-sycl-unnamed-lambda)
91+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -sycl-std=2020)
92+
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
93+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} /fp:strict)
94+
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
95+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fhonor-nans)
96+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fhonor-infinities)
97+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fno-associative-math)
98+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fno-approx-func)
99+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -Wno-absolute-value)
100+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -no-ftz)
101+
endif()
94102

95-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -D__INTEL_LLVM_COMPILER_VERSION=${__INTEL_LLVM_COMPILER})
103+
if(CMAKE_BUILD_TYPE MATCHES Debug)
104+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -g -O0 -Rno-debug-disables-optimization)
105+
elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
106+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -gline-tables-only -O2)
107+
endif()
96108

97-
CHECK_SYCL_FLAG("-fsycl-fp64-conv-emu" SUPPORTS_FP64_CONV_EMU)
98-
if(SUPPORTS_FP64_CONV_EMU)
99-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fsycl-fp64-conv-emu)
100-
else()
101-
message(WARNING "The compiler does not support the '-fsycl-fp64-conv-emu' flag, \
102-
will disable it. On some platforms that don't support FP64, \
103-
running operations with the FP64 datatype will raise a Runtime error: Required aspect fp64 is not supported on the device \
104-
or a Native API failed error.")
105-
endif()
109+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -D__INTEL_LLVM_COMPILER_VERSION=${__INTEL_LLVM_COMPILER})
106110

107-
set(TORCH_XPU_OPS_FLAGS ${SYCL_HOST_FLAGS})
111+
CHECK_SYCL_FLAG("-fsycl-fp64-conv-emu" SUPPORTS_FP64_CONV_EMU)
112+
if(SUPPORTS_FP64_CONV_EMU)
113+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} -fsycl-fp64-conv-emu)
114+
else()
115+
message(WARNING "The compiler does not support the '-fsycl-fp64-conv-emu' flag, \
116+
will disable it. On some platforms that don't support FP64, \
117+
running operations with the FP64 datatype will raise a Runtime error: Required aspect fp64 is not supported on the device \
118+
or a Native API failed error.")
119+
endif()
108120

109-
# -- SYCL device object linkage flags
110-
include(ProcessorCount)
111-
ProcessorCount(proc_cnt)
112-
if((DEFINED ENV{MAX_JOBS}) AND ("$ENV{MAX_JOBS}" LESS_EQUAL ${proc_cnt}))
113-
set(SYCL_MAX_PARALLEL_LINK_JOBS $ENV{MAX_JOBS})
114-
else()
115-
set(SYCL_MAX_PARALLEL_LINK_JOBS ${proc_cnt})
116-
endif()
117-
set(SYCL_DEVICE_LINK_FLAGS ${SYCL_DEVICE_LINK_FLAGS} -fsycl-max-parallel-link-jobs=${SYCL_MAX_PARALLEL_LINK_JOBS})
118-
set(SYCL_DEVICE_LINK_FLAGS ${SYCL_DEVICE_LINK_FLAGS} --offload-compress)
121+
set(TORCH_XPU_OPS_FLAGS ${SYCL_HOST_FLAGS})
119122

120-
set(SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -options -cl-poison-unsupported-fp64-kernels")
121-
set(SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -options -cl-intel-enable-auto-large-GRF-mode")
122-
set(SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -options -cl-fp32-correctly-rounded-divide-sqrt")
123-
set(SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -options -cl-intel-greater-than-4GB-buffer-required")
123+
# -- SYCL device object linkage flags
124+
include(ProcessorCount)
125+
ProcessorCount(proc_cnt)
126+
if((DEFINED ENV{MAX_JOBS}) AND ("$ENV{MAX_JOBS}" LESS_EQUAL ${proc_cnt}))
127+
set(SYCL_MAX_PARALLEL_LINK_JOBS $ENV{MAX_JOBS})
128+
else()
129+
set(SYCL_MAX_PARALLEL_LINK_JOBS ${proc_cnt})
130+
endif()
131+
set(SYCL_DEVICE_LINK_FLAGS ${SYCL_DEVICE_LINK_FLAGS} -fsycl-max-parallel-link-jobs=${SYCL_MAX_PARALLEL_LINK_JOBS})
132+
set(SYCL_DEVICE_LINK_FLAGS ${SYCL_DEVICE_LINK_FLAGS} --offload-compress)
124133

134+
set(SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -options -cl-poison-unsupported-fp64-kernels")
135+
set(SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -options -cl-intel-enable-auto-large-GRF-mode")
136+
set(SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -options -cl-fp32-correctly-rounded-divide-sqrt")
137+
set(SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -options -cl-intel-greater-than-4GB-buffer-required")
125138

126-
if(WIN32)
127-
set(AOT_TARGETS "mtl,mtl-h,bmg,dg2,arl-h,lnl-m")
128-
else()
129-
set(AOT_TARGETS "pvc,bmg,dg2,arl-h,mtl-h,lnl-m")
130-
endif()
131-
if(TORCH_XPU_ARCH_LIST)
132-
set(AOT_TARGETS "${TORCH_XPU_ARCH_LIST}")
133-
endif()
134-
if(AOT_TARGETS STREQUAL "none")
135-
set(TORCH_XPU_ARCH_LIST "" PARENT_SCOPE)
136-
else()
137-
set(SYCL_TARGETS_OPTION -fsycl-targets=spir64_gen,spir64)
138-
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} ${SYCL_TARGETS_OPTION})
139-
set(SYCL_DEVICE_LINK_FLAGS ${SYCL_DEVICE_LINK_FLAGS} ${SYCL_TARGETS_OPTION})
140-
set(SYCL_OFFLINE_COMPILER_AOT_OPTIONS "-device ${AOT_TARGETS}")
141-
set(TORCH_XPU_ARCH_LIST ${AOT_TARGETS} PARENT_SCOPE)
142-
endif()
143-
message(STATUS "Compile Intel GPU AOT Targets for ${AOT_TARGETS}")
144139

145-
set(SYCL_FLAGS ${SYCL_FLAGS} ${SYCL_KERNEL_OPTIONS})
140+
if(REPLACE_FLAGS_FOR_CUTLASS)
141+
set(SYCL_TARGETS_OPTION -fsycl-targets=spir64_gen)
142+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} ${SYCL_TARGETS_OPTION})
143+
set(SYCL_DEVICE_LINK_FLAGS ${SYCL_DEVICE_LINK_FLAGS} ${SYCL_TARGETS_OPTION})
144+
set(SYCL_DEVICE_LINK_FLAGS ${SYCL_DEVICE_LINK_FLAGS} "-Xspirv-translator;-spirv-ext=+SPV_INTEL_split_barrier,+SPV_INTEL_2d_block_io,+SPV_INTEL_subgroup_matrix_multiply_accumulate")
145+
set(SYCL_OFFLINE_COMPILER_AOT_OPTIONS "-device pvc,bmg")
146+
else()
147+
if(WIN32)
148+
set(AOT_TARGETS "mtl,mtl-h,bmg,dg2,arl-h,lnl-m")
149+
else()
150+
set(AOT_TARGETS "pvc,bmg,dg2,arl-h,mtl-h,lnl-m")
151+
endif()
152+
if(TORCH_XPU_ARCH_LIST)
153+
set(AOT_TARGETS "${TORCH_XPU_ARCH_LIST}")
154+
endif()
155+
if(AOT_TARGETS STREQUAL "none")
156+
set(TORCH_XPU_ARCH_LIST "" PARENT_SCOPE)
157+
else()
158+
set(SYCL_TARGETS_OPTION -fsycl-targets=spir64_gen,spir64)
159+
set(SYCL_KERNEL_OPTIONS ${SYCL_KERNEL_OPTIONS} ${SYCL_TARGETS_OPTION})
160+
set(SYCL_DEVICE_LINK_FLAGS ${SYCL_DEVICE_LINK_FLAGS} ${SYCL_TARGETS_OPTION})
161+
set(SYCL_OFFLINE_COMPILER_AOT_OPTIONS "-device ${AOT_TARGETS}")
162+
set(TORCH_XPU_ARCH_LIST ${AOT_TARGETS} PARENT_SCOPE)
163+
endif()
164+
message(STATUS "Compile Intel GPU AOT Targets for ${AOT_TARGETS}")
165+
endif()
166+
167+
set(SYCL_COMPILE_FLAGS ${SYCL_COMPILE_FLAGS} ${SYCL_KERNEL_OPTIONS})
146168

147-
set(SYCL_OFFLINE_COMPILER_FLAGS "${SYCL_OFFLINE_COMPILER_AOT_OPTIONS}${SYCL_OFFLINE_COMPILER_CG_OPTIONS}")
148-
else()
149-
message("Not compiling with XPU. Currently only support GCC compiler on Linux and MSVC compiler on Windows as CXX compiler.")
150-
return()
151-
endif()
169+
set(SYCL_OFFLINE_COMPILER_FLAGS "${SYCL_OFFLINE_COMPILER_AOT_OPTIONS}${SYCL_OFFLINE_COMPILER_CG_OPTIONS}")
170+
else()
171+
message("Not compiling with XPU. Currently only support GCC compiler on Linux and MSVC compiler on Windows as CXX compiler.")
172+
return()
173+
endif()
174+
endmacro()

cmake/CUTLASS.cmake

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Save the current CMAKE_C_FLAGS / CMAKE_CXX_FLAGS into CMAKE_C_FLAG_BK /
# CMAKE_CXX_FLAGS_BK and then drop the `-Werror=format` option from both.
#
# This is a macro on purpose: all four variables are written in the caller's
# scope so that restore_cmake_build_flags() can undo the change later.
#
# Only the exact, whitespace-delimited `-Werror=format` token is removed.
# A plain substring replacement would also truncate related options such as
# `-Werror=format-security` into a stray `-security` argument.
macro(replace_cmake_build_flags)
  set(CMAKE_C_FLAG_BK "${CMAKE_C_FLAGS}")
  set(CMAKE_CXX_FLAGS_BK "${CMAKE_CXX_FLAGS}")
  # \1 and \2 put back the delimiters captured around the removed token, so
  # the neighbouring flags stay separated.
  string(REGEX REPLACE "(^| )-Werror=format( |$)" "\\1\\2"
         CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
  string(REGEX REPLACE "(^| )-Werror=format( |$)" "\\1\\2"
         CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endmacro()
7+
8+
# Restore CMAKE_C_FLAGS / CMAKE_CXX_FLAGS from the copies stored in
# CMAKE_C_FLAG_BK / CMAKE_CXX_FLAGS_BK by replace_cmake_build_flags().
#
# A flag variable is left untouched when its backup variable is not defined,
# so calling this macro without a preceding replace_cmake_build_flags() does
# not blank out the compiler flags.
macro(restore_cmake_build_flags)
  if(DEFINED CMAKE_C_FLAG_BK)
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAG_BK}")
  endif()
  if(DEFINED CMAKE_CXX_FLAGS_BK)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS_BK}")
  endif()
endmacro()
12+
13+
# One-time CUTLASS-SYCL setup. The __CUTLASS_INCLUDED flag makes repeated
# include() calls of this file skip everything below.
if(__CUTLASS_INCLUDED)
  return()
endif()
set(__CUTLASS_INCLUDED TRUE)

include(FetchContent)

# Source location of the CUTLASS-SYCL checkout.
FetchContent_Declare(
  repo-cutlass-sycl
  GIT_REPOSITORY https://github.com/LiyangLingIntel/cutlass-sycl.git # https://github.com/intel/cutlass-sycl
  GIT_TAG liyang/unnamed-poc # main
  GIT_SHALLOW OFF
)

# Populate the content only if an earlier configure step has not done so.
FetchContent_GetProperties(repo-cutlass-sycl)
if(NOT repo-cutlass-sycl_POPULATED)
  FetchContent_Populate(repo-cutlass-sycl)
endif()

# Include directories and compile definitions exported for CUTLASS users.
set(CUTLASS_SYCL_INCLUDE_DIRS
  "${repo-cutlass-sycl_SOURCE_DIR}/include"
  "${repo-cutlass-sycl_SOURCE_DIR}/tools/util/include"
)
set(CUTLASS_SYCL_COMPILE_DEFINITIONS
  CUTLASS_ENABLE_SYCL
  SYCL_INTEL_TARGET
)

cmake/Modules/FindSYCL.cmake

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
# SYCL_COMPILER
1111
# -- SYCL compiler's executable.
1212
#
13-
# SYCL_FLAGS
13+
# SYCL_COMPILE_FLAGS
1414
# -- SYCL compiler's compilation command line arguments.
1515
#
1616
# SYCL_HOST_FLAGS
@@ -212,7 +212,6 @@ endfunction()
212212

213213
macro(SYCL_WRAP_SRCS sycl_target generated_files)
214214
# Optional arguments
215-
set(SYCL_flags "")
216215
set(generated_extension ${CMAKE_${SYCL_C_OR_CXX}_OUTPUT_EXTENSION})
217216

218217
set(SYCL_include_dirs "${SYCL_INCLUDE_DIR}")
@@ -383,7 +382,6 @@ macro(SYCL_LINK_DEVICE_OBJECTS output_file sycl_target)
383382
set(SYCL_device_link_flags
384383
${link_type_flag}
385384
${important_host_flags}
386-
${SYCL_FLAGS}
387385
${SYCL_DEVICE_LINK_FLAGS})
388386

389387
file(RELATIVE_PATH output_file_relative_path "${CMAKE_BINARY_DIR}" "${output_file}")

cmake/Modules/FindSYCL/run_sycl.cmake

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ set(SYCL_host_compiler "@SYCL_HOST_COMPILER@") # path
2727
set(generated_file_path "@generated_file_path@") # path
2828
set(generated_file_internal "@generated_file@") # path
2929
set(SYCL_executable "@SYCL_EXECUTABLE@") # path
30-
set(SYCL_flags @SYCL_FLAGS@) # list
30+
set(SYCL_compile_flags @SYCL_COMPILE_FLAGS@) # list
3131
set(SYCL_include_dirs [==[@SYCL_include_dirs@]==]) # list
3232
set(SYCL_compile_definitions [==[@SYCL_compile_definitions@]==]) # list
3333

@@ -47,10 +47,10 @@ foreach(dir ${SYCL_include_dirs})
4747
endif()
4848
endforeach()
4949

50-
# Clean up list of compile definitions, add -D flags, and append to SYCL_flags
50+
# Clean up list of compile definitions, add -D flags, and append to SYCL_compile_flags
5151
list(REMOVE_DUPLICATES SYCL_compile_definitions)
5252
foreach(def ${SYCL_compile_definitions})
53-
list(APPEND SYCL_flags "-D${def}")
53+
list(APPEND SYCL_compile_flags "-D${def}")
5454
endforeach()
5555

5656
# Choose host flags in FindSYCL.cmake
@@ -72,7 +72,7 @@ foreach(def ${SYCL_compile_definitions})
7272
endforeach()
7373

7474
# string(APPEND SYCL_host_compiler_flags "\"")
75-
set(SYCL_host_compiler "-fsycl-host-compiler=${SYCL_host_compiler}")
75+
# Turn the host compiler path configured earlier in this script into the
# -fsycl-host-compiler option, instead of pinning one specific g++ version
# that may not exist on the build machine.
set(SYCL_host_compiler "-fsycl-host-compiler=${SYCL_host_compiler}")
7676

7777
# SYCL_execute_process - Executes a command with optional command echo and status message.
7878
#
@@ -134,7 +134,7 @@ SYCL_execute_process(
134134
${SYCL_include_args}
135135
${SYCL_host_compiler}
136136
${SYCL_host_compiler_flags}
137-
${SYCL_flags}
137+
${SYCL_compile_flags}
138138
)
139139

140140
if(SYCL_result)

0 commit comments

Comments
 (0)