@@ -23,129 +23,152 @@ function(CHECK_SYCL_FLAG FLAG VARIABLE_NAME)
2323endfunction ()
2424
# Support GCC on Linux and MSVC on Windows at the moment.
#
# set_build_flags()
#
# Computes the flag sets used for SYCL separate compilation. This is a macro,
# so every variable below is written directly into the caller's scope.
#
# Inputs read:
#   SYCL_FLAGS, SYCL_LINK_FLAGS   - seeds for the compile / device-link flags
#   REPLACE_FLAGS_FOR_CUTLASS     - when true, use C++20 and the fixed
#                                   spir64_gen / "pvc,bmg" target setup
#   USE_PER_OPERATOR_HEADERS      - adds -DAT_PER_OPERATOR_HEADERS to host flags
#   __INTEL_LLVM_COMPILER         - forwarded as __INTEL_LLVM_COMPILER_VERSION
#   TORCH_XPU_ARCH_LIST           - overrides the default AOT target list;
#                                   the value "none" disables AOT targets
#   ENV{MAX_JOBS}                 - caps the parallel device-link job count
#
# Outputs set:
#   SYCL_HOST_FLAGS, TORCH_XPU_OPS_FLAGS, SYCL_KERNEL_OPTIONS,
#   SYCL_COMPILE_FLAGS, SYCL_DEVICE_LINK_FLAGS,
#   SYCL_OFFLINE_COMPILER_AOT_OPTIONS, SYCL_OFFLINE_COMPILER_CG_OPTIONS,
#   SYCL_OFFLINE_COMPILER_FLAGS, and (one scope above the caller)
#   TORCH_XPU_ARCH_LIST.
macro(set_build_flags)
  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
    # Start from a clean slate on every invocation so repeated calls
    # (e.g. with and without REPLACE_FLAGS_FOR_CUTLASS) do not accumulate flags.
    set(SYCL_HOST_FLAGS)
    set(SYCL_KERNEL_OPTIONS)
    set(SYCL_COMPILE_FLAGS ${SYCL_FLAGS})
    set(SYCL_DEVICE_LINK_FLAGS ${SYCL_LINK_FLAGS})
    set(SYCL_OFFLINE_COMPILER_AOT_OPTIONS)
    set(SYCL_OFFLINE_COMPILER_CG_OPTIONS)
    set(SYCL_OFFLINE_COMPILER_FLAGS)

    if(REPLACE_FLAGS_FOR_CUTLASS)
      set(CPP_STD c++20)
    else()
      set(CPP_STD c++17)
    endif()

    # -- Host flags (SYCL_CXX_FLAGS)
    if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
      list(APPEND SYCL_HOST_FLAGS
        /std:${CPP_STD}
        /MD
        /EHsc     # exception handling
        # SYCL headers warnings
        /wd4996   # allow usage of deprecated functions
        /wd4018)  # allow signed and unsigned comparison
    elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
      list(APPEND SYCL_HOST_FLAGS
        -fPIC
        -std=${CPP_STD}
        -Wunused-variable
        # SYCL headers warnings
        -Wno-deprecated-declarations
        -Wno-deprecated
        -Wno-attributes
        -Wno-sign-compare)
    endif()

    if(CMAKE_BUILD_TYPE MATCHES Debug)
      list(APPEND SYCL_HOST_FLAGS -g -fno-omit-frame-pointer -O0)
    elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
      list(APPEND SYCL_HOST_FLAGS -g -O2)
    endif()
    if(USE_PER_OPERATOR_HEADERS)
      list(APPEND SYCL_HOST_FLAGS -DAT_PER_OPERATOR_HEADERS)
    endif()
    list(APPEND SYCL_HOST_FLAGS -D__INTEL_LLVM_COMPILER_VERSION=${__INTEL_LLVM_COMPILER})

    # -- Kernel flags (SYCL_KERNEL_OPTIONS)
    # Fast-math is enabled by default in the SYCL compiler.
    # Refer to [https://clang.llvm.org/docs/UsersManual.html#cmdoption-fno-fast-math]
    # 1. We enable the flags below to honor NaN and Infinity,
    #    which would otherwise be hidden by fast-math.
    # 2. The associative-math in fast-math allows floating point
    #    operations to be reassociated, which will lead to non-deterministic
    #    results compared with CUDA backend.
    # 3. The approx-func allows certain math function calls (such as log, sqrt, pow, etc)
    #    to be replaced with an approximately equivalent set of instructions or
    #    alternative math function calls, which have great errors.
    #
    # PSEUDO of separate compilation with DPCPP compiler.
    # 1. Kernel source compilation:
    #    icpx -fsycl -fsycl-target=${SYCL_TARGETS_OPTION} ${SYCL_KERNEL_OPTIONS} -fsycl-host-compiler=gcc -fsycl-host-compiler-options='${CMAKE_HOST_FLAGS}' kernel.cpp -o kernel.o
    # 2. Device code linkage:
    #    icpx -fsycl -fsycl-target=${SYCL_TARGETS_OPTION} -fsycl-link ${SYCL_DEVICE_LINK_FLAGS} -Xs '${SYCL_OFFLINE_COMPILER_FLAGS}' kernel.o -o device-code.o
    # 3. Host only source compilation:
    #    gcc ${CMAKE_HOST_FLAGS} host.cpp -o host.o
    # 4. Linkage:
    #    gcc -shared host.o kernel.o device-code.o -o libxxx.so
    list(APPEND SYCL_KERNEL_OPTIONS -fno-sycl-unnamed-lambda)
    list(APPEND SYCL_KERNEL_OPTIONS -sycl-std=2020)
    if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
      list(APPEND SYCL_KERNEL_OPTIONS /fp:strict)
    elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
      list(APPEND SYCL_KERNEL_OPTIONS
        -fhonor-nans
        -fhonor-infinities
        -fno-associative-math
        -fno-approx-func
        -Wno-absolute-value
        -no-ftz)
    endif()

    if(CMAKE_BUILD_TYPE MATCHES Debug)
      list(APPEND SYCL_KERNEL_OPTIONS -g -O0 -Rno-debug-disables-optimization)
    elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
      list(APPEND SYCL_KERNEL_OPTIONS -gline-tables-only -O2)
    endif()

    list(APPEND SYCL_KERNEL_OPTIONS -D__INTEL_LLVM_COMPILER_VERSION=${__INTEL_LLVM_COMPILER})

    CHECK_SYCL_FLAG("-fsycl-fp64-conv-emu" SUPPORTS_FP64_CONV_EMU)
    if(SUPPORTS_FP64_CONV_EMU)
      list(APPEND SYCL_KERNEL_OPTIONS -fsycl-fp64-conv-emu)
    else()
      message(WARNING "The compiler does not support the '-fsycl-fp64-conv-emu' flag, \
      will disable it. On some platforms that don't support FP64, \
      running operations with the FP64 datatype will raise a Runtime error: Required aspect fp64 is not supported on the device \
      or a Native API failed error.")
    endif()

    # Snapshot of the host flags before any later additions.
    set(TORCH_XPU_OPS_FLAGS ${SYCL_HOST_FLAGS})

    # -- SYCL device object linkage flags
    # Use MAX_JOBS from the environment only when it does not exceed the
    # detected processor count; otherwise fall back to the processor count.
    include(ProcessorCount)
    ProcessorCount(proc_cnt)
    if((DEFINED ENV{MAX_JOBS}) AND ("$ENV{MAX_JOBS}" LESS_EQUAL ${proc_cnt}))
      set(SYCL_MAX_PARALLEL_LINK_JOBS $ENV{MAX_JOBS})
    else()
      set(SYCL_MAX_PARALLEL_LINK_JOBS ${proc_cnt})
    endif()
    list(APPEND SYCL_DEVICE_LINK_FLAGS -fsycl-max-parallel-link-jobs=${SYCL_MAX_PARALLEL_LINK_JOBS})
    list(APPEND SYCL_DEVICE_LINK_FLAGS --offload-compress)

    # Offline compiler code-generation options are kept as one space-separated
    # string (not a CMake list); each entry carries its own "-options" prefix.
    string(APPEND SYCL_OFFLINE_COMPILER_CG_OPTIONS " -options -cl-poison-unsupported-fp64-kernels")
    string(APPEND SYCL_OFFLINE_COMPILER_CG_OPTIONS " -options -cl-intel-enable-auto-large-GRF-mode")
    string(APPEND SYCL_OFFLINE_COMPILER_CG_OPTIONS " -options -cl-fp32-correctly-rounded-divide-sqrt")
    string(APPEND SYCL_OFFLINE_COMPILER_CG_OPTIONS " -options -cl-intel-greater-than-4GB-buffer-required")

    if(REPLACE_FLAGS_FOR_CUTLASS)
      # Fixed target setup for the CUTLASS build: spir64_gen only, AOT for pvc and bmg,
      # plus the SPIR-V extensions passed to the translator at device link time.
      set(SYCL_TARGETS_OPTION -fsycl-targets=spir64_gen)
      list(APPEND SYCL_KERNEL_OPTIONS ${SYCL_TARGETS_OPTION})
      list(APPEND SYCL_DEVICE_LINK_FLAGS ${SYCL_TARGETS_OPTION})
      list(APPEND SYCL_DEVICE_LINK_FLAGS
        -Xspirv-translator
        -spirv-ext=+SPV_INTEL_split_barrier,+SPV_INTEL_2d_block_io,+SPV_INTEL_subgroup_matrix_multiply_accumulate)
      set(SYCL_OFFLINE_COMPILER_AOT_OPTIONS "-device pvc,bmg")
    else()
      if(WIN32)
        set(AOT_TARGETS "mtl,mtl-h,bmg,dg2,arl-h,lnl-m")
      else()
        set(AOT_TARGETS "pvc,bmg,dg2,arl-h,mtl-h,lnl-m")
      endif()
      if(TORCH_XPU_ARCH_LIST)
        # No trailing whitespace here: the value is compared against "none" below.
        set(AOT_TARGETS "${TORCH_XPU_ARCH_LIST}")
      endif()
      # NOTE: inside a macro, PARENT_SCOPE refers to the parent of the *caller's* scope.
      if(AOT_TARGETS STREQUAL "none")
        set(TORCH_XPU_ARCH_LIST "" PARENT_SCOPE)
      else()
        set(SYCL_TARGETS_OPTION -fsycl-targets=spir64_gen,spir64)
        list(APPEND SYCL_KERNEL_OPTIONS ${SYCL_TARGETS_OPTION})
        list(APPEND SYCL_DEVICE_LINK_FLAGS ${SYCL_TARGETS_OPTION})
        set(SYCL_OFFLINE_COMPILER_AOT_OPTIONS "-device ${AOT_TARGETS}")
        set(TORCH_XPU_ARCH_LIST ${AOT_TARGETS} PARENT_SCOPE)
      endif()
      message(STATUS "Compile Intel GPU AOT Targets for ${AOT_TARGETS}")
    endif()

    list(APPEND SYCL_COMPILE_FLAGS ${SYCL_KERNEL_OPTIONS})

    # CG options already start with a space, so plain concatenation is correct.
    set(SYCL_OFFLINE_COMPILER_FLAGS "${SYCL_OFFLINE_COMPILER_AOT_OPTIONS}${SYCL_OFFLINE_COMPILER_CG_OPTIONS}")
  else()
    message("Not compiling with XPU. Currently only support GCC compiler on Linux and MSVC compiler on Windows as CXX compiler.")
    # NOTE: this is a macro, so return() leaves the *caller's* function or file,
    # not just this macro body.
    return()
  endif()
endmacro()
0 commit comments