From ba2e6c4bc8c70684b78ace3af43ec4fc8b40c0c7 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Wed, 23 Jul 2025 20:00:12 +0200 Subject: [PATCH 01/25] Seperate out property parsing in a seperate DS --- .../sycl/detail/kernel_launch_helper.hpp | 186 ++++++++++++++++++ sycl/include/sycl/handler.hpp | 11 +- sycl/source/CMakeLists.txt | 1 + sycl/source/detail/handler_impl.hpp | 9 +- sycl/source/detail/kernel_launch_helper.cpp | 69 +++++++ sycl/source/handler.cpp | 44 +---- 6 files changed, 263 insertions(+), 57 deletions(-) create mode 100644 sycl/source/detail/kernel_launch_helper.cpp diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index f90d0c4efd497..43443cc0f26dd 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -16,6 +16,9 @@ #include #include #include +#include +#include +#include #include #include @@ -263,6 +266,189 @@ struct KernelWrapper< } }; // KernelWrapper struct +namespace syclex = sycl::ext::oneapi::experimental; + +// Class to encapsulate all member functions, data members required +// to parse and store kernel launch properties. +class KernelLaunchProperties { +public: + // Changing values in this will break ABI/API. + enum class StableKernelCacheConfig : int32_t { + Default = 0, + LargeSLM = 1, + LargeData = 2 + }; + + ur_kernel_cache_config_t MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_DEFAULT; + bool MKernelIsCooperative = false; + uint32_t MKernelWorkGroupMemorySize = 0; + + // Kernel Cluster launch + bool MKernelUsesClusterLaunch = false; + size_t MKernelClusterDims = 0; + std::array MKernelClusterSize = {0, 0, 0}; + + void + verifyDeviceHasProgressGuarantee(device_impl &dev, + syclex::forward_progress_guarantee guarantee, + syclex::execution_scope threadScope, + syclex::execution_scope coordinationScope); + + template < + syclex::detail::UnsupportedGraphFeatures FeatureT> + static void throwIfGraphAssociated(bool HasGraph) { + if (!HasGraph) + return; + + std::string FeatureString = + syclex::detail::UnsupportedFeatureToString( + FeatureT); + throw sycl::exception(sycl::make_error_code(errc::invalid), + "The " + FeatureString + + " feature is not yet available " + "for use with the SYCL Graph extension."); + } + + // Set value of the gpu cache configuration for the kernel. + void setKernelCacheConfig(StableKernelCacheConfig Config) { + switch (Config) { + case StableKernelCacheConfig::Default: + MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_DEFAULT; + break; + case StableKernelCacheConfig::LargeSLM: + MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_SLM; + break; + case StableKernelCacheConfig::LargeData: + MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_DATA; + break; + } + } + + // This method is overriden by handler_impl. + virtual void setKernelClusterSize(int dims, sycl::range<3> ClusterSize) { + MKernelUsesClusterLaunch = true; + MKernelClusterDims = dims; + if (dims == 1) { + MKernelClusterSize[0] = ClusterSize[0]; + } else if (dims == 2) { + MKernelClusterSize[0] = ClusterSize[0]; + MKernelClusterSize[1] = ClusterSize[1]; + } else if (dims == 3) { + MKernelClusterSize[0] = ClusterSize[0]; + MKernelClusterSize[1] = ClusterSize[1]; + MKernelClusterSize[2] = ClusterSize[2]; + } else { + assert(dims > 3 && "Only 1D, 2D, and 3D cluster launch is supported."); + } + } + + void setKernelWorkGroupMemorySize(uint32_t Size) { + MKernelWorkGroupMemorySize = Size; + } + + template + void processLaunchProperties(device_impl &Dev, bool HasGraph, PropertiesT Props) { + static_assert( + ext::oneapi::experimental::is_property_list::value, + "Template type is not a property list."); + + static_assert( + !PropertiesT::template has_property< + sycl::ext::intel::experimental::fp_control_key>() || + (PropertiesT::template has_property< + sycl::ext::intel::experimental::fp_control_key>() && + IsESIMDKernel), + "Floating point control property is supported for ESIMD kernels only."); + + static_assert( + !PropertiesT::template has_property< + sycl::ext::oneapi::experimental::indirectly_callable_key>(), + "indirectly_callable property cannot be applied to SYCL kernels"); + + // Process Kernel cache configuration property. + if constexpr (PropertiesT::template has_property< + sycl::ext::intel::experimental::cache_config_key>()) { + auto Config = Props.template get_property< + sycl::ext::intel::experimental::cache_config_key>(); + if (Config == sycl::ext::intel::experimental::large_slm) { + setKernelCacheConfig(StableKernelCacheConfig::LargeSLM); + } else if (Config == sycl::ext::intel::experimental::large_data) { + setKernelCacheConfig(StableKernelCacheConfig::LargeData); + } + } else { + std::ignore = Props; + } + + // Process Kernel cooperative property. + constexpr bool UsesRootSync = PropertiesT::template has_property< + sycl::ext::oneapi::experimental::use_root_sync_key>(); + if constexpr (UsesRootSync) { + MKernelIsCooperative = UsesRootSync; + } + + // Process device progress properties. + if constexpr (PropertiesT::template has_property< + sycl::ext::oneapi::experimental:: + work_group_progress_key>()) { + auto prop = Props.template get_property< + sycl::ext::oneapi::experimental::work_group_progress_key>(); + verifyDeviceHasProgressGuarantee( + Dev, + prop.guarantee, + sycl::ext::oneapi::experimental::execution_scope::work_group, + prop.coordinationScope); + } + if constexpr (PropertiesT::template has_property< + sycl::ext::oneapi::experimental:: + sub_group_progress_key>()) { + auto prop = Props.template get_property< + sycl::ext::oneapi::experimental::sub_group_progress_key>(); + verifyDeviceHasProgressGuarantee( + Dev, + prop.guarantee, + sycl::ext::oneapi::experimental::execution_scope::sub_group, + prop.coordinationScope); + } + if constexpr (PropertiesT::template has_property< + sycl::ext::oneapi::experimental:: + work_item_progress_key>()) { + auto prop = Props.template get_property< + sycl::ext::oneapi::experimental::work_item_progress_key>(); + verifyDeviceHasProgressGuarantee( + Dev, + prop.guarantee, + sycl::ext::oneapi::experimental::execution_scope::work_item, + prop.coordinationScope); + } + + // Process work group scratch memory property. + if constexpr (PropertiesT::template has_property< + sycl::ext::oneapi::experimental:: + work_group_scratch_size>()) { + throwIfGraphAssociated(HasGraph); + auto WorkGroupMemSize = Props.template get_property< + sycl::ext::oneapi::experimental::work_group_scratch_size>(); + setKernelWorkGroupMemorySize(WorkGroupMemSize.size); + } + + // Parse cluster properties. + constexpr std::size_t ClusterDim = + syclex::detail::getClusterDim(); + if constexpr (ClusterDim > 0) { + throwIfGraphAssociated< + syclex::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_experimental_cuda_cluster_launch>(HasGraph); + auto ClusterSize = Props + .template get_property< + syclex::cuda::cluster_size_key>() + .get_cluster_size(); + setKernelClusterSize(ClusterDim, ClusterSize); + } + } + +}; // class KernelLaunchProperties + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 778f178bc537b..6ae886496787b 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -3559,16 +3559,7 @@ class __SYCL_EXPORT handler { template < ext::oneapi::experimental::detail::UnsupportedGraphFeatures FeatureT> void throwIfGraphAssociated() const { - - if (getCommandGraph()) { - std::string FeatureString = - ext::oneapi::experimental::detail::UnsupportedFeatureToString( - FeatureT); - throw sycl::exception(sycl::make_error_code(errc::invalid), - "The " + FeatureString + - " feature is not yet available " - "for use with the SYCL Graph extension."); - } + detail::KernelLaunchProperties::throwIfGraphAssociated(getCommandGraph() != nullptr); } #endif diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index c564c63b06b3a..0cfb276ddc199 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -287,6 +287,7 @@ set(SYCL_COMMON_SOURCES "detail/kernel_compiler/kernel_compiler_opencl.cpp" "detail/kernel_compiler/kernel_compiler_sycl.cpp" "detail/kernel_impl.cpp" + "detail/kernel_launch_helper.cpp" "detail/kernel_name_based_cache.cpp" "detail/kernel_program_cache.cpp" "detail/memory_export.cpp" diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index 0fda3dd4f2769..bfcb17c28c03e 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -11,6 +11,7 @@ #include "sycl/handler.hpp" #include #include +#include #include namespace sycl { @@ -28,7 +29,7 @@ enum class HandlerSubmissionState : std::uint8_t { SPEC_CONST_SET_STATE, }; -class handler_impl { +class handler_impl : public KernelLaunchProperties { public: handler_impl(queue_impl &Queue, queue_impl *SubmissionSecondaryQueue, bool EventNeeded) @@ -104,12 +105,6 @@ class handler_impl { // If the pipe operation is read or write, 1 for read 0 for write. bool HostPipeRead = true; - ur_kernel_cache_config_t MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_DEFAULT; - - bool MKernelIsCooperative = false; - bool MKernelUsesClusterLaunch = false; - uint32_t MKernelWorkGroupMemorySize = 0; - // Extra information for bindless image copy ur_image_desc_t MSrcImageDesc = {}; ur_image_desc_t MDstImageDesc = {}; diff --git a/sycl/source/detail/kernel_launch_helper.cpp b/sycl/source/detail/kernel_launch_helper.cpp new file mode 100644 index 0000000000000..5670aa5962101 --- /dev/null +++ b/sycl/source/detail/kernel_launch_helper.cpp @@ -0,0 +1,69 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { + + namespace syclex = sycl::ext::oneapi::experimental; + void KernelLaunchProperties::verifyDeviceHasProgressGuarantee( + device_impl &dev, + syclex::forward_progress_guarantee guarantee, + syclex::execution_scope threadScope, + syclex::execution_scope coordinationScope) { + using execution_scope = syclex::execution_scope; + using forward_progress = + syclex::forward_progress_guarantee; + const bool supported = dev.supportsForwardProgress( + guarantee, threadScope, coordinationScope); + if (threadScope == execution_scope::work_group) { + if (!supported) { + throw sycl::exception( + sycl::errc::feature_not_supported, + "Required progress guarantee for work groups is not " + "supported by this device."); + } + // If we are here, the device supports the guarantee required but there is a + // caveat in that if the guarantee required is a concurrent guarantee, then + // we most likely also need to enable cooperative launch of the kernel. That + // is, although the device supports the required guarantee, some setup work + // is needed to truly make the device provide that guarantee at runtime. + // Otherwise, we will get the default guarantee which is weaker than + // concurrent. Same reasoning applies for sub_group but not for work_item. + // TODO: Further design work is probably needed to reflect this behavior in + // Unified Runtime. + if (guarantee == forward_progress::concurrent) + MKernelIsCooperative = true; + } else if (threadScope == execution_scope::sub_group) { + if (!supported) { + throw sycl::exception(sycl::errc::feature_not_supported, + "Required progress guarantee for sub groups is not " + "supported by this device."); + } + // Same reasoning as above. + if (guarantee == forward_progress::concurrent) + MKernelIsCooperative = true; + } else { // threadScope is execution_scope::work_item otherwise undefined + // behavior + if (!supported) { + throw sycl::exception(sycl::errc::feature_not_supported, + "Required progress guarantee for work items is not " + "supported by this device."); + } + } + } + +} +} +} \ No newline at end of file diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 33f50af815a5b..57099843d8cf0 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -2080,46 +2080,9 @@ void handler::verifyDeviceHasProgressGuarantee( sycl::ext::oneapi::experimental::forward_progress_guarantee guarantee, sycl::ext::oneapi::experimental::execution_scope threadScope, sycl::ext::oneapi::experimental::execution_scope coordinationScope) { - using execution_scope = sycl::ext::oneapi::experimental::execution_scope; - using forward_progress = - sycl::ext::oneapi::experimental::forward_progress_guarantee; - const bool supported = impl->get_device().supportsForwardProgress( - guarantee, threadScope, coordinationScope); - if (threadScope == execution_scope::work_group) { - if (!supported) { - throw sycl::exception( - sycl::errc::feature_not_supported, - "Required progress guarantee for work groups is not " - "supported by this device."); - } - // If we are here, the device supports the guarantee required but there is a - // caveat in that if the guarantee required is a concurrent guarantee, then - // we most likely also need to enable cooperative launch of the kernel. That - // is, although the device supports the required guarantee, some setup work - // is needed to truly make the device provide that guarantee at runtime. - // Otherwise, we will get the default guarantee which is weaker than - // concurrent. Same reasoning applies for sub_group but not for work_item. - // TODO: Further design work is probably needed to reflect this behavior in - // Unified Runtime. - if (guarantee == forward_progress::concurrent) - setKernelIsCooperative(true); - } else if (threadScope == execution_scope::sub_group) { - if (!supported) { - throw sycl::exception(sycl::errc::feature_not_supported, - "Required progress guarantee for sub groups is not " - "supported by this device."); - } - // Same reasoning as above. - if (guarantee == forward_progress::concurrent) - setKernelIsCooperative(true); - } else { // threadScope is execution_scope::work_item otherwise undefined - // behavior - if (!supported) { - throw sycl::exception(sycl::errc::feature_not_supported, - "Required progress guarantee for work items is not " - "supported by this device."); - } - } + impl->verifyDeviceHasProgressGuarantee( + impl->get_device(), + guarantee, threadScope, coordinationScope); } bool handler::supportsUSMMemcpy2D() { @@ -2593,6 +2556,7 @@ void handler::copyCodeLoc(const handler &other) { queue handler::getQueue() { return createSyclObjFromImpl(impl->get_queue()); } + namespace detail { __SYCL_EXPORT void HandlerAccess::preProcess(handler &CGH, type_erased_cgfo_ty F) { From a09cbd07ce8509212ac7fa761ef2b51ce436f4a9 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Wed, 23 Jul 2025 22:17:52 +0200 Subject: [PATCH 02/25] [SYCL][NFC] Decouple property parsing from KernelWrapper --- .../sycl/detail/kernel_launch_helper.hpp | 37 +++++++++++-------- sycl/include/sycl/handler.hpp | 22 +++++++---- 2 files changed, 35 insertions(+), 24 deletions(-) diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index 43443cc0f26dd..725a2b32c37bc 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -198,33 +198,21 @@ struct KernelWrapperHelperFuncs { // embedding them into the kernel's type). template ::type> struct KernelWrapper; template + typename ElementType, typename PropertiesT, typename... MergedProps> struct KernelWrapper< - WrapAsVal, KernelName, KernelType, ElementType, PropertyProcessor, - PropertiesT, + WrapAsVal, KernelName, KernelType, ElementType, PropertiesT, ext::oneapi::experimental::detail::properties_t> : public KernelWrapperHelperFuncs { - static void wrap([[maybe_unused]] PropertyProcessor h, - [[maybe_unused]] const KernelType &KernelFunc) { + static void wrap([[maybe_unused]] const KernelType &KernelFunc) { #ifdef __SYCL_DEVICE_ONLY__ detail::CheckDeviceCopyable(); -#else - // If there are properties provided by get method then process them. - if constexpr (ext::oneapi::experimental::detail:: - HasKernelPropertiesGetMethod::value) { - - // TODO: decouple property processing from KernelWrapper. - h->template processProperties()>( - KernelFunc.get(ext::oneapi::experimental::properties_tag{})); - } #endif // Note: the static_assert below need to be run on both the host and the // device ends to avoid test issues, so don't put it into the #ifdef @@ -449,6 +437,23 @@ class KernelLaunchProperties { }; // class KernelLaunchProperties +struct KernelLaunchPropertyWrapper { + template + static void parseProperties([[maybe_unused]] PropertyProcessor h, + [[maybe_unused]] const KernelType &KernelFunc) { +#ifndef __SYCL_DEVICE_ONLY__ + // If there are properties provided by get method then process them. + if constexpr (ext::oneapi::experimental::detail:: + HasKernelPropertiesGetMethod::value) { + + h->template processProperties()>( + KernelFunc.get(ext::oneapi::experimental::properties_tag{})); + } +#endif + } +}; // KernelLaunchPropertyWrapper struct + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 6ae886496787b..3f3c9e724f285 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -1294,7 +1294,10 @@ class __SYCL_EXPORT handler { detail::KernelWrapper::wrap(this, Wrapper); + PropertiesT>::wrap(Wrapper); + + detail::KernelLaunchPropertyWrapper::parseProperties(this, + Wrapper); #ifndef __SYCL_DEVICE_ONLY__ constexpr detail::string_view Name{detail::getKernelName()}; verifyUsedKernelBundleInternal(Name); @@ -1319,8 +1322,9 @@ class __SYCL_EXPORT handler { // If parallel_for range rounding is forced then only range rounded // kernel is generated detail::KernelWrapper::wrap(this, KernelFunc); + TransformedArgType, PropertiesT>::wrap(KernelFunc); + detail::KernelLaunchPropertyWrapper::parseProperties(this, + KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ constexpr detail::string_view Name{detail::getKernelName()}; @@ -1400,7 +1404,9 @@ class __SYCL_EXPORT handler { typename detail::get_kernel_name_t::name; (void)Props; detail::KernelWrapper::wrap(this, KernelFunc); + PropertiesT>::wrap(KernelFunc); + detail::KernelLaunchPropertyWrapper::parseProperties(this, + KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ if constexpr (WrapAsVal == detail::WrapAs::single_task) { throwOnKernelParameterMisuse(); @@ -1441,7 +1447,9 @@ class __SYCL_EXPORT handler { (void)Props; (void)Kernel; detail::KernelWrapper::wrap(this, KernelFunc); + PropertiesT>::wrap(KernelFunc); + detail::KernelLaunchPropertyWrapper::parseProperties(this, + KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ if constexpr (WrapAsVal == detail::WrapAs::single_task) { throwOnKernelParameterMisuse(); @@ -3634,9 +3642,7 @@ class __SYCL_EXPORT handler { void instantiateKernelOnHost(void *InstantiateKernelOnHostPtr); friend class detail::HandlerAccess; - template - friend struct detail::KernelWrapper; + friend struct detail::KernelLaunchPropertyWrapper; #ifdef __INTEL_PREVIEW_BREAKING_CHANGES __SYCL_DLL_LOCAL detail::handler_impl *get_impl() { return impl; } From 364708d712fe28d32fdca63f3ae7bc26e8e46042 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Thu, 24 Jul 2025 22:58:44 +0200 Subject: [PATCH 03/25] Move property pasring into a seperate DS --- .../sycl/detail/kernel_launch_helper.hpp | 312 ++++++++++-------- sycl/include/sycl/handler.hpp | 80 ++++- sycl/source/detail/handler_impl.hpp | 2 +- sycl/source/detail/kernel_launch_helper.cpp | 9 +- sycl/source/handler.cpp | 87 ++++- .../non_esimd_kernel_fp_control.cpp | 4 +- .../include_deps/sycl_detail_core.hpp.cpp | 4 +- .../virtual-functions/properties-negative.cpp | 6 +- 8 files changed, 326 insertions(+), 178 deletions(-) diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index 725a2b32c37bc..5dc3f35d699e1 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -254,11 +254,12 @@ struct KernelWrapper< } }; // KernelWrapper struct +// TODO: Remove this. namespace syclex = sycl::ext::oneapi::experimental; -// Class to encapsulate all member functions, data members required -// to parse and store kernel launch properties. -class KernelLaunchProperties { +// This class is inherited by handler_impl and sycl::handler is using the static +// methods. +class KernelLaunchPropertyWrapper { public: // Changing values in this will break ABI/API. enum class StableKernelCacheConfig : int32_t { @@ -274,13 +275,17 @@ class KernelLaunchProperties { // Kernel Cluster launch bool MKernelUsesClusterLaunch = false; size_t MKernelClusterDims = 0; - std::array MKernelClusterSize = {0, 0, 0}; - - void - verifyDeviceHasProgressGuarantee(device_impl &dev, - syclex::forward_progress_guarantee guarantee, - syclex::execution_scope threadScope, - syclex::execution_scope coordinationScope); + std::array MKernelClusterSize = {0, 0, 0}; + + struct KernelLaunchPropertiesT { + StableKernelCacheConfig MKernelCacheConfig = + StableKernelCacheConfig::Default; + bool MKernelIsCooperative = false; + uint32_t MKernelWorkGroupMemorySize = 0; + bool MKernelUsesClusterLaunch = false; + size_t MKernelClusterDims = 0; + std::array MKernelClusterSize = {0, 0, 0}; + }; template < syclex::detail::UnsupportedGraphFeatures FeatureT> @@ -297,49 +302,147 @@ class KernelLaunchProperties { "for use with the SYCL Graph extension."); } - // Set value of the gpu cache configuration for the kernel. - void setKernelCacheConfig(StableKernelCacheConfig Config) { - switch (Config) { - case StableKernelCacheConfig::Default: - MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_DEFAULT; - break; - case StableKernelCacheConfig::LargeSLM: - MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_SLM; - break; - case StableKernelCacheConfig::LargeData: - MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_DATA; - break; + static void + verifyDeviceHasProgressGuarantee(device_impl &dev, + syclex::forward_progress_guarantee guarantee, + syclex::execution_scope threadScope, + syclex::execution_scope coordinationScope, + KernelLaunchPropertiesT &prop); + + /// Process runtime kernel properties. + /// + /// Stores information about kernel properties into the handler. + template + static KernelLaunchPropertiesT processLaunchProperties(PropertiesT Props, + bool HasGraph, device_impl &dev) { + + KernelLaunchPropertiesT retval; + + // Process Kernel cache configuration property. + { + if constexpr (PropertiesT::template has_property< + sycl::ext::intel::experimental::cache_config_key>()) { + auto Config = Props.template get_property< + sycl::ext::intel::experimental::cache_config_key>(); + if (Config == sycl::ext::intel::experimental::large_slm) { + retval.MKernelCacheConfig = StableKernelCacheConfig::LargeSLM; + } else if (Config == sycl::ext::intel::experimental::large_data) { + retval.MKernelCacheConfig = StableKernelCacheConfig::LargeData; + } + } else { + std::ignore = Props; + } } - } - // This method is overriden by handler_impl. - virtual void setKernelClusterSize(int dims, sycl::range<3> ClusterSize) { - MKernelUsesClusterLaunch = true; - MKernelClusterDims = dims; - if (dims == 1) { - MKernelClusterSize[0] = ClusterSize[0]; - } else if (dims == 2) { - MKernelClusterSize[0] = ClusterSize[0]; - MKernelClusterSize[1] = ClusterSize[1]; - } else if (dims == 3) { - MKernelClusterSize[0] = ClusterSize[0]; - MKernelClusterSize[1] = ClusterSize[1]; - MKernelClusterSize[2] = ClusterSize[2]; - } else { - assert(dims > 3 && "Only 1D, 2D, and 3D cluster launch is supported."); + // Process Kernel cooperative property. + { + constexpr bool UsesRootSync = PropertiesT::template has_property< + sycl::ext::oneapi::experimental::use_root_sync_key>(); + retval.MKernelIsCooperative = UsesRootSync; } - } - void setKernelWorkGroupMemorySize(uint32_t Size) { - MKernelWorkGroupMemorySize = Size; + // Process device progress properties. + { + if constexpr (PropertiesT::template has_property< + sycl::ext::oneapi::experimental:: + work_group_progress_key>()) { + auto prop = Props.template get_property< + sycl::ext::oneapi::experimental::work_group_progress_key>(); + verifyDeviceHasProgressGuarantee( + dev, + prop.guarantee, + sycl::ext::oneapi::experimental::execution_scope::work_group, + prop.coordinationScope, + retval); + } + if constexpr (PropertiesT::template has_property< + sycl::ext::oneapi::experimental:: + sub_group_progress_key>()) { + auto prop = Props.template get_property< + sycl::ext::oneapi::experimental::sub_group_progress_key>(); + verifyDeviceHasProgressGuarantee( + dev, + prop.guarantee, + sycl::ext::oneapi::experimental::execution_scope::sub_group, + prop.coordinationScope, + retval); + } + if constexpr (PropertiesT::template has_property< + sycl::ext::oneapi::experimental:: + work_item_progress_key>()) { + auto prop = Props.template get_property< + sycl::ext::oneapi::experimental::work_item_progress_key>(); + verifyDeviceHasProgressGuarantee( + dev, + prop.guarantee, + sycl::ext::oneapi::experimental::execution_scope::work_item, + prop.coordinationScope, + retval); + } + } + + // Process work group scratch memory property. + { + if constexpr (PropertiesT::template has_property< + sycl::ext::oneapi::experimental:: + work_group_scratch_size>()) { + throwIfGraphAssociated(HasGraph); + auto WorkGroupMemSize = Props.template get_property< + sycl::ext::oneapi::experimental::work_group_scratch_size>(); + retval.MKernelWorkGroupMemorySize = WorkGroupMemSize.size; + } + } + + // Parse cluster properties. + { + constexpr std::size_t ClusterDim = + syclex::detail::getClusterDim(); + if constexpr (ClusterDim > 0) { + throwIfGraphAssociated< + syclex::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_experimental_cuda_cluster_launch>(HasGraph); + auto ClusterSize = Props + .template get_property< + syclex::cuda::cluster_size_key>() + .get_cluster_size(); + retval.MKernelUsesClusterLaunch = true; + retval.MKernelClusterDims = ClusterDim; + if (ClusterDim == 1) { + retval.MKernelClusterSize[0] = ClusterSize[0]; + } else if (ClusterDim == 2) { + retval.MKernelClusterSize[0] = ClusterSize[0]; + retval.MKernelClusterSize[1] = ClusterSize[1]; + } else if (ClusterDim == 3) { + retval.MKernelClusterSize[0] = ClusterSize[0]; + retval.MKernelClusterSize[1] = ClusterSize[1]; + retval.MKernelClusterSize[2] = ClusterSize[2]; + } else { + assert(ClusterDim > 3 && + "Only 1D, 2D, and 3D cluster launch is supported."); + } + } + } + + return retval; } - template - void processLaunchProperties(device_impl &Dev, bool HasGraph, PropertiesT Props) { + /// Process kernel properties. + /// + /// Stores information about kernel properties into the handler. + /// + /// Note: it is important that this function *does not* depend on kernel + /// name or kernel type, because then it will be instantiated for every + /// kernel, even though body of those instantiated functions could be almost + /// the same, thus unnecessary increasing compilation time. + template < + bool IsESIMDKernel, + typename PropertiesT = ext::oneapi::experimental::empty_properties_t> + static KernelLaunchPropertiesT processProperties(PropertiesT Props, + bool HasGraph, device_impl &dev) { static_assert( ext::oneapi::experimental::is_property_list::value, "Template type is not a property list."); - static_assert( !PropertiesT::template has_property< sycl::ext::intel::experimental::fp_control_key>() || @@ -347,111 +450,52 @@ class KernelLaunchProperties { sycl::ext::intel::experimental::fp_control_key>() && IsESIMDKernel), "Floating point control property is supported for ESIMD kernels only."); - static_assert( !PropertiesT::template has_property< sycl::ext::oneapi::experimental::indirectly_callable_key>(), "indirectly_callable property cannot be applied to SYCL kernels"); - // Process Kernel cache configuration property. - if constexpr (PropertiesT::template has_property< - sycl::ext::intel::experimental::cache_config_key>()) { - auto Config = Props.template get_property< - sycl::ext::intel::experimental::cache_config_key>(); - if (Config == sycl::ext::intel::experimental::large_slm) { - setKernelCacheConfig(StableKernelCacheConfig::LargeSLM); - } else if (Config == sycl::ext::intel::experimental::large_data) { - setKernelCacheConfig(StableKernelCacheConfig::LargeData); - } - } else { - std::ignore = Props; - } - - // Process Kernel cooperative property. - constexpr bool UsesRootSync = PropertiesT::template has_property< - sycl::ext::oneapi::experimental::use_root_sync_key>(); - if constexpr (UsesRootSync) { - MKernelIsCooperative = UsesRootSync; - } - - // Process device progress properties. - if constexpr (PropertiesT::template has_property< - sycl::ext::oneapi::experimental:: - work_group_progress_key>()) { - auto prop = Props.template get_property< - sycl::ext::oneapi::experimental::work_group_progress_key>(); - verifyDeviceHasProgressGuarantee( - Dev, - prop.guarantee, - sycl::ext::oneapi::experimental::execution_scope::work_group, - prop.coordinationScope); - } - if constexpr (PropertiesT::template has_property< - sycl::ext::oneapi::experimental:: - sub_group_progress_key>()) { - auto prop = Props.template get_property< - sycl::ext::oneapi::experimental::sub_group_progress_key>(); - verifyDeviceHasProgressGuarantee( - Dev, - prop.guarantee, - sycl::ext::oneapi::experimental::execution_scope::sub_group, - prop.coordinationScope); - } - if constexpr (PropertiesT::template has_property< - sycl::ext::oneapi::experimental:: - work_item_progress_key>()) { - auto prop = Props.template get_property< - sycl::ext::oneapi::experimental::work_item_progress_key>(); - verifyDeviceHasProgressGuarantee( - Dev, - prop.guarantee, - sycl::ext::oneapi::experimental::execution_scope::work_item, - prop.coordinationScope); - } - - // Process work group scratch memory property. - if constexpr (PropertiesT::template has_property< - sycl::ext::oneapi::experimental:: - work_group_scratch_size>()) { - throwIfGraphAssociated(HasGraph); - auto WorkGroupMemSize = Props.template get_property< - sycl::ext::oneapi::experimental::work_group_scratch_size>(); - setKernelWorkGroupMemorySize(WorkGroupMemSize.size); - } - - // Parse cluster properties. - constexpr std::size_t ClusterDim = - syclex::detail::getClusterDim(); - if constexpr (ClusterDim > 0) { - throwIfGraphAssociated< - syclex::detail::UnsupportedGraphFeatures:: - sycl_ext_oneapi_experimental_cuda_cluster_launch>(HasGraph); - auto ClusterSize = Props - .template get_property< - syclex::cuda::cluster_size_key>() - .get_cluster_size(); - setKernelClusterSize(ClusterDim, ClusterSize); - } + return processLaunchProperties(Props, HasGraph, dev); } -}; // class KernelLaunchProperties - -struct KernelLaunchPropertyWrapper { - template - static void parseProperties([[maybe_unused]] PropertyProcessor h, - [[maybe_unused]] const KernelType &KernelFunc) { + template + static std::optional parseProperties( + [[maybe_unused]] const KernelType &KernelFunc, + [[maybe_unused]] bool HasGraph, + [[maybe_unused]] device_impl &dev) { #ifndef __SYCL_DEVICE_ONLY__ // If there are properties provided by get method then process them. if constexpr (ext::oneapi::experimental::detail:: HasKernelPropertiesGetMethod::value) { - h->template processProperties()>( - KernelFunc.get(ext::oneapi::experimental::properties_tag{})); + return processProperties()>( + KernelFunc.get(ext::oneapi::experimental::properties_tag{}), HasGraph, dev); } #endif + // If there are no properties provided by get method then return empty + // optional. + return std::nullopt; } + + void parseAndSetKernelLaunchProperties(KernelLaunchPropertiesT &props) { + switch (props.MKernelCacheConfig) { + case StableKernelCacheConfig::Default: + MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_DEFAULT; + break; + case StableKernelCacheConfig::LargeSLM: + MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_SLM; + break; + case StableKernelCacheConfig::LargeData: + MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_DATA; + break; + } + + MKernelIsCooperative = props.MKernelIsCooperative; + MKernelWorkGroupMemorySize = props.MKernelWorkGroupMemorySize; + MKernelUsesClusterLaunch = props.MKernelUsesClusterLaunch; + MKernelClusterDims = props.MKernelClusterDims; + MKernelClusterSize = props.MKernelClusterSize; + } }; // KernelLaunchPropertyWrapper struct } // namespace detail diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 3f3c9e724f285..6c711b7941b4e 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -877,11 +877,13 @@ class __SYCL_EXPORT handler { } } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES void verifyDeviceHasProgressGuarantee( sycl::ext::oneapi::experimental::forward_progress_guarantee guarantee, sycl::ext::oneapi::experimental::execution_scope threadScope, sycl::ext::oneapi::experimental::execution_scope coordinationScope); + template void checkAndSetClusterRange(const Properties &Props) { namespace syclex = sycl::ext::oneapi::experimental; @@ -991,6 +993,8 @@ class __SYCL_EXPORT handler { processLaunchProperties(Props); } + #endif // INTEL_PREVIEW_BREAKING_CHANGES + /// Checks whether it is possible to copy the source shape to the destination /// shape(the shapes are described by the accessor ranges) by using /// copying by regions of memory and not copying element by element @@ -1098,6 +1102,8 @@ class __SYCL_EXPORT handler { // supported gcc version is bumped. std::tuple, bool> getMaxWorkGroups_v2(); + detail::device_impl& getDeviceImpl(); + template std::tuple, bool> getRoundedRange(range UserRange) { range RoundedRange = UserRange; @@ -1296,8 +1302,13 @@ class __SYCL_EXPORT handler { decltype(Wrapper), TransformedArgType, PropertiesT>::wrap(Wrapper); - detail::KernelLaunchPropertyWrapper::parseProperties(this, - Wrapper); + if (auto prop = + detail::KernelLaunchPropertyWrapper::parseProperties( + Wrapper, getCommandGraph() != nullptr, + getDeviceImpl())) { + setKernelLaunchProperties(*prop); + } + #ifndef __SYCL_DEVICE_ONLY__ constexpr detail::string_view Name{detail::getKernelName()}; verifyUsedKernelBundleInternal(Name); @@ -1323,13 +1334,21 @@ class __SYCL_EXPORT handler { // kernel is generated detail::KernelWrapper::wrap(KernelFunc); - detail::KernelLaunchPropertyWrapper::parseProperties(this, - KernelFunc); + if (auto prop = + detail::KernelLaunchPropertyWrapper::parseProperties( + KernelFunc, getCommandGraph() != nullptr, + getDeviceImpl())) { + setKernelLaunchProperties(*prop); + } #ifndef __SYCL_DEVICE_ONLY__ constexpr detail::string_view Name{detail::getKernelName()}; verifyUsedKernelBundleInternal(Name); - processProperties(), PropertiesT>(Props); + auto ProcessedProps = detail::KernelLaunchPropertyWrapper::processProperties< + detail::isKernelESIMD(), PropertiesT>( + Props, getCommandGraph() != nullptr, + getDeviceImpl()); + setKernelLaunchProperties(ProcessedProps); detail::checkValueRange(UserRange); setNDRangeDescriptor(std::move(UserRange)); StoreLambda( @@ -1359,7 +1378,12 @@ class __SYCL_EXPORT handler { MKernel = detail::getSyclObjImpl(std::move(Kernel)); detail::checkValueRange(NumWorkItems); setNDRangeDescriptor(std::move(NumWorkItems)); - processLaunchProperties(Props); + auto ParsedProp = + detail::KernelLaunchPropertyWrapper::processLaunchProperties< + PropertiesT>( + Props, getCommandGraph() != nullptr, + getDeviceImpl()); + setKernelLaunchProperties(ParsedProp); setType(detail::CGType::Kernel); extractArgsAndReqs(); MKernelName = getKernelName(); @@ -1384,7 +1408,13 @@ class __SYCL_EXPORT handler { MKernel = detail::getSyclObjImpl(std::move(Kernel)); detail::checkValueRange(NDRange); setNDRangeDescriptor(std::move(NDRange)); - processLaunchProperties(Props); + auto ParsedProp = + detail::KernelLaunchPropertyWrapper::processLaunchProperties + ( + Props, getCommandGraph() != nullptr, + getDeviceImpl() + ); + setKernelLaunchProperties(ParsedProp); setType(detail::CGType::Kernel); extractArgsAndReqs(); MKernelName = getKernelName(); @@ -1405,8 +1435,12 @@ class __SYCL_EXPORT handler { (void)Props; detail::KernelWrapper::wrap(KernelFunc); - detail::KernelLaunchPropertyWrapper::parseProperties(this, - KernelFunc); + if (auto prop = + detail::KernelLaunchPropertyWrapper::parseProperties( + KernelFunc, getCommandGraph() != nullptr, + getDeviceImpl())) { + setKernelLaunchProperties(*prop); + } #ifndef __SYCL_DEVICE_ONLY__ if constexpr (WrapAsVal == detail::WrapAs::single_task) { throwOnKernelParameterMisuse(); @@ -1425,7 +1459,11 @@ class __SYCL_EXPORT handler { } StoreLambda(std::move(KernelFunc)); - processProperties(), PropertiesT>(Props); + auto ProcessedProps = detail::KernelLaunchPropertyWrapper::processProperties< + detail::isKernelESIMD(), PropertiesT>( + Props, getCommandGraph() != nullptr, + getDeviceImpl()); + setKernelLaunchProperties(ProcessedProps); #endif } @@ -1448,8 +1486,12 @@ class __SYCL_EXPORT handler { (void)Kernel; detail::KernelWrapper::wrap(KernelFunc); - detail::KernelLaunchPropertyWrapper::parseProperties(this, - KernelFunc); + if (auto prop = + detail::KernelLaunchPropertyWrapper::parseProperties( + KernelFunc, getCommandGraph() != nullptr, + getDeviceImpl())) { + setKernelLaunchProperties(*prop); + } #ifndef __SYCL_DEVICE_ONLY__ if constexpr (WrapAsVal == detail::WrapAs::single_task) { throwOnKernelParameterMisuse(); @@ -1477,7 +1519,10 @@ class __SYCL_EXPORT handler { } else { StoreLambda(std::move(KernelFunc)); } - processProperties(), PropertiesT>(Props); + auto ProcessedProps = detail::KernelLaunchPropertyWrapper::processProperties< + detail::isKernelESIMD(), PropertiesT>( + Props, getCommandGraph() != nullptr, getDeviceImpl()); + setKernelLaunchProperties(ProcessedProps); #endif } #endif // __INTEL_PREVIEW_BREAKING_CHANGES @@ -3483,6 +3528,7 @@ class __SYCL_EXPORT handler { bool IsDeviceImageScoped, size_t NumBytes, size_t Offset); +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES // Changing values in this will break ABI/API. enum class StableKernelCacheConfig : int32_t { Default = 0, @@ -3505,6 +3551,11 @@ class __SYCL_EXPORT handler { // Set the request work group memory size (work_group_static ext). void setKernelWorkGroupMem(size_t Size); +#endif + + void setKernelLaunchProperties( + detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT + &KernelLaunchProperties); // Various checks that are only meaningful for host compilation, because they // result in runtime errors (i.e. exceptions being thrown). To save time @@ -3567,7 +3618,7 @@ class __SYCL_EXPORT handler { template < ext::oneapi::experimental::detail::UnsupportedGraphFeatures FeatureT> void throwIfGraphAssociated() const { - detail::KernelLaunchProperties::throwIfGraphAssociated(getCommandGraph() != nullptr); + detail::KernelLaunchPropertyWrapper::throwIfGraphAssociated(getCommandGraph() != nullptr); } #endif @@ -3642,7 +3693,6 @@ class __SYCL_EXPORT handler { void instantiateKernelOnHost(void *InstantiateKernelOnHostPtr); friend class detail::HandlerAccess; - friend struct detail::KernelLaunchPropertyWrapper; #ifdef __INTEL_PREVIEW_BREAKING_CHANGES __SYCL_DLL_LOCAL detail::handler_impl *get_impl() { return impl; } diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index bfcb17c28c03e..7b11ab78c75ec 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -29,7 +29,7 @@ enum class HandlerSubmissionState : std::uint8_t { SPEC_CONST_SET_STATE, }; -class handler_impl : public KernelLaunchProperties { +class handler_impl : public KernelLaunchPropertyWrapper { public: handler_impl(queue_impl &Queue, queue_impl *SubmissionSecondaryQueue, bool EventNeeded) diff --git a/sycl/source/detail/kernel_launch_helper.cpp b/sycl/source/detail/kernel_launch_helper.cpp index 5670aa5962101..8ff7aa7ea231e 100644 --- a/sycl/source/detail/kernel_launch_helper.cpp +++ b/sycl/source/detail/kernel_launch_helper.cpp @@ -17,11 +17,12 @@ inline namespace _V1 { namespace detail { namespace syclex = sycl::ext::oneapi::experimental; - void KernelLaunchProperties::verifyDeviceHasProgressGuarantee( + void KernelLaunchPropertyWrapper::verifyDeviceHasProgressGuarantee( device_impl &dev, syclex::forward_progress_guarantee guarantee, syclex::execution_scope threadScope, - syclex::execution_scope coordinationScope) { + syclex::execution_scope coordinationScope, + KernelLaunchPropertiesT &prop) { using execution_scope = syclex::execution_scope; using forward_progress = syclex::forward_progress_guarantee; @@ -44,7 +45,7 @@ namespace detail { // TODO: Further design work is probably needed to reflect this behavior in // Unified Runtime. if (guarantee == forward_progress::concurrent) - MKernelIsCooperative = true; + prop.MKernelIsCooperative = true; } else if (threadScope == execution_scope::sub_group) { if (!supported) { throw sycl::exception(sycl::errc::feature_not_supported, @@ -53,7 +54,7 @@ namespace detail { } // Same reasoning as above. if (guarantee == forward_progress::concurrent) - MKernelIsCooperative = true; + prop.MKernelIsCooperative = true; } else { // threadScope is execution_scope::work_item otherwise undefined // behavior if (!supported) { diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 57099843d8cf0..54ab9ffb16103 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -2076,14 +2076,54 @@ static bool checkContextSupports(detail::context_impl &ContextImpl, return SupportsOp; } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES void handler::verifyDeviceHasProgressGuarantee( sycl::ext::oneapi::experimental::forward_progress_guarantee guarantee, sycl::ext::oneapi::experimental::execution_scope threadScope, sycl::ext::oneapi::experimental::execution_scope coordinationScope) { - impl->verifyDeviceHasProgressGuarantee( - impl->get_device(), - guarantee, threadScope, coordinationScope); + + using execution_scope = sycl::ext::oneapi::experimental::execution_scope; + using forward_progress = + sycl::ext::oneapi::experimental::forward_progress_guarantee; + const bool supported = impl->get_device().supportsForwardProgress( + guarantee, threadScope, coordinationScope); + if (threadScope == execution_scope::work_group) { + if (!supported) { + throw sycl::exception( + sycl::errc::feature_not_supported, + "Required progress guarantee for work groups is not " + "supported by this device."); + } + // If we are here, the device supports the guarantee required but there is a + // caveat in that if the guarantee required is a concurrent guarantee, then + // we most likely also need to enable cooperative launch of the kernel. That + // is, although the device supports the required guarantee, some setup work + // is needed to truly make the device provide that guarantee at runtime. + // Otherwise, we will get the default guarantee which is weaker than + // concurrent. Same reasoning applies for sub_group but not for work_item. + // TODO: Further design work is probably needed to reflect this behavior in + // Unified Runtime. + if (guarantee == forward_progress::concurrent) + setKernelIsCooperative(true); + } else if (threadScope == execution_scope::sub_group) { + if (!supported) { + throw sycl::exception(sycl::errc::feature_not_supported, + "Required progress guarantee for sub groups is not " + "supported by this device."); + } + // Same reasoning as above. + if (guarantee == forward_progress::concurrent) + setKernelIsCooperative(true); + } else { // threadScope is execution_scope::work_item otherwise undefined + // behavior + if (!supported) { + throw sycl::exception(sycl::errc::feature_not_supported, + "Required progress guarantee for work items is not " + "supported by this device."); + } + } } +#endif bool handler::supportsUSMMemcpy2D() { if (impl->get_graph_or_null()) @@ -2214,25 +2254,38 @@ detail::context_impl &handler::getContextImpl() const { return impl->get_queue().getContextImpl(); } -void handler::setKernelCacheConfig(handler::StableKernelCacheConfig Config) { - switch (Config) { - case handler::StableKernelCacheConfig::Default: - impl->MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_DEFAULT; - break; - case handler::StableKernelCacheConfig::LargeSLM: - impl->MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_SLM; - break; - case handler::StableKernelCacheConfig::LargeData: - impl->MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_DATA; - break; +void handler::setKernelLaunchProperties( + KernelLaunchPropertyWrapper::KernelLaunchPropertiesT + &KernelLaunchProperties) { + impl->parseAndSetKernelLaunchProperties(KernelLaunchProperties); + + if (KernelLaunchProperties.MKernelClusterDims == 1) { + sycl::range<1> ClusterSizeTrimmed = {KernelLaunchProperties.MKernelClusterSize[0]}; + impl->MNDRDesc.setClusterDimensions(ClusterSizeTrimmed); + } else if (KernelLaunchProperties.MKernelClusterDims == 2) { + sycl::range<2> ClusterSizeTrimmed = + {KernelLaunchProperties.MKernelClusterSize[0], + KernelLaunchProperties.MKernelClusterSize[1]}; + impl->MNDRDesc.setClusterDimensions(ClusterSizeTrimmed); + } else if (KernelLaunchProperties.MKernelClusterDims == 3) { + sycl::range<3> ClusterSizeTrimmed = + {KernelLaunchProperties.MKernelClusterSize[0], + KernelLaunchProperties.MKernelClusterSize[1], + KernelLaunchProperties.MKernelClusterSize[2]}; + impl->MNDRDesc.setClusterDimensions(ClusterSizeTrimmed); } } -void handler::setKernelIsCooperative(bool KernelIsCooperative) { - impl->MKernelIsCooperative = KernelIsCooperative; +device_impl& handler::getDeviceImpl() { + return impl->get_device(); } #ifndef __INTEL_PREVIEW_BREAKING_CHANGES + +void handler::setKernelIsCooperative(bool IsCooperative) { + impl->MKernelIsCooperative = IsCooperative; +} + void handler::setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims) { throwIfGraphAssociated< syclex::detail::UnsupportedGraphFeatures:: @@ -2249,7 +2302,6 @@ void handler::setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims) { impl->MNDRDesc.setClusterDimensions(ClusterSize); } } -#endif void handler::setKernelClusterLaunch(sycl::range<3> ClusterSize) { throwIfGraphAssociated< @@ -2280,6 +2332,7 @@ void handler::setKernelWorkGroupMem(size_t Size) { sycl_ext_oneapi_work_group_scratch_memory>(); impl->MKernelWorkGroupMemorySize = Size; } +#endif // __INTEL_PREVIEW_BREAKING_CHANGES void handler::ext_oneapi_graph( ext::oneapi::experimental::command_graph< diff --git a/sycl/test/extensions/properties/non_esimd_kernel_fp_control.cpp b/sycl/test/extensions/properties/non_esimd_kernel_fp_control.cpp index 46d11eccdfe54..e6910484bf52f 100644 --- a/sycl/test/extensions/properties/non_esimd_kernel_fp_control.cpp +++ b/sycl/test/extensions/properties/non_esimd_kernel_fp_control.cpp @@ -20,7 +20,7 @@ struct ESIMDKernel { int main(void) { queue q; - // expected-error-re@sycl/handler.hpp:* {{static assertion failed due to requirement {{.+}}: Floating point control property is supported for ESIMD kernels only.}} + // expected-error-re@sycl/detail/kernel_launch_helper.hpp:* {{static assertion failed due to requirement {{.+}}: Floating point control property is supported for ESIMD kernels only.}} syclex::properties properties7{ intelex::fp_control}; @@ -28,7 +28,7 @@ int main(void) { cgh.single_task(properties7, [=]() {}); }); - // expected-error-re@sycl/handler.hpp:* {{static assertion failed due to requirement {{.+}}: Floating point control property is supported for ESIMD kernels only.}} + // expected-error-re@sycl/detail/kernel_launch_helper.hpp:* {{static assertion failed due to requirement {{.+}}: Floating point control property is supported for ESIMD kernels only.}} ESIMDKernel Kern; q.submit([&](handler &cgh) { cgh.parallel_for(range<1>(1), Kern); }); diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index 4e991e78b06c5..8ae13a204adc7 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -137,6 +137,8 @@ // CHECK-NEXT: ext/oneapi/kernel_properties/properties.hpp // CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp // CHECK-NEXT: detail/sycl_local_mem_builtins.hpp +// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp +// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp // CHECK-NEXT: detail/kernel_name_based_cache.hpp // CHECK-NEXT: detail/kernel_name_str_t.hpp // CHECK-NEXT: detail/reduction_forward.hpp @@ -145,9 +147,7 @@ // CHECK-NEXT: ext/oneapi/bindless_images_interop.hpp // CHECK-NEXT: ext/oneapi/interop_common.hpp // CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp -// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp // CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp -// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp // CHECK-NEXT: kernel.hpp // CHECK-NEXT: sampler.hpp // CHECK-NEXT: feature_test.hpp diff --git a/sycl/test/virtual-functions/properties-negative.cpp b/sycl/test/virtual-functions/properties-negative.cpp index b8e1b75f1d9a9..0ef06b3652ad1 100644 --- a/sycl/test/virtual-functions/properties-negative.cpp +++ b/sycl/test/virtual-functions/properties-negative.cpp @@ -17,15 +17,15 @@ int main() { oneapi::properties props_int{oneapi::indirectly_callable_in}; oneapi::properties props_user{oneapi::indirectly_callable_in}; - // expected-error-re@sycl/handler.hpp:* {{static assertion failed due to requirement {{.*}} indirectly_callable property cannot be applied to SYCL kernels}} + // expected-error-re@sycl/detail/kernel_launch_helper.hpp:* {{static assertion failed due to requirement {{.*}} indirectly_callable property cannot be applied to SYCL kernels}} q.single_task(props_empty, [=]() {}); // When both "props_empty" and "props_void" are in use, we won't see the // static assert firing for the second one, because there will be only one // instantiation of handler::processProperties. q.single_task(props_void, [=]() {}); - // expected-error-re@sycl/handler.hpp:* {{static assertion failed due to requirement {{.*}} indirectly_callable property cannot be applied to SYCL kernels}} + // expected-error-re@sycl/detail/kernel_launch_helper.hpp:* {{static assertion failed due to requirement {{.*}} indirectly_callable property cannot be applied to SYCL kernels}} q.single_task(props_int, [=]() {}); - // expected-error-re@sycl/handler.hpp:* {{static assertion failed due to requirement {{.*}} indirectly_callable property cannot be applied to SYCL kernels}} + // expected-error-re@sycl/detail/kernel_launch_helper.hpp:* {{static assertion failed due to requirement {{.*}} indirectly_callable property cannot be applied to SYCL kernels}} q.single_task(props_user, [=]() {}); return 0; From bb8875b25157228176936eaf529df3c4391306f3 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Fri, 25 Jul 2025 21:05:45 +0200 Subject: [PATCH 04/25] Fix conditional property parsing --- .../sycl/detail/kernel_launch_helper.hpp | 196 ++++++++++++------ sycl/include/sycl/handler.hpp | 85 ++++---- sycl/source/CMakeLists.txt | 1 - sycl/source/detail/handler_impl.hpp | 2 +- sycl/source/detail/kernel_launch_helper.cpp | 70 ------- sycl/source/handler.cpp | 47 ++++- 6 files changed, 207 insertions(+), 194 deletions(-) delete mode 100644 sycl/source/detail/kernel_launch_helper.cpp diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index 5dc3f35d699e1..2ad0026cfccca 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -12,13 +12,13 @@ #include #include #include +#include +#include +#include #include #include #include #include -#include -#include -#include #include #include @@ -278,56 +278,118 @@ class KernelLaunchPropertyWrapper { std::array MKernelClusterSize = {0, 0, 0}; struct KernelLaunchPropertiesT { - StableKernelCacheConfig MKernelCacheConfig = - StableKernelCacheConfig::Default; - bool MKernelIsCooperative = false; - uint32_t MKernelWorkGroupMemorySize = 0; - bool MKernelUsesClusterLaunch = false; - size_t MKernelClusterDims = 0; - std::array MKernelClusterSize = {0, 0, 0}; + std::optional MKernelCacheConfig; + std::optional MKernelIsCooperative; + std::optional MKernelWorkGroupMemorySize; + std::optional MKernelUsesClusterLaunch; + size_t MKernelClusterDims; + std::array MKernelClusterSize; + + KernelLaunchPropertiesT() { + MKernelCacheConfig = std::nullopt; + MKernelIsCooperative = std::nullopt; + MKernelWorkGroupMemorySize = std::nullopt; + MKernelUsesClusterLaunch = std::nullopt; + MKernelClusterDims = 0; + MKernelClusterSize.fill(0); + } }; - template < - syclex::detail::UnsupportedGraphFeatures FeatureT> + template static void throwIfGraphAssociated(bool HasGraph) { if (!HasGraph) return; std::string FeatureString = - syclex::detail::UnsupportedFeatureToString( - FeatureT); + syclex::detail::UnsupportedFeatureToString(FeatureT); throw sycl::exception(sycl::make_error_code(errc::invalid), "The " + FeatureString + " feature is not yet available " "for use with the SYCL Graph extension."); } - static void - verifyDeviceHasProgressGuarantee(device_impl &dev, - syclex::forward_progress_guarantee guarantee, - syclex::execution_scope threadScope, - syclex::execution_scope coordinationScope, - KernelLaunchPropertiesT &prop); +private: + static void verifyDeviceHasProgressGuarantee( + bool supported, syclex::forward_progress_guarantee guarantee, + syclex::execution_scope threadScope, KernelLaunchPropertiesT &prop) { + using execution_scope = syclex::execution_scope; + using forward_progress = syclex::forward_progress_guarantee; + + if (threadScope == execution_scope::work_group) { + if (!supported) { + throw sycl::exception( + sycl::errc::feature_not_supported, + "Required progress guarantee for work groups is not " + "supported by this device."); + } + // If we are here, the device supports the guarantee required but there is + // a caveat in that if the guarantee required is a concurrent guarantee, + // then we most likely also need to enable cooperative launch of the + // kernel. That is, although the device supports the required guarantee, + // some setup work is needed to truly make the device provide that + // guarantee at runtime. Otherwise, we will get the default guarantee + // which is weaker than concurrent. Same reasoning applies for sub_group + // but not for work_item. + // TODO: Further design work is probably needed to reflect this behavior + // in Unified Runtime. + if (guarantee == forward_progress::concurrent) + prop.MKernelIsCooperative = true; + } else if (threadScope == execution_scope::sub_group) { + if (!supported) { + throw sycl::exception( + sycl::errc::feature_not_supported, + "Required progress guarantee for sub groups is not " + "supported by this device."); + } + // Same reasoning as above. + if (guarantee == forward_progress::concurrent) + prop.MKernelIsCooperative = true; + } else { // threadScope is execution_scope::work_item otherwise undefined + // behavior + if (!supported) { + throw sycl::exception( + sycl::errc::feature_not_supported, + "Required progress guarantee for work items is not " + "supported by this device."); + } + } + } + +public: /// Process runtime kernel properties. /// /// Stores information about kernel properties into the handler. - template - static KernelLaunchPropertiesT processLaunchProperties(PropertiesT Props, - bool HasGraph, device_impl &dev) { + template + static KernelLaunchPropertiesT + processLaunchProperties(PropertiesT Props, PropertyProcessorPtrT p) { KernelLaunchPropertiesT retval; + bool HasGraph = p->isKernelAssociatedWithGraph(); // Process Kernel cache configuration property. { + auto ConvertToURCacheConfig = [](StableKernelCacheConfig cc) { + switch (cc) { + case StableKernelCacheConfig::Default: + return UR_KERNEL_CACHE_CONFIG_DEFAULT; + case StableKernelCacheConfig::LargeSLM: + return UR_KERNEL_CACHE_CONFIG_LARGE_SLM; + case StableKernelCacheConfig::LargeData: + return UR_KERNEL_CACHE_CONFIG_LARGE_DATA; + } + }; + if constexpr (PropertiesT::template has_property< sycl::ext::intel::experimental::cache_config_key>()) { auto Config = Props.template get_property< sycl::ext::intel::experimental::cache_config_key>(); if (Config == sycl::ext::intel::experimental::large_slm) { - retval.MKernelCacheConfig = StableKernelCacheConfig::LargeSLM; + retval.MKernelCacheConfig = + ConvertToURCacheConfig(StableKernelCacheConfig::LargeSLM); } else if (Config == sycl::ext::intel::experimental::large_data) { - retval.MKernelCacheConfig = StableKernelCacheConfig::LargeData; + retval.MKernelCacheConfig = + ConvertToURCacheConfig(StableKernelCacheConfig::LargeData); } } else { std::ignore = Props; @@ -348,11 +410,13 @@ class KernelLaunchPropertyWrapper { work_group_progress_key>()) { auto prop = Props.template get_property< sycl::ext::oneapi::experimental::work_group_progress_key>(); - verifyDeviceHasProgressGuarantee( - dev, + bool supported = p->deviceSupportForwardProgress( prop.guarantee, sycl::ext::oneapi::experimental::execution_scope::work_group, - prop.coordinationScope, + prop.coordinationScope); + verifyDeviceHasProgressGuarantee( + supported, prop.guarantee, + sycl::ext::oneapi::experimental::execution_scope::work_group, retval); } if constexpr (PropertiesT::template has_property< @@ -360,11 +424,13 @@ class KernelLaunchPropertyWrapper { sub_group_progress_key>()) { auto prop = Props.template get_property< sycl::ext::oneapi::experimental::sub_group_progress_key>(); - verifyDeviceHasProgressGuarantee( - dev, + bool supported = p->deviceSupportForwardProgress( prop.guarantee, sycl::ext::oneapi::experimental::execution_scope::sub_group, - prop.coordinationScope, + prop.coordinationScope); + verifyDeviceHasProgressGuarantee( + supported, prop.guarantee, + sycl::ext::oneapi::experimental::execution_scope::sub_group, retval); } if constexpr (PropertiesT::template has_property< @@ -372,11 +438,13 @@ class KernelLaunchPropertyWrapper { work_item_progress_key>()) { auto prop = Props.template get_property< sycl::ext::oneapi::experimental::work_item_progress_key>(); - verifyDeviceHasProgressGuarantee( - dev, + bool supported = p->deviceSupportForwardProgress( prop.guarantee, sycl::ext::oneapi::experimental::execution_scope::work_item, - prop.coordinationScope, + prop.coordinationScope); + verifyDeviceHasProgressGuarantee( + supported, prop.guarantee, + sycl::ext::oneapi::experimental::execution_scope::work_item, retval); } } @@ -387,7 +455,8 @@ class KernelLaunchPropertyWrapper { sycl::ext::oneapi::experimental:: work_group_scratch_size>()) { throwIfGraphAssociated(HasGraph); + sycl_ext_oneapi_work_group_scratch_memory>( + HasGraph); auto WorkGroupMemSize = Props.template get_property< sycl::ext::oneapi::experimental::work_group_scratch_size>(); retval.MKernelWorkGroupMemorySize = WorkGroupMemSize.size; @@ -400,12 +469,12 @@ class KernelLaunchPropertyWrapper { syclex::detail::getClusterDim(); if constexpr (ClusterDim > 0) { throwIfGraphAssociated< - syclex::detail::UnsupportedGraphFeatures:: - sycl_ext_oneapi_experimental_cuda_cluster_launch>(HasGraph); + syclex::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_experimental_cuda_cluster_launch>(HasGraph); auto ClusterSize = Props - .template get_property< - syclex::cuda::cluster_size_key>() - .get_cluster_size(); + .template get_property< + syclex::cuda::cluster_size_key>() + .get_cluster_size(); retval.MKernelUsesClusterLaunch = true; retval.MKernelClusterDims = ClusterDim; if (ClusterDim == 1) { @@ -419,11 +488,11 @@ class KernelLaunchPropertyWrapper { retval.MKernelClusterSize[2] = ClusterSize[2]; } else { assert(ClusterDim > 3 && - "Only 1D, 2D, and 3D cluster launch is supported."); + "Only 1D, 2D, and 3D cluster launch is supported."); } } } - + return retval; } @@ -436,10 +505,10 @@ class KernelLaunchPropertyWrapper { /// kernel, even though body of those instantiated functions could be almost /// the same, thus unnecessary increasing compilation time. template < - bool IsESIMDKernel, + bool IsESIMDKernel, typename PropertyProcessorPtrT, typename PropertiesT = ext::oneapi::experimental::empty_properties_t> static KernelLaunchPropertiesT processProperties(PropertiesT Props, - bool HasGraph, device_impl &dev) { + PropertyProcessorPtrT p) { static_assert( ext::oneapi::experimental::is_property_list::value, "Template type is not a property list."); @@ -455,21 +524,21 @@ class KernelLaunchPropertyWrapper { sycl::ext::oneapi::experimental::indirectly_callable_key>(), "indirectly_callable property cannot be applied to SYCL kernels"); - return processLaunchProperties(Props, HasGraph, dev); + return processLaunchProperties(Props, p); } - template - static std::optional parseProperties( - [[maybe_unused]] const KernelType &KernelFunc, - [[maybe_unused]] bool HasGraph, - [[maybe_unused]] device_impl &dev) { + template + static std::optional + parseProperties([[maybe_unused]] const KernelType &KernelFunc, + [[maybe_unused]] PropertyProcessorPtrT p) { #ifndef __SYCL_DEVICE_ONLY__ // If there are properties provided by get method then process them. if constexpr (ext::oneapi::experimental::detail:: HasKernelPropertiesGetMethod::value) { return processProperties()>( - KernelFunc.get(ext::oneapi::experimental::properties_tag{}), HasGraph, dev); + KernelFunc.get(ext::oneapi::experimental::properties_tag{}), p); } #endif // If there are no properties provided by get method then return empty @@ -478,24 +547,21 @@ class KernelLaunchPropertyWrapper { } void parseAndSetKernelLaunchProperties(KernelLaunchPropertiesT &props) { - switch (props.MKernelCacheConfig) { - case StableKernelCacheConfig::Default: - MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_DEFAULT; - break; - case StableKernelCacheConfig::LargeSLM: - MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_SLM; - break; - case StableKernelCacheConfig::LargeData: - MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_DATA; - break; - } + if (props.MKernelCacheConfig) + MKernelCacheConfig = *props.MKernelCacheConfig; + + if (props.MKernelIsCooperative) + MKernelIsCooperative = *props.MKernelIsCooperative; - MKernelIsCooperative = props.MKernelIsCooperative; - MKernelWorkGroupMemorySize = props.MKernelWorkGroupMemorySize; - MKernelUsesClusterLaunch = props.MKernelUsesClusterLaunch; + if (props.MKernelWorkGroupMemorySize) + MKernelWorkGroupMemorySize = *props.MKernelWorkGroupMemorySize; + + if (props.MKernelUsesClusterLaunch) { + MKernelUsesClusterLaunch = *props.MKernelUsesClusterLaunch; MKernelClusterDims = props.MKernelClusterDims; MKernelClusterSize = props.MKernelClusterSize; } + } }; // KernelLaunchPropertyWrapper struct } // namespace detail diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 6c711b7941b4e..3177b12d211e3 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -883,7 +883,6 @@ class __SYCL_EXPORT handler { sycl::ext::oneapi::experimental::execution_scope threadScope, sycl::ext::oneapi::experimental::execution_scope coordinationScope); - template void checkAndSetClusterRange(const Properties &Props) { namespace syclex = sycl::ext::oneapi::experimental; @@ -992,9 +991,8 @@ class __SYCL_EXPORT handler { processLaunchProperties(Props); } +#endif // INTEL_PREVIEW_BREAKING_CHANGES - #endif // INTEL_PREVIEW_BREAKING_CHANGES - /// Checks whether it is possible to copy the source shape to the destination /// shape(the shapes are described by the accessor ranges) by using /// copying by regions of memory and not copying element by element @@ -1102,7 +1100,7 @@ class __SYCL_EXPORT handler { // supported gcc version is bumped. std::tuple, bool> getMaxWorkGroups_v2(); - detail::device_impl& getDeviceImpl(); + detail::device_impl &getDeviceImpl(); template std::tuple, bool> getRoundedRange(range UserRange) { @@ -1303,12 +1301,11 @@ class __SYCL_EXPORT handler { PropertiesT>::wrap(Wrapper); if (auto prop = - detail::KernelLaunchPropertyWrapper::parseProperties( - Wrapper, getCommandGraph() != nullptr, - getDeviceImpl())) { + detail::KernelLaunchPropertyWrapper::parseProperties( + Wrapper, this)) { setKernelLaunchProperties(*prop); } - + #ifndef __SYCL_DEVICE_ONLY__ constexpr detail::string_view Name{detail::getKernelName()}; verifyUsedKernelBundleInternal(Name); @@ -1335,19 +1332,18 @@ class __SYCL_EXPORT handler { detail::KernelWrapper::wrap(KernelFunc); if (auto prop = - detail::KernelLaunchPropertyWrapper::parseProperties( - KernelFunc, getCommandGraph() != nullptr, - getDeviceImpl())) { + detail::KernelLaunchPropertyWrapper::parseProperties( + KernelFunc, this)) { setKernelLaunchProperties(*prop); } #ifndef __SYCL_DEVICE_ONLY__ constexpr detail::string_view Name{detail::getKernelName()}; verifyUsedKernelBundleInternal(Name); - auto ProcessedProps = detail::KernelLaunchPropertyWrapper::processProperties< - detail::isKernelESIMD(), PropertiesT>( - Props, getCommandGraph() != nullptr, - getDeviceImpl()); + auto ProcessedProps = + detail::KernelLaunchPropertyWrapper::processProperties< + detail::isKernelESIMD(), decltype(this), PropertiesT>( + Props, this); setKernelLaunchProperties(ProcessedProps); detail::checkValueRange(UserRange); setNDRangeDescriptor(std::move(UserRange)); @@ -1379,10 +1375,8 @@ class __SYCL_EXPORT handler { detail::checkValueRange(NumWorkItems); setNDRangeDescriptor(std::move(NumWorkItems)); auto ParsedProp = - detail::KernelLaunchPropertyWrapper::processLaunchProperties< - PropertiesT>( - Props, getCommandGraph() != nullptr, - getDeviceImpl()); + detail::KernelLaunchPropertyWrapper::processLaunchProperties< + PropertiesT>(Props, this); setKernelLaunchProperties(ParsedProp); setType(detail::CGType::Kernel); extractArgsAndReqs(); @@ -1409,11 +1403,8 @@ class __SYCL_EXPORT handler { detail::checkValueRange(NDRange); setNDRangeDescriptor(std::move(NDRange)); auto ParsedProp = - detail::KernelLaunchPropertyWrapper::processLaunchProperties - ( - Props, getCommandGraph() != nullptr, - getDeviceImpl() - ); + detail::KernelLaunchPropertyWrapper::processLaunchProperties< + PropertiesT>(Props, this); setKernelLaunchProperties(ParsedProp); setType(detail::CGType::Kernel); extractArgsAndReqs(); @@ -1435,11 +1426,9 @@ class __SYCL_EXPORT handler { (void)Props; detail::KernelWrapper::wrap(KernelFunc); - if (auto prop = - detail::KernelLaunchPropertyWrapper::parseProperties( - KernelFunc, getCommandGraph() != nullptr, - getDeviceImpl())) { - setKernelLaunchProperties(*prop); + if (auto prop = detail::KernelLaunchPropertyWrapper::parseProperties( + KernelFunc, this)) { + setKernelLaunchProperties(*prop); } #ifndef __SYCL_DEVICE_ONLY__ if constexpr (WrapAsVal == detail::WrapAs::single_task) { @@ -1459,10 +1448,10 @@ class __SYCL_EXPORT handler { } StoreLambda(std::move(KernelFunc)); - auto ProcessedProps = detail::KernelLaunchPropertyWrapper::processProperties< - detail::isKernelESIMD(), PropertiesT>( - Props, getCommandGraph() != nullptr, - getDeviceImpl()); + auto ProcessedProps = + detail::KernelLaunchPropertyWrapper::processProperties< + detail::isKernelESIMD(), decltype(this), PropertiesT>(Props, + this); setKernelLaunchProperties(ProcessedProps); #endif } @@ -1486,11 +1475,9 @@ class __SYCL_EXPORT handler { (void)Kernel; detail::KernelWrapper::wrap(KernelFunc); - if (auto prop = - detail::KernelLaunchPropertyWrapper::parseProperties( - KernelFunc, getCommandGraph() != nullptr, - getDeviceImpl())) { - setKernelLaunchProperties(*prop); + if (auto prop = detail::KernelLaunchPropertyWrapper::parseProperties( + KernelFunc, this)) { + setKernelLaunchProperties(*prop); } #ifndef __SYCL_DEVICE_ONLY__ if constexpr (WrapAsVal == detail::WrapAs::single_task) { @@ -1519,9 +1506,10 @@ class __SYCL_EXPORT handler { } else { StoreLambda(std::move(KernelFunc)); } - auto ProcessedProps = detail::KernelLaunchPropertyWrapper::processProperties< - detail::isKernelESIMD(), PropertiesT>( - Props, getCommandGraph() != nullptr, getDeviceImpl()); + auto ProcessedProps = + detail::KernelLaunchPropertyWrapper::processProperties< + detail::isKernelESIMD(), decltype(this), PropertiesT>(Props, + this); setKernelLaunchProperties(ProcessedProps); #endif } @@ -3542,9 +3530,7 @@ class __SYCL_EXPORT handler { void setKernelIsCooperative(bool); // Set using cuda thread block cluster launch flag and set the launch bounds. -#ifndef __INTEL_PREVIEW_BREAKING_CHANGES void setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims); -#endif void setKernelClusterLaunch(sycl::range<3> ClusterSize); void setKernelClusterLaunch(sycl::range<2> ClusterSize); void setKernelClusterLaunch(sycl::range<1> ClusterSize); @@ -3554,8 +3540,13 @@ class __SYCL_EXPORT handler { #endif void setKernelLaunchProperties( - detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT - &KernelLaunchProperties); + detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT + &KernelLaunchProperties); + bool isKernelAssociatedWithGraph() const; + bool deviceSupportForwardProgress( + sycl::ext::oneapi::experimental::forward_progress_guarantee guarantee, + sycl::ext::oneapi::experimental::execution_scope threadScope, + sycl::ext::oneapi::experimental::execution_scope coordinationScope) const; // Various checks that are only meaningful for host compilation, because they // result in runtime errors (i.e. exceptions being thrown). To save time @@ -3618,7 +3609,8 @@ class __SYCL_EXPORT handler { template < ext::oneapi::experimental::detail::UnsupportedGraphFeatures FeatureT> void throwIfGraphAssociated() const { - detail::KernelLaunchPropertyWrapper::throwIfGraphAssociated(getCommandGraph() != nullptr); + detail::KernelLaunchPropertyWrapper::throwIfGraphAssociated( + getCommandGraph() != nullptr); } #endif @@ -3693,6 +3685,7 @@ class __SYCL_EXPORT handler { void instantiateKernelOnHost(void *InstantiateKernelOnHostPtr); friend class detail::HandlerAccess; + friend class detail::KernelLaunchPropertyWrapper; #ifdef __INTEL_PREVIEW_BREAKING_CHANGES __SYCL_DLL_LOCAL detail::handler_impl *get_impl() { return impl; } diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 0cfb276ddc199..c564c63b06b3a 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -287,7 +287,6 @@ set(SYCL_COMMON_SOURCES "detail/kernel_compiler/kernel_compiler_opencl.cpp" "detail/kernel_compiler/kernel_compiler_sycl.cpp" "detail/kernel_impl.cpp" - "detail/kernel_launch_helper.cpp" "detail/kernel_name_based_cache.cpp" "detail/kernel_program_cache.cpp" "detail/memory_export.cpp" diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index 7b11ab78c75ec..bb7d0385761a9 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -11,8 +11,8 @@ #include "sycl/handler.hpp" #include #include -#include #include +#include namespace sycl { inline namespace _V1 { diff --git a/sycl/source/detail/kernel_launch_helper.cpp b/sycl/source/detail/kernel_launch_helper.cpp deleted file mode 100644 index 8ff7aa7ea231e..0000000000000 --- a/sycl/source/detail/kernel_launch_helper.cpp +++ /dev/null @@ -1,70 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace sycl { -inline namespace _V1 { -namespace detail { - - namespace syclex = sycl::ext::oneapi::experimental; - void KernelLaunchPropertyWrapper::verifyDeviceHasProgressGuarantee( - device_impl &dev, - syclex::forward_progress_guarantee guarantee, - syclex::execution_scope threadScope, - syclex::execution_scope coordinationScope, - KernelLaunchPropertiesT &prop) { - using execution_scope = syclex::execution_scope; - using forward_progress = - syclex::forward_progress_guarantee; - const bool supported = dev.supportsForwardProgress( - guarantee, threadScope, coordinationScope); - if (threadScope == execution_scope::work_group) { - if (!supported) { - throw sycl::exception( - sycl::errc::feature_not_supported, - "Required progress guarantee for work groups is not " - "supported by this device."); - } - // If we are here, the device supports the guarantee required but there is a - // caveat in that if the guarantee required is a concurrent guarantee, then - // we most likely also need to enable cooperative launch of the kernel. That - // is, although the device supports the required guarantee, some setup work - // is needed to truly make the device provide that guarantee at runtime. - // Otherwise, we will get the default guarantee which is weaker than - // concurrent. Same reasoning applies for sub_group but not for work_item. - // TODO: Further design work is probably needed to reflect this behavior in - // Unified Runtime. - if (guarantee == forward_progress::concurrent) - prop.MKernelIsCooperative = true; - } else if (threadScope == execution_scope::sub_group) { - if (!supported) { - throw sycl::exception(sycl::errc::feature_not_supported, - "Required progress guarantee for sub groups is not " - "supported by this device."); - } - // Same reasoning as above. - if (guarantee == forward_progress::concurrent) - prop.MKernelIsCooperative = true; - } else { // threadScope is execution_scope::work_item otherwise undefined - // behavior - if (!supported) { - throw sycl::exception(sycl::errc::feature_not_supported, - "Required progress guarantee for work items is not " - "supported by this device."); - } - } - } - -} -} -} \ No newline at end of file diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 54ab9ffb16103..9c5223cd9634b 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -313,7 +313,6 @@ fill_copy_args(detail::handler_impl *impl, 0 /*DestPitch*/, SrcOffset, SrcExtent, DestOffset, DestExtent, CopyExtent); } - } // namespace detail #ifdef __INTEL_PREVIEW_BREAKING_CHANGES @@ -2081,7 +2080,7 @@ void handler::verifyDeviceHasProgressGuarantee( sycl::ext::oneapi::experimental::forward_progress_guarantee guarantee, sycl::ext::oneapi::experimental::execution_scope threadScope, sycl::ext::oneapi::experimental::execution_scope coordinationScope) { - + using execution_scope = sycl::ext::oneapi::experimental::execution_scope; using forward_progress = sycl::ext::oneapi::experimental::forward_progress_guarantee; @@ -2256,35 +2255,61 @@ detail::context_impl &handler::getContextImpl() const { void handler::setKernelLaunchProperties( KernelLaunchPropertyWrapper::KernelLaunchPropertiesT - &KernelLaunchProperties) { + &KernelLaunchProperties) { impl->parseAndSetKernelLaunchProperties(KernelLaunchProperties); if (KernelLaunchProperties.MKernelClusterDims == 1) { - sycl::range<1> ClusterSizeTrimmed = {KernelLaunchProperties.MKernelClusterSize[0]}; + sycl::range<1> ClusterSizeTrimmed = { + KernelLaunchProperties.MKernelClusterSize[0]}; impl->MNDRDesc.setClusterDimensions(ClusterSizeTrimmed); } else if (KernelLaunchProperties.MKernelClusterDims == 2) { - sycl::range<2> ClusterSizeTrimmed = - {KernelLaunchProperties.MKernelClusterSize[0], + sycl::range<2> ClusterSizeTrimmed = { + KernelLaunchProperties.MKernelClusterSize[0], KernelLaunchProperties.MKernelClusterSize[1]}; impl->MNDRDesc.setClusterDimensions(ClusterSizeTrimmed); } else if (KernelLaunchProperties.MKernelClusterDims == 3) { - sycl::range<3> ClusterSizeTrimmed = - {KernelLaunchProperties.MKernelClusterSize[0], + sycl::range<3> ClusterSizeTrimmed = { + KernelLaunchProperties.MKernelClusterSize[0], KernelLaunchProperties.MKernelClusterSize[1], KernelLaunchProperties.MKernelClusterSize[2]}; impl->MNDRDesc.setClusterDimensions(ClusterSizeTrimmed); } } -device_impl& handler::getDeviceImpl() { - return impl->get_device(); +bool handler::isKernelAssociatedWithGraph() const { + return getCommandGraph() != nullptr; +} + +bool handler::deviceSupportForwardProgress( + sycl::ext::oneapi::experimental::forward_progress_guarantee guarantee, + sycl::ext::oneapi::experimental::execution_scope threadScope, + sycl::ext::oneapi::experimental::execution_scope coordinationScope) const { + + return impl->get_device().supportsForwardProgress(guarantee, threadScope, + coordinationScope); } +device_impl &handler::getDeviceImpl() { return impl->get_device(); } + #ifndef __INTEL_PREVIEW_BREAKING_CHANGES +void handler::setKernelCacheConfig(handler::StableKernelCacheConfig Config) { + switch (Config) { + case handler::StableKernelCacheConfig::Default: + impl->MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_DEFAULT; + break; + case handler::StableKernelCacheConfig::LargeSLM: + impl->MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_SLM; + break; + case handler::StableKernelCacheConfig::LargeData: + impl->MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_DATA; + break; + } +} + void handler::setKernelIsCooperative(bool IsCooperative) { impl->MKernelIsCooperative = IsCooperative; -} +} void handler::setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims) { throwIfGraphAssociated< From 681258f62ddecf4ec9b3931457d936b72ad4cfc5 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Fri, 25 Jul 2025 22:33:10 +0200 Subject: [PATCH 05/25] Fix root_sync property parsing --- sycl/include/sycl/detail/kernel_launch_helper.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index 2ad0026cfccca..25c97de826ff7 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -400,7 +400,8 @@ class KernelLaunchPropertyWrapper { { constexpr bool UsesRootSync = PropertiesT::template has_property< sycl::ext::oneapi::experimental::use_root_sync_key>(); - retval.MKernelIsCooperative = UsesRootSync; + if (UsesRootSync) + retval.MKernelIsCooperative = UsesRootSync; } // Process device progress properties. From 3c3d3f6d2de82abbf7c97dcdc0267e6dae985aa5 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Fri, 25 Jul 2025 22:49:31 +0200 Subject: [PATCH 06/25] Fix sycl_detail_core.hpp.cpp after merge --- sycl/test/include_deps/sycl_detail_core.hpp.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index 8ae13a204adc7..377c883a57f21 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -133,12 +133,12 @@ // CHECK-NEXT: detail/kernel_launch_helper.hpp // CHECK-NEXT: ext/intel/experimental/fp_control_kernel_properties.hpp // CHECK-NEXT: ext/intel/experimental/kernel_execution_properties.hpp +// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp +// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp // CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp // CHECK-NEXT: ext/oneapi/kernel_properties/properties.hpp // CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp // CHECK-NEXT: detail/sycl_local_mem_builtins.hpp -// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp -// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp // CHECK-NEXT: detail/kernel_name_based_cache.hpp // CHECK-NEXT: detail/kernel_name_str_t.hpp // CHECK-NEXT: detail/reduction_forward.hpp From 70137e163004e23a5c1a251f439cbcfb4d8ee6e4 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Sun, 27 Jul 2025 03:30:13 +0200 Subject: [PATCH 07/25] Beautify code --- .../sycl/detail/kernel_launch_helper.hpp | 66 +++++++++++-------- sycl/source/handler.cpp | 34 +++++----- .../arg_mask/EliminatedArgMask.cpp | 6 +- .../scheduler/SchedulerTestUtils.hpp | 7 +- .../scheduler/StreamInitDependencyOnHost.cpp | 5 +- 5 files changed, 68 insertions(+), 50 deletions(-) diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index 25c97de826ff7..28eba24b6e893 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -268,15 +268,11 @@ class KernelLaunchPropertyWrapper { LargeData = 2 }; - ur_kernel_cache_config_t MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_DEFAULT; - bool MKernelIsCooperative = false; - uint32_t MKernelWorkGroupMemorySize = 0; - - // Kernel Cluster launch - bool MKernelUsesClusterLaunch = false; - size_t MKernelClusterDims = 0; - std::array MKernelClusterSize = {0, 0, 0}; - + // This struct is used to store kernel launch properties. + // std::optional is used to indicate that the property is not set. + // In some code paths, kernel launch properties are set multiple times + // for the same kernel, i.e why using std::optional to avoid overriding + // previously set properties. struct KernelLaunchPropertiesT { std::optional MKernelCacheConfig; std::optional MKernelIsCooperative; @@ -293,8 +289,43 @@ class KernelLaunchPropertyWrapper { MKernelClusterDims = 0; MKernelClusterSize.fill(0); } + + KernelLaunchPropertiesT &operator=(const KernelLaunchPropertiesT &Other) { + if (Other.MKernelCacheConfig) + MKernelCacheConfig = Other.MKernelCacheConfig; + + if (Other.MKernelIsCooperative) + MKernelIsCooperative = Other.MKernelIsCooperative; + + if (Other.MKernelWorkGroupMemorySize) + MKernelWorkGroupMemorySize = Other.MKernelWorkGroupMemorySize; + + if (Other.MKernelUsesClusterLaunch) { + MKernelUsesClusterLaunch = Other.MKernelUsesClusterLaunch; + MKernelClusterDims = Other.MKernelClusterDims; + MKernelClusterSize = Other.MKernelClusterSize; + } + + return *this; + } + + KernelLaunchPropertiesT(const KernelLaunchPropertiesT &Other) { + *this = Other; + } }; + KernelLaunchPropertiesT KLProps; + + // Set default values for kernel launch properties. + KernelLaunchPropertyWrapper() { + KLProps.MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_DEFAULT; + KLProps.MKernelIsCooperative = false; + KLProps.MKernelWorkGroupMemorySize = 0; + KLProps.MKernelUsesClusterLaunch = false; + KLProps.MKernelClusterDims = 0; + KLProps.MKernelClusterSize.fill(0); + } + template static void throwIfGraphAssociated(bool HasGraph) { if (!HasGraph) @@ -546,23 +577,6 @@ class KernelLaunchPropertyWrapper { // optional. return std::nullopt; } - - void parseAndSetKernelLaunchProperties(KernelLaunchPropertiesT &props) { - if (props.MKernelCacheConfig) - MKernelCacheConfig = *props.MKernelCacheConfig; - - if (props.MKernelIsCooperative) - MKernelIsCooperative = *props.MKernelIsCooperative; - - if (props.MKernelWorkGroupMemorySize) - MKernelWorkGroupMemorySize = *props.MKernelWorkGroupMemorySize; - - if (props.MKernelUsesClusterLaunch) { - MKernelUsesClusterLaunch = *props.MKernelUsesClusterLaunch; - MKernelClusterDims = props.MKernelClusterDims; - MKernelClusterSize = props.MKernelClusterSize; - } - } }; // KernelLaunchPropertyWrapper struct } // namespace detail diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 9c5223cd9634b..14db8b3f1c796 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -626,9 +626,11 @@ event handler::finalize() { impl->get_queue(), impl->MNDRDesc, impl->MArgs, KernelBundleImpPtr, MKernel.get(), toKernelNameStrT(MKernelName), impl->MKernelNameBasedCachePtr, RawEvents, ResultEvent.get(), - nullptr, impl->MKernelCacheConfig, impl->MKernelIsCooperative, - impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, - BinImage, impl->MKernelFuncPtr, impl->MKernelNumArgs, + nullptr, *impl->KLProps.MKernelCacheConfig, + *impl->KLProps.MKernelIsCooperative, + *impl->KLProps.MKernelUsesClusterLaunch, + *impl->KLProps.MKernelWorkGroupMemorySize, BinImage, + impl->MKernelFuncPtr, impl->MKernelNumArgs, impl->MKernelParamDescGetter, impl->MKernelHasSpecialCaptures); #ifdef XPTI_ENABLE_INSTRUMENTATION if (xptiEnabled) { @@ -688,9 +690,9 @@ event handler::finalize() { std::move(impl->MArgs), toKernelNameStrT(MKernelName), impl->MKernelNameBasedCachePtr, std::move(MStreamStorage), std::move(impl->MAuxiliaryResources), getType(), - impl->MKernelCacheConfig, impl->MKernelIsCooperative, - impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, - MCodeLoc)); + *impl->KLProps.MKernelCacheConfig, *impl->KLProps.MKernelIsCooperative, + *impl->KLProps.MKernelUsesClusterLaunch, + *impl->KLProps.MKernelWorkGroupMemorySize, MCodeLoc)); break; } case detail::CGType::CopyAccToPtr: @@ -2256,8 +2258,8 @@ detail::context_impl &handler::getContextImpl() const { void handler::setKernelLaunchProperties( KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &KernelLaunchProperties) { - impl->parseAndSetKernelLaunchProperties(KernelLaunchProperties); + impl->KLProps = KernelLaunchProperties; if (KernelLaunchProperties.MKernelClusterDims == 1) { sycl::range<1> ClusterSizeTrimmed = { KernelLaunchProperties.MKernelClusterSize[0]}; @@ -2296,26 +2298,26 @@ device_impl &handler::getDeviceImpl() { return impl->get_device(); } void handler::setKernelCacheConfig(handler::StableKernelCacheConfig Config) { switch (Config) { case handler::StableKernelCacheConfig::Default: - impl->MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_DEFAULT; + impl->KLProps.MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_DEFAULT; break; case handler::StableKernelCacheConfig::LargeSLM: - impl->MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_SLM; + impl->KLProps.MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_SLM; break; case handler::StableKernelCacheConfig::LargeData: - impl->MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_DATA; + impl->KLProps.MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_DATA; break; } } void handler::setKernelIsCooperative(bool IsCooperative) { - impl->MKernelIsCooperative = IsCooperative; + impl->KLProps.MKernelIsCooperative = IsCooperative; } void handler::setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims) { throwIfGraphAssociated< syclex::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_experimental_cuda_cluster_launch>(); - impl->MKernelUsesClusterLaunch = true; + impl->KLProps.MKernelUsesClusterLaunch = true; if (Dims == 1) { sycl::range<1> ClusterSizeTrimmed = {ClusterSize[0]}; @@ -2332,7 +2334,7 @@ void handler::setKernelClusterLaunch(sycl::range<3> ClusterSize) { throwIfGraphAssociated< syclex::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_experimental_cuda_cluster_launch>(); - impl->MKernelUsesClusterLaunch = true; + impl->KLProps.MKernelUsesClusterLaunch = true; impl->MNDRDesc.setClusterDimensions(ClusterSize); } @@ -2340,7 +2342,7 @@ void handler::setKernelClusterLaunch(sycl::range<2> ClusterSize) { throwIfGraphAssociated< syclex::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_experimental_cuda_cluster_launch>(); - impl->MKernelUsesClusterLaunch = true; + impl->KLProps.MKernelUsesClusterLaunch = true; impl->MNDRDesc.setClusterDimensions(ClusterSize); } @@ -2348,14 +2350,14 @@ void handler::setKernelClusterLaunch(sycl::range<1> ClusterSize) { throwIfGraphAssociated< syclex::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_experimental_cuda_cluster_launch>(); - impl->MKernelUsesClusterLaunch = true; + impl->KLProps.MKernelUsesClusterLaunch = true; impl->MNDRDesc.setClusterDimensions(ClusterSize); } void handler::setKernelWorkGroupMem(size_t Size) { throwIfGraphAssociated(); - impl->MKernelWorkGroupMemorySize = Size; + impl->KLProps.MKernelWorkGroupMemorySize = Size; } #endif // __INTEL_PREVIEW_BREAKING_CHANGES diff --git a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp index ab322bce9022b..ed9da96f6c0bc 100644 --- a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp @@ -149,9 +149,9 @@ class MockHandler : public sycl::handler { std::move(impl->CGData), std::move(impl->MArgs), CGH->MKernelName.data(), impl->MKernelNameBasedCachePtr, std::move(CGH->MStreamStorage), std::move(impl->MAuxiliaryResources), - impl->MCGType, {}, impl->MKernelIsCooperative, - impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, - CGH->MCodeLoc)); + impl->MCGType, {}, *impl->KLProps.MKernelIsCooperative, + *impl->KLProps.MKernelUsesClusterLaunch, + *impl->KLProps.MKernelWorkGroupMemorySize, CGH->MCodeLoc)); break; } default: diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index 61ed99dd87cff..0aac3bf114e50 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -303,9 +303,10 @@ class MockHandlerCustomFinalize : public MockHandler { getNDRDesc(), std::move(getHostKernel()), getKernel(), std::move(impl->MKernelBundle), std::move(CGData), getArgs(), getKernelName(), impl->MKernelNameBasedCachePtr, getStreamStorage(), - impl->MAuxiliaryResources, getType(), {}, impl->MKernelIsCooperative, - impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, - getCodeLoc())); + impl->MAuxiliaryResources, getType(), {}, + *impl->KLProps.MKernelIsCooperative, + *impl->KLProps.MKernelUsesClusterLaunch, + *impl->KLProps.MKernelWorkGroupMemorySize, getCodeLoc())); break; } case sycl::detail::CGType::CodeplayHostTask: { diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index d038004b1e1e4..56b6926ec1c19 100644 --- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -35,8 +35,9 @@ class MockHandlerStreamInit : public MockHandler { getRequirements(), getEvents()), getArgs(), getKernelName(), impl->MKernelNameBasedCachePtr, getStreamStorage(), std::move(impl->MAuxiliaryResources), getType(), - {}, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, - impl->MKernelWorkGroupMemorySize, getCodeLoc())); + {}, *impl->KLProps.MKernelIsCooperative, + *impl->KLProps.MKernelUsesClusterLaunch, + *impl->KLProps.MKernelWorkGroupMemorySize, getCodeLoc())); break; } default: From 26660c848eda3ce08de710895bb395bb9ad00bd8 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Sun, 27 Jul 2025 03:49:04 +0200 Subject: [PATCH 08/25] Remove TODOs --- .../sycl/detail/kernel_launch_helper.hpp | 30 +++++++++---------- sycl/include/sycl/handler.hpp | 2 +- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index 28eba24b6e893..43025529a69d1 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -254,13 +254,9 @@ struct KernelWrapper< } }; // KernelWrapper struct -// TODO: Remove this. -namespace syclex = sycl::ext::oneapi::experimental; - -// This class is inherited by handler_impl and sycl::handler is using the static -// methods. -class KernelLaunchPropertyWrapper { -public: +// This struct is inherited by handler_impl and sycl::handler is using the +// static methods. +struct KernelLaunchPropertyWrapper { // Changing values in this will break ABI/API. enum class StableKernelCacheConfig : int32_t { Default = 0, @@ -326,26 +322,30 @@ class KernelLaunchPropertyWrapper { KLProps.MKernelClusterSize.fill(0); } - template + template static void throwIfGraphAssociated(bool HasGraph) { if (!HasGraph) return; std::string FeatureString = - syclex::detail::UnsupportedFeatureToString(FeatureT); + sycl::ext::oneapi::experimental::detail::UnsupportedFeatureToString( + FeatureT); throw sycl::exception(sycl::make_error_code(errc::invalid), "The " + FeatureString + " feature is not yet available " "for use with the SYCL Graph extension."); } -private: static void verifyDeviceHasProgressGuarantee( - bool supported, syclex::forward_progress_guarantee guarantee, - syclex::execution_scope threadScope, KernelLaunchPropertiesT &prop) { + bool supported, + sycl::ext::oneapi::experimental::forward_progress_guarantee guarantee, + sycl::ext::oneapi::experimental::execution_scope threadScope, + KernelLaunchPropertiesT &prop) { - using execution_scope = syclex::execution_scope; - using forward_progress = syclex::forward_progress_guarantee; + using execution_scope = sycl::ext::oneapi::experimental::execution_scope; + using forward_progress = + sycl::ext::oneapi::experimental::forward_progress_guarantee; if (threadScope == execution_scope::work_group) { if (!supported) { @@ -387,7 +387,6 @@ class KernelLaunchPropertyWrapper { } } -public: /// Process runtime kernel properties. /// /// Stores information about kernel properties into the handler. @@ -395,6 +394,7 @@ class KernelLaunchPropertyWrapper { static KernelLaunchPropertiesT processLaunchProperties(PropertiesT Props, PropertyProcessorPtrT p) { + namespace syclex = sycl::ext::oneapi::experimental; KernelLaunchPropertiesT retval; bool HasGraph = p->isKernelAssociatedWithGraph(); diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 3177b12d211e3..8984def20b02e 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -3685,7 +3685,7 @@ class __SYCL_EXPORT handler { void instantiateKernelOnHost(void *InstantiateKernelOnHostPtr); friend class detail::HandlerAccess; - friend class detail::KernelLaunchPropertyWrapper; + friend struct detail::KernelLaunchPropertyWrapper; #ifdef __INTEL_PREVIEW_BREAKING_CHANGES __SYCL_DLL_LOCAL detail::handler_impl *get_impl() { return impl; } From 4afcd33ae093ac7de371714f9b54069b455668d2 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Sun, 27 Jul 2025 18:28:56 +0200 Subject: [PATCH 09/25] Fix coverity comments --- .../sycl/detail/kernel_launch_helper.hpp | 21 +++++------- sycl/source/handler.cpp | 33 ++++++++++--------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index 43025529a69d1..b64167e4b0a27 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -277,14 +277,11 @@ struct KernelLaunchPropertyWrapper { size_t MKernelClusterDims; std::array MKernelClusterSize; - KernelLaunchPropertiesT() { - MKernelCacheConfig = std::nullopt; - MKernelIsCooperative = std::nullopt; - MKernelWorkGroupMemorySize = std::nullopt; - MKernelUsesClusterLaunch = std::nullopt; - MKernelClusterDims = 0; - MKernelClusterSize.fill(0); - } + KernelLaunchPropertiesT() + : MKernelCacheConfig(std::nullopt), MKernelIsCooperative(std::nullopt), + MKernelWorkGroupMemorySize(std::nullopt), + MKernelUsesClusterLaunch(std::nullopt), MKernelClusterDims(0), + MKernelClusterSize{0, 0, 0} {} KernelLaunchPropertiesT &operator=(const KernelLaunchPropertiesT &Other) { if (Other.MKernelCacheConfig) @@ -305,9 +302,9 @@ struct KernelLaunchPropertyWrapper { return *this; } - KernelLaunchPropertiesT(const KernelLaunchPropertiesT &Other) { - *this = Other; - } + KernelLaunchPropertiesT(const KernelLaunchPropertiesT &Other) = default; + KernelLaunchPropertiesT(KernelLaunchPropertiesT &&Other) = default; + ~KernelLaunchPropertiesT() = default; }; KernelLaunchPropertiesT KLProps; @@ -519,7 +516,7 @@ struct KernelLaunchPropertyWrapper { retval.MKernelClusterSize[1] = ClusterSize[1]; retval.MKernelClusterSize[2] = ClusterSize[2]; } else { - assert(ClusterDim > 3 && + assert(ClusterDim <= 3 && "Only 1D, 2D, and 3D cluster launch is supported."); } } diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 14db8b3f1c796..1de8b851dc1f2 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -2260,21 +2260,24 @@ void handler::setKernelLaunchProperties( &KernelLaunchProperties) { impl->KLProps = KernelLaunchProperties; - if (KernelLaunchProperties.MKernelClusterDims == 1) { - sycl::range<1> ClusterSizeTrimmed = { - KernelLaunchProperties.MKernelClusterSize[0]}; - impl->MNDRDesc.setClusterDimensions(ClusterSizeTrimmed); - } else if (KernelLaunchProperties.MKernelClusterDims == 2) { - sycl::range<2> ClusterSizeTrimmed = { - KernelLaunchProperties.MKernelClusterSize[0], - KernelLaunchProperties.MKernelClusterSize[1]}; - impl->MNDRDesc.setClusterDimensions(ClusterSizeTrimmed); - } else if (KernelLaunchProperties.MKernelClusterDims == 3) { - sycl::range<3> ClusterSizeTrimmed = { - KernelLaunchProperties.MKernelClusterSize[0], - KernelLaunchProperties.MKernelClusterSize[1], - KernelLaunchProperties.MKernelClusterSize[2]}; - impl->MNDRDesc.setClusterDimensions(ClusterSizeTrimmed); + + if (KernelLaunchProperties.MKernelUsesClusterLaunch) { + if (KernelLaunchProperties.MKernelClusterDims == 1) { + sycl::range<1> ClusterSizeTrimmed = { + KernelLaunchProperties.MKernelClusterSize[0]}; + impl->MNDRDesc.setClusterDimensions(ClusterSizeTrimmed); + } else if (KernelLaunchProperties.MKernelClusterDims == 2) { + sycl::range<2> ClusterSizeTrimmed = { + KernelLaunchProperties.MKernelClusterSize[0], + KernelLaunchProperties.MKernelClusterSize[1]}; + impl->MNDRDesc.setClusterDimensions(ClusterSizeTrimmed); + } else if (KernelLaunchProperties.MKernelClusterDims == 3) { + sycl::range<3> ClusterSizeTrimmed = { + KernelLaunchProperties.MKernelClusterSize[0], + KernelLaunchProperties.MKernelClusterSize[1], + KernelLaunchProperties.MKernelClusterSize[2]}; + impl->MNDRDesc.setClusterDimensions(ClusterSizeTrimmed); + } } } From feb3bd335b2b98808dc02ee3163963a6eea3ccf9 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Mon, 28 Jul 2025 18:36:17 +0200 Subject: [PATCH 10/25] Replace sycl::ext::oneapi::experimental:: with syclex:: --- .../sycl/detail/kernel_launch_helper.hpp | 62 ++++++++----------- 1 file changed, 26 insertions(+), 36 deletions(-) diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index b64167e4b0a27..91399d6be3e2d 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -426,8 +426,8 @@ struct KernelLaunchPropertyWrapper { // Process Kernel cooperative property. { - constexpr bool UsesRootSync = PropertiesT::template has_property< - sycl::ext::oneapi::experimental::use_root_sync_key>(); + constexpr bool UsesRootSync = + PropertiesT::template has_property(); if (UsesRootSync) retval.MKernelIsCooperative = UsesRootSync; } @@ -435,59 +435,49 @@ struct KernelLaunchPropertyWrapper { // Process device progress properties. { if constexpr (PropertiesT::template has_property< - sycl::ext::oneapi::experimental:: - work_group_progress_key>()) { - auto prop = Props.template get_property< - sycl::ext::oneapi::experimental::work_group_progress_key>(); + syclex::work_group_progress_key>()) { + auto prop = + Props.template get_property(); bool supported = p->deviceSupportForwardProgress( - prop.guarantee, - sycl::ext::oneapi::experimental::execution_scope::work_group, + prop.guarantee, syclex::execution_scope::work_group, prop.coordinationScope); - verifyDeviceHasProgressGuarantee( - supported, prop.guarantee, - sycl::ext::oneapi::experimental::execution_scope::work_group, - retval); + verifyDeviceHasProgressGuarantee(supported, prop.guarantee, + syclex::execution_scope::work_group, + retval); } if constexpr (PropertiesT::template has_property< - sycl::ext::oneapi::experimental:: - sub_group_progress_key>()) { - auto prop = Props.template get_property< - sycl::ext::oneapi::experimental::sub_group_progress_key>(); + syclex::sub_group_progress_key>()) { + auto prop = + Props.template get_property(); bool supported = p->deviceSupportForwardProgress( - prop.guarantee, - sycl::ext::oneapi::experimental::execution_scope::sub_group, + prop.guarantee, syclex::execution_scope::sub_group, prop.coordinationScope); - verifyDeviceHasProgressGuarantee( - supported, prop.guarantee, - sycl::ext::oneapi::experimental::execution_scope::sub_group, - retval); + verifyDeviceHasProgressGuarantee(supported, prop.guarantee, + syclex::execution_scope::sub_group, + retval); } if constexpr (PropertiesT::template has_property< - sycl::ext::oneapi::experimental:: - work_item_progress_key>()) { - auto prop = Props.template get_property< - sycl::ext::oneapi::experimental::work_item_progress_key>(); + syclex::work_item_progress_key>()) { + auto prop = + Props.template get_property(); bool supported = p->deviceSupportForwardProgress( - prop.guarantee, - sycl::ext::oneapi::experimental::execution_scope::work_item, + prop.guarantee, syclex::execution_scope::work_item, prop.coordinationScope); - verifyDeviceHasProgressGuarantee( - supported, prop.guarantee, - sycl::ext::oneapi::experimental::execution_scope::work_item, - retval); + verifyDeviceHasProgressGuarantee(supported, prop.guarantee, + syclex::execution_scope::work_item, + retval); } } // Process work group scratch memory property. { if constexpr (PropertiesT::template has_property< - sycl::ext::oneapi::experimental:: - work_group_scratch_size>()) { + syclex::work_group_scratch_size>()) { throwIfGraphAssociated( HasGraph); - auto WorkGroupMemSize = Props.template get_property< - sycl::ext::oneapi::experimental::work_group_scratch_size>(); + auto WorkGroupMemSize = + Props.template get_property(); retval.MKernelWorkGroupMemorySize = WorkGroupMemSize.size; } } From d281749e3b67083ac5da6f5051bade07aca9fe90 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Mon, 28 Jul 2025 19:28:30 +0200 Subject: [PATCH 11/25] Rename MKernelCacheConfig to MCacheConfig, and similar renaming --- .../sycl/detail/kernel_launch_helper.hpp | 79 +++++++++---------- sycl/source/handler.cpp | 52 ++++++------ .../arg_mask/EliminatedArgMask.cpp | 6 +- .../scheduler/SchedulerTestUtils.hpp | 5 +- .../scheduler/StreamInitDependencyOnHost.cpp | 5 +- 5 files changed, 72 insertions(+), 75 deletions(-) diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index 91399d6be3e2d..dc67aeaed5fa4 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -270,33 +270,32 @@ struct KernelLaunchPropertyWrapper { // for the same kernel, i.e why using std::optional to avoid overriding // previously set properties. struct KernelLaunchPropertiesT { - std::optional MKernelCacheConfig; - std::optional MKernelIsCooperative; - std::optional MKernelWorkGroupMemorySize; - std::optional MKernelUsesClusterLaunch; - size_t MKernelClusterDims; - std::array MKernelClusterSize; + std::optional MCacheConfig; + std::optional MIsCooperative; + std::optional MWorkGroupMemorySize; + std::optional MUsesClusterLaunch; + size_t MClusterDims; + std::array MClusterSize; KernelLaunchPropertiesT() - : MKernelCacheConfig(std::nullopt), MKernelIsCooperative(std::nullopt), - MKernelWorkGroupMemorySize(std::nullopt), - MKernelUsesClusterLaunch(std::nullopt), MKernelClusterDims(0), - MKernelClusterSize{0, 0, 0} {} + : MCacheConfig(std::nullopt), MIsCooperative(std::nullopt), + MWorkGroupMemorySize(std::nullopt), MUsesClusterLaunch(std::nullopt), + MClusterDims(0), MClusterSize{0, 0, 0} {} KernelLaunchPropertiesT &operator=(const KernelLaunchPropertiesT &Other) { - if (Other.MKernelCacheConfig) - MKernelCacheConfig = Other.MKernelCacheConfig; + if (Other.MCacheConfig) + MCacheConfig = Other.MCacheConfig; - if (Other.MKernelIsCooperative) - MKernelIsCooperative = Other.MKernelIsCooperative; + if (Other.MIsCooperative) + MIsCooperative = Other.MIsCooperative; - if (Other.MKernelWorkGroupMemorySize) - MKernelWorkGroupMemorySize = Other.MKernelWorkGroupMemorySize; + if (Other.MWorkGroupMemorySize) + MWorkGroupMemorySize = Other.MWorkGroupMemorySize; - if (Other.MKernelUsesClusterLaunch) { - MKernelUsesClusterLaunch = Other.MKernelUsesClusterLaunch; - MKernelClusterDims = Other.MKernelClusterDims; - MKernelClusterSize = Other.MKernelClusterSize; + if (Other.MUsesClusterLaunch) { + MUsesClusterLaunch = Other.MUsesClusterLaunch; + MClusterDims = Other.MClusterDims; + MClusterSize = Other.MClusterSize; } return *this; @@ -311,12 +310,12 @@ struct KernelLaunchPropertyWrapper { // Set default values for kernel launch properties. KernelLaunchPropertyWrapper() { - KLProps.MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_DEFAULT; - KLProps.MKernelIsCooperative = false; - KLProps.MKernelWorkGroupMemorySize = 0; - KLProps.MKernelUsesClusterLaunch = false; - KLProps.MKernelClusterDims = 0; - KLProps.MKernelClusterSize.fill(0); + KLProps.MCacheConfig = UR_KERNEL_CACHE_CONFIG_DEFAULT; + KLProps.MIsCooperative = false; + KLProps.MWorkGroupMemorySize = 0; + KLProps.MUsesClusterLaunch = false; + KLProps.MClusterDims = 0; + KLProps.MClusterSize.fill(0); } template (); if (Config == sycl::ext::intel::experimental::large_slm) { - retval.MKernelCacheConfig = + retval.MCacheConfig = ConvertToURCacheConfig(StableKernelCacheConfig::LargeSLM); } else if (Config == sycl::ext::intel::experimental::large_data) { - retval.MKernelCacheConfig = + retval.MCacheConfig = ConvertToURCacheConfig(StableKernelCacheConfig::LargeData); } } else { @@ -429,7 +428,7 @@ struct KernelLaunchPropertyWrapper { constexpr bool UsesRootSync = PropertiesT::template has_property(); if (UsesRootSync) - retval.MKernelIsCooperative = UsesRootSync; + retval.MIsCooperative = UsesRootSync; } // Process device progress properties. @@ -478,7 +477,7 @@ struct KernelLaunchPropertyWrapper { HasGraph); auto WorkGroupMemSize = Props.template get_property(); - retval.MKernelWorkGroupMemorySize = WorkGroupMemSize.size; + retval.MWorkGroupMemorySize = WorkGroupMemSize.size; } } @@ -494,17 +493,17 @@ struct KernelLaunchPropertyWrapper { .template get_property< syclex::cuda::cluster_size_key>() .get_cluster_size(); - retval.MKernelUsesClusterLaunch = true; - retval.MKernelClusterDims = ClusterDim; + retval.MUsesClusterLaunch = true; + retval.MClusterDims = ClusterDim; if (ClusterDim == 1) { - retval.MKernelClusterSize[0] = ClusterSize[0]; + retval.MClusterSize[0] = ClusterSize[0]; } else if (ClusterDim == 2) { - retval.MKernelClusterSize[0] = ClusterSize[0]; - retval.MKernelClusterSize[1] = ClusterSize[1]; + retval.MClusterSize[0] = ClusterSize[0]; + retval.MClusterSize[1] = ClusterSize[1]; } else if (ClusterDim == 3) { - retval.MKernelClusterSize[0] = ClusterSize[0]; - retval.MKernelClusterSize[1] = ClusterSize[1]; - retval.MKernelClusterSize[2] = ClusterSize[2]; + retval.MClusterSize[0] = ClusterSize[0]; + retval.MClusterSize[1] = ClusterSize[1]; + retval.MClusterSize[2] = ClusterSize[2]; } else { assert(ClusterDim <= 3 && "Only 1D, 2D, and 3D cluster launch is supported."); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 1de8b851dc1f2..3b9f514454189 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -626,10 +626,10 @@ event handler::finalize() { impl->get_queue(), impl->MNDRDesc, impl->MArgs, KernelBundleImpPtr, MKernel.get(), toKernelNameStrT(MKernelName), impl->MKernelNameBasedCachePtr, RawEvents, ResultEvent.get(), - nullptr, *impl->KLProps.MKernelCacheConfig, - *impl->KLProps.MKernelIsCooperative, - *impl->KLProps.MKernelUsesClusterLaunch, - *impl->KLProps.MKernelWorkGroupMemorySize, BinImage, + nullptr, *impl->KLProps.MCacheConfig, + *impl->KLProps.MIsCooperative, + *impl->KLProps.MUsesClusterLaunch, + *impl->KLProps.MWorkGroupMemorySize, BinImage, impl->MKernelFuncPtr, impl->MKernelNumArgs, impl->MKernelParamDescGetter, impl->MKernelHasSpecialCaptures); #ifdef XPTI_ENABLE_INSTRUMENTATION @@ -690,9 +690,9 @@ event handler::finalize() { std::move(impl->MArgs), toKernelNameStrT(MKernelName), impl->MKernelNameBasedCachePtr, std::move(MStreamStorage), std::move(impl->MAuxiliaryResources), getType(), - *impl->KLProps.MKernelCacheConfig, *impl->KLProps.MKernelIsCooperative, - *impl->KLProps.MKernelUsesClusterLaunch, - *impl->KLProps.MKernelWorkGroupMemorySize, MCodeLoc)); + *impl->KLProps.MCacheConfig, *impl->KLProps.MIsCooperative, + *impl->KLProps.MUsesClusterLaunch, + *impl->KLProps.MWorkGroupMemorySize, MCodeLoc)); break; } case detail::CGType::CopyAccToPtr: @@ -2261,21 +2261,21 @@ void handler::setKernelLaunchProperties( impl->KLProps = KernelLaunchProperties; - if (KernelLaunchProperties.MKernelUsesClusterLaunch) { - if (KernelLaunchProperties.MKernelClusterDims == 1) { + if (KernelLaunchProperties.MUsesClusterLaunch) { + if (KernelLaunchProperties.MClusterDims == 1) { sycl::range<1> ClusterSizeTrimmed = { - KernelLaunchProperties.MKernelClusterSize[0]}; + KernelLaunchProperties.MClusterSize[0]}; impl->MNDRDesc.setClusterDimensions(ClusterSizeTrimmed); - } else if (KernelLaunchProperties.MKernelClusterDims == 2) { + } else if (KernelLaunchProperties.MClusterDims == 2) { sycl::range<2> ClusterSizeTrimmed = { - KernelLaunchProperties.MKernelClusterSize[0], - KernelLaunchProperties.MKernelClusterSize[1]}; + KernelLaunchProperties.MClusterSize[0], + KernelLaunchProperties.MClusterSize[1]}; impl->MNDRDesc.setClusterDimensions(ClusterSizeTrimmed); - } else if (KernelLaunchProperties.MKernelClusterDims == 3) { + } else if (KernelLaunchProperties.MClusterDims == 3) { sycl::range<3> ClusterSizeTrimmed = { - KernelLaunchProperties.MKernelClusterSize[0], - KernelLaunchProperties.MKernelClusterSize[1], - KernelLaunchProperties.MKernelClusterSize[2]}; + KernelLaunchProperties.MClusterSize[0], + KernelLaunchProperties.MClusterSize[1], + KernelLaunchProperties.MClusterSize[2]}; impl->MNDRDesc.setClusterDimensions(ClusterSizeTrimmed); } } @@ -2301,26 +2301,26 @@ device_impl &handler::getDeviceImpl() { return impl->get_device(); } void handler::setKernelCacheConfig(handler::StableKernelCacheConfig Config) { switch (Config) { case handler::StableKernelCacheConfig::Default: - impl->KLProps.MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_DEFAULT; + impl->KLProps.MCacheConfig = UR_KERNEL_CACHE_CONFIG_DEFAULT; break; case handler::StableKernelCacheConfig::LargeSLM: - impl->KLProps.MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_SLM; + impl->KLProps.MCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_SLM; break; case handler::StableKernelCacheConfig::LargeData: - impl->KLProps.MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_DATA; + impl->KLProps.MCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_DATA; break; } } void handler::setKernelIsCooperative(bool IsCooperative) { - impl->KLProps.MKernelIsCooperative = IsCooperative; + impl->KLProps.MIsCooperative = IsCooperative; } void handler::setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims) { throwIfGraphAssociated< syclex::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_experimental_cuda_cluster_launch>(); - impl->KLProps.MKernelUsesClusterLaunch = true; + impl->KLProps.MUsesClusterLaunch = true; if (Dims == 1) { sycl::range<1> ClusterSizeTrimmed = {ClusterSize[0]}; @@ -2337,7 +2337,7 @@ void handler::setKernelClusterLaunch(sycl::range<3> ClusterSize) { throwIfGraphAssociated< syclex::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_experimental_cuda_cluster_launch>(); - impl->KLProps.MKernelUsesClusterLaunch = true; + impl->KLProps.MUsesClusterLaunch = true; impl->MNDRDesc.setClusterDimensions(ClusterSize); } @@ -2345,7 +2345,7 @@ void handler::setKernelClusterLaunch(sycl::range<2> ClusterSize) { throwIfGraphAssociated< syclex::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_experimental_cuda_cluster_launch>(); - impl->KLProps.MKernelUsesClusterLaunch = true; + impl->KLProps.MUsesClusterLaunch = true; impl->MNDRDesc.setClusterDimensions(ClusterSize); } @@ -2353,14 +2353,14 @@ void handler::setKernelClusterLaunch(sycl::range<1> ClusterSize) { throwIfGraphAssociated< syclex::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_experimental_cuda_cluster_launch>(); - impl->KLProps.MKernelUsesClusterLaunch = true; + impl->KLProps.MUsesClusterLaunch = true; impl->MNDRDesc.setClusterDimensions(ClusterSize); } void handler::setKernelWorkGroupMem(size_t Size) { throwIfGraphAssociated(); - impl->KLProps.MKernelWorkGroupMemorySize = Size; + impl->KLProps.MWorkGroupMemorySize = Size; } #endif // __INTEL_PREVIEW_BREAKING_CHANGES diff --git a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp index ed9da96f6c0bc..9870e0c3c49ce 100644 --- a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp @@ -149,9 +149,9 @@ class MockHandler : public sycl::handler { std::move(impl->CGData), std::move(impl->MArgs), CGH->MKernelName.data(), impl->MKernelNameBasedCachePtr, std::move(CGH->MStreamStorage), std::move(impl->MAuxiliaryResources), - impl->MCGType, {}, *impl->KLProps.MKernelIsCooperative, - *impl->KLProps.MKernelUsesClusterLaunch, - *impl->KLProps.MKernelWorkGroupMemorySize, CGH->MCodeLoc)); + impl->MCGType, {}, *impl->KLProps.MIsCooperative, + *impl->KLProps.MUsesClusterLaunch, + *impl->KLProps.MWorkGroupMemorySize, CGH->MCodeLoc)); break; } default: diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index 0aac3bf114e50..d929a3b320b22 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -304,9 +304,8 @@ class MockHandlerCustomFinalize : public MockHandler { std::move(impl->MKernelBundle), std::move(CGData), getArgs(), getKernelName(), impl->MKernelNameBasedCachePtr, getStreamStorage(), impl->MAuxiliaryResources, getType(), {}, - *impl->KLProps.MKernelIsCooperative, - *impl->KLProps.MKernelUsesClusterLaunch, - *impl->KLProps.MKernelWorkGroupMemorySize, getCodeLoc())); + *impl->KLProps.MIsCooperative, *impl->KLProps.MUsesClusterLaunch, + *impl->KLProps.MWorkGroupMemorySize, getCodeLoc())); break; } case sycl::detail::CGType::CodeplayHostTask: { diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index 56b6926ec1c19..05d30c163096a 100644 --- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -35,9 +35,8 @@ class MockHandlerStreamInit : public MockHandler { getRequirements(), getEvents()), getArgs(), getKernelName(), impl->MKernelNameBasedCachePtr, getStreamStorage(), std::move(impl->MAuxiliaryResources), getType(), - {}, *impl->KLProps.MKernelIsCooperative, - *impl->KLProps.MKernelUsesClusterLaunch, - *impl->KLProps.MKernelWorkGroupMemorySize, getCodeLoc())); + {}, *impl->KLProps.MIsCooperative, *impl->KLProps.MUsesClusterLaunch, + *impl->KLProps.MWorkGroupMemorySize, getCodeLoc())); break; } default: From 5ffc25f659f8407b6714c1bf8f70650ef915e723 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Mon, 28 Jul 2025 20:04:34 +0200 Subject: [PATCH 12/25] Make KernelLaunchPropertyWrapper empty --- .../sycl/detail/kernel_launch_helper.hpp | 32 +++++++++++-------- sycl/source/detail/handler_impl.hpp | 9 ++++++ 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index dc67aeaed5fa4..a652410e91819 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -254,9 +254,9 @@ struct KernelWrapper< } }; // KernelWrapper struct -// This struct is inherited by handler_impl and sycl::handler is using the -// static methods. +// This struct is inherited by sycl::handler. struct KernelLaunchPropertyWrapper { + // Changing values in this will break ABI/API. enum class StableKernelCacheConfig : int32_t { Default = 0, @@ -282,6 +282,18 @@ struct KernelLaunchPropertyWrapper { MWorkGroupMemorySize(std::nullopt), MUsesClusterLaunch(std::nullopt), MClusterDims(0), MClusterSize{0, 0, 0} {} + + KernelLaunchPropertiesT(ur_kernel_cache_config_t _CacheConfig, + bool _IsCooperative, + uint32_t _WorkGroupMemorySize, + bool _UsesClusterLaunch, + size_t _ClusterDims, + std::array _ClusterSize) + : MCacheConfig(_CacheConfig), MIsCooperative(_IsCooperative), + MWorkGroupMemorySize(_WorkGroupMemorySize), + MUsesClusterLaunch(_UsesClusterLaunch), MClusterDims(_ClusterDims), + MClusterSize(_ClusterSize) {} + KernelLaunchPropertiesT &operator=(const KernelLaunchPropertiesT &Other) { if (Other.MCacheConfig) MCacheConfig = Other.MCacheConfig; @@ -306,18 +318,6 @@ struct KernelLaunchPropertyWrapper { ~KernelLaunchPropertiesT() = default; }; - KernelLaunchPropertiesT KLProps; - - // Set default values for kernel launch properties. - KernelLaunchPropertyWrapper() { - KLProps.MCacheConfig = UR_KERNEL_CACHE_CONFIG_DEFAULT; - KLProps.MIsCooperative = false; - KLProps.MWorkGroupMemorySize = 0; - KLProps.MUsesClusterLaunch = false; - KLProps.MClusterDims = 0; - KLProps.MClusterSize.fill(0); - } - template static void throwIfGraphAssociated(bool HasGraph) { @@ -565,6 +565,10 @@ struct KernelLaunchPropertyWrapper { } }; // KernelLaunchPropertyWrapper struct +static_assert(sizeof(KernelLaunchPropertyWrapper) == 1, + "KernelLaunchPropertyWrapper should not have any data members " + "to avoid increasing the size of handler."); + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index bb7d0385761a9..a5f3614a39697 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -105,6 +105,15 @@ class handler_impl : public KernelLaunchPropertyWrapper { // If the pipe operation is read or write, 1 for read 0 for write. bool HostPipeRead = true; + KernelLaunchPropertyWrapper::KernelLaunchPropertiesT KLProps{ + UR_KERNEL_CACHE_CONFIG_DEFAULT, // MCacheConfig + false, // MIsCooperative + 0, // MWorkGroupMemorySize + false, // MUsesClusterLaunch + 0, // MClusterDims + {0, 0, 0} // MClusterSize + }; + // Extra information for bindless image copy ur_image_desc_t MSrcImageDesc = {}; ur_image_desc_t MDstImageDesc = {}; From 17fb4f911a768fc1955fe6a86ef2e5aabcb25eb7 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Mon, 28 Jul 2025 23:40:10 +0200 Subject: [PATCH 13/25] Make sycl::handler inherit KernelLaunchPropertyWrapper --- .../sycl/detail/kernel_launch_helper.hpp | 156 +++++++----------- sycl/include/sycl/handler.hpp | 74 +++++---- sycl/source/detail/handler_impl.hpp | 4 +- sycl/source/handler.cpp | 15 +- sycl/test/abi/layout_handler.cpp | 1 + 5 files changed, 113 insertions(+), 137 deletions(-) diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index a652410e91819..48c48ddfca0b4 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -255,19 +255,12 @@ struct KernelWrapper< }; // KernelWrapper struct // This struct is inherited by sycl::handler. -struct KernelLaunchPropertyWrapper { - - // Changing values in this will break ABI/API. - enum class StableKernelCacheConfig : int32_t { - Default = 0, - LargeSLM = 1, - LargeData = 2 - }; - +template class KernelLaunchPropertyWrapper { +public: // This struct is used to store kernel launch properties. // std::optional is used to indicate that the property is not set. // In some code paths, kernel launch properties are set multiple times - // for the same kernel, i.e why using std::optional to avoid overriding + // for the same kernel, that is why using std::optional to avoid overriding // previously set properties. struct KernelLaunchPropertiesT { std::optional MCacheConfig; @@ -282,19 +275,16 @@ struct KernelLaunchPropertyWrapper { MWorkGroupMemorySize(std::nullopt), MUsesClusterLaunch(std::nullopt), MClusterDims(0), MClusterSize{0, 0, 0} {} - KernelLaunchPropertiesT(ur_kernel_cache_config_t _CacheConfig, - bool _IsCooperative, - uint32_t _WorkGroupMemorySize, - bool _UsesClusterLaunch, - size_t _ClusterDims, + bool _IsCooperative, uint32_t _WorkGroupMemorySize, + bool _UsesClusterLaunch, size_t _ClusterDims, std::array _ClusterSize) : MCacheConfig(_CacheConfig), MIsCooperative(_IsCooperative), MWorkGroupMemorySize(_WorkGroupMemorySize), MUsesClusterLaunch(_UsesClusterLaunch), MClusterDims(_ClusterDims), MClusterSize(_ClusterSize) {} - KernelLaunchPropertiesT &operator=(const KernelLaunchPropertiesT &Other) { + void takeUnionOfProperties(const KernelLaunchPropertiesT &Other) { if (Other.MCacheConfig) MCacheConfig = Other.MCacheConfig; @@ -309,40 +299,33 @@ struct KernelLaunchPropertyWrapper { MClusterDims = Other.MClusterDims; MClusterSize = Other.MClusterSize; } - - return *this; } + }; // struct KernelLaunchPropertiesT - KernelLaunchPropertiesT(const KernelLaunchPropertiesT &Other) = default; - KernelLaunchPropertiesT(KernelLaunchPropertiesT &&Other) = default; - ~KernelLaunchPropertiesT() = default; +private: +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + // Modeled after ur_kernel_cache_config_t + enum class StableKernelCacheConfig : int32_t { + Default = 0, + LargeSLM = 1, + LargeData = 2 }; +#endif // __INTEL_PREVIEW_BREAKING_CHANGES - template - static void throwIfGraphAssociated(bool HasGraph) { - if (!HasGraph) - return; - - std::string FeatureString = - sycl::ext::oneapi::experimental::detail::UnsupportedFeatureToString( - FeatureT); - throw sycl::exception(sycl::make_error_code(errc::invalid), - "The " + FeatureString + - " feature is not yet available " - "for use with the SYCL Graph extension."); - } - - static void verifyDeviceHasProgressGuarantee( - bool supported, + void verifyDeviceHasProgressGuarantee( sycl::ext::oneapi::experimental::forward_progress_guarantee guarantee, sycl::ext::oneapi::experimental::execution_scope threadScope, + sycl::ext::oneapi::experimental::execution_scope coordinationScope, KernelLaunchPropertiesT &prop) { using execution_scope = sycl::ext::oneapi::experimental::execution_scope; using forward_progress = sycl::ext::oneapi::experimental::forward_progress_guarantee; + PropertyProcessorT *pp = static_cast(this); + bool supported = pp->deviceSupportForwardProgress(guarantee, threadScope, + coordinationScope); + if (threadScope == execution_scope::work_group) { if (!supported) { throw sycl::exception( @@ -383,40 +366,29 @@ struct KernelLaunchPropertyWrapper { } } +public: /// Process runtime kernel properties. /// /// Stores information about kernel properties into the handler. - template - static KernelLaunchPropertiesT - processLaunchProperties(PropertiesT Props, PropertyProcessorPtrT p) { + template + KernelLaunchPropertiesT processLaunchProperties(PropertiesT Props) { namespace syclex = sycl::ext::oneapi::experimental; KernelLaunchPropertiesT retval; - bool HasGraph = p->isKernelAssociatedWithGraph(); + + PropertyProcessorT *pp = static_cast(this); + bool HasGraph = pp->isKernelAssociatedWithGraph(); // Process Kernel cache configuration property. { - auto ConvertToURCacheConfig = [](StableKernelCacheConfig cc) { - switch (cc) { - case StableKernelCacheConfig::Default: - return UR_KERNEL_CACHE_CONFIG_DEFAULT; - case StableKernelCacheConfig::LargeSLM: - return UR_KERNEL_CACHE_CONFIG_LARGE_SLM; - case StableKernelCacheConfig::LargeData: - return UR_KERNEL_CACHE_CONFIG_LARGE_DATA; - } - }; - if constexpr (PropertiesT::template has_property< sycl::ext::intel::experimental::cache_config_key>()) { auto Config = Props.template get_property< sycl::ext::intel::experimental::cache_config_key>(); if (Config == sycl::ext::intel::experimental::large_slm) { - retval.MCacheConfig = - ConvertToURCacheConfig(StableKernelCacheConfig::LargeSLM); + retval.MCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_SLM; } else if (Config == sycl::ext::intel::experimental::large_data) { - retval.MCacheConfig = - ConvertToURCacheConfig(StableKernelCacheConfig::LargeData); + retval.MCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_DATA; } } else { std::ignore = Props; @@ -437,34 +409,25 @@ struct KernelLaunchPropertyWrapper { syclex::work_group_progress_key>()) { auto prop = Props.template get_property(); - bool supported = p->deviceSupportForwardProgress( - prop.guarantee, syclex::execution_scope::work_group, - prop.coordinationScope); - verifyDeviceHasProgressGuarantee(supported, prop.guarantee, + verifyDeviceHasProgressGuarantee(prop.guarantee, syclex::execution_scope::work_group, - retval); + prop.coordinationScope, retval); } if constexpr (PropertiesT::template has_property< syclex::sub_group_progress_key>()) { auto prop = Props.template get_property(); - bool supported = p->deviceSupportForwardProgress( - prop.guarantee, syclex::execution_scope::sub_group, - prop.coordinationScope); - verifyDeviceHasProgressGuarantee(supported, prop.guarantee, + verifyDeviceHasProgressGuarantee(prop.guarantee, syclex::execution_scope::sub_group, - retval); + prop.coordinationScope, retval); } if constexpr (PropertiesT::template has_property< syclex::work_item_progress_key>()) { auto prop = Props.template get_property(); - bool supported = p->deviceSupportForwardProgress( - prop.guarantee, syclex::execution_scope::work_item, - prop.coordinationScope); - verifyDeviceHasProgressGuarantee(supported, prop.guarantee, + verifyDeviceHasProgressGuarantee(prop.guarantee, syclex::execution_scope::work_item, - retval); + prop.coordinationScope, retval); } } @@ -472,9 +435,17 @@ struct KernelLaunchPropertyWrapper { { if constexpr (PropertiesT::template has_property< syclex::work_group_scratch_size>()) { - throwIfGraphAssociated( - HasGraph); + if (HasGraph) { + std::string FeatureString = + ext::oneapi::experimental::detail::UnsupportedFeatureToString( + syclex::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_work_group_scratch_memory); + throw sycl::exception(sycl::make_error_code(errc::invalid), + "The " + FeatureString + + " feature is not yet available " + "for use with the SYCL Graph extension."); + } + auto WorkGroupMemSize = Props.template get_property(); retval.MWorkGroupMemorySize = WorkGroupMemSize.size; @@ -486,9 +457,17 @@ struct KernelLaunchPropertyWrapper { constexpr std::size_t ClusterDim = syclex::detail::getClusterDim(); if constexpr (ClusterDim > 0) { - throwIfGraphAssociated< - syclex::detail::UnsupportedGraphFeatures:: - sycl_ext_oneapi_experimental_cuda_cluster_launch>(HasGraph); + if (HasGraph) { + std::string FeatureString = + ext::oneapi::experimental::detail::UnsupportedFeatureToString( + syclex::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_experimental_cuda_cluster_launch); + throw sycl::exception(sycl::make_error_code(errc::invalid), + "The " + FeatureString + + " feature is not yet available " + "for use with the SYCL Graph extension."); + } + auto ClusterSize = Props .template get_property< syclex::cuda::cluster_size_key>() @@ -523,10 +502,9 @@ struct KernelLaunchPropertyWrapper { /// kernel, even though body of those instantiated functions could be almost /// the same, thus unnecessary increasing compilation time. template < - bool IsESIMDKernel, typename PropertyProcessorPtrT, + bool IsESIMDKernel, typename PropertiesT = ext::oneapi::experimental::empty_properties_t> - static KernelLaunchPropertiesT processProperties(PropertiesT Props, - PropertyProcessorPtrT p) { + KernelLaunchPropertiesT processProperties(PropertiesT Props) { static_assert( ext::oneapi::experimental::is_property_list::value, "Template type is not a property list."); @@ -542,21 +520,19 @@ struct KernelLaunchPropertyWrapper { sycl::ext::oneapi::experimental::indirectly_callable_key>(), "indirectly_callable property cannot be applied to SYCL kernels"); - return processLaunchProperties(Props, p); + return processLaunchProperties(Props); } - template - static std::optional - parseProperties([[maybe_unused]] const KernelType &KernelFunc, - [[maybe_unused]] PropertyProcessorPtrT p) { + template + std::optional + parseProperties([[maybe_unused]] const KernelType &KernelFunc) { #ifndef __SYCL_DEVICE_ONLY__ // If there are properties provided by get method then process them. if constexpr (ext::oneapi::experimental::detail:: HasKernelPropertiesGetMethod::value) { return processProperties()>( - KernelFunc.get(ext::oneapi::experimental::properties_tag{}), p); + KernelFunc.get(ext::oneapi::experimental::properties_tag{})); } #endif // If there are no properties provided by get method then return empty @@ -565,10 +541,6 @@ struct KernelLaunchPropertyWrapper { } }; // KernelLaunchPropertyWrapper struct -static_assert(sizeof(KernelLaunchPropertyWrapper) == 1, - "KernelLaunchPropertyWrapper should not have any data members " - "to avoid increasing the size of handler."); - } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 8984def20b02e..b893778ebd904 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -416,7 +416,13 @@ template bool range_size_fits_in_size_t(const range &r) { /// \sa kernel /// /// \ingroup sycl_api -class __SYCL_EXPORT handler { +class __SYCL_EXPORT handler + : public detail::KernelLaunchPropertyWrapper { + + static_assert(sizeof(detail::KernelLaunchPropertyWrapper) == 1, + "KernelLaunchPropertyWrapper should not have any data members " + "to avoid increasing the size of handler."); + private: #ifdef __INTEL_PREVIEW_BREAKING_CHANGES /// Constructs SYCL handler from the pre-constructed stack-allocated @@ -1300,9 +1306,7 @@ class __SYCL_EXPORT handler { decltype(Wrapper), TransformedArgType, PropertiesT>::wrap(Wrapper); - if (auto prop = - detail::KernelLaunchPropertyWrapper::parseProperties( - Wrapper, this)) { + if (auto prop = parseProperties(Wrapper)) { setKernelLaunchProperties(*prop); } @@ -1331,19 +1335,16 @@ class __SYCL_EXPORT handler { // kernel is generated detail::KernelWrapper::wrap(KernelFunc); - if (auto prop = - detail::KernelLaunchPropertyWrapper::parseProperties( - KernelFunc, this)) { + if (auto prop = parseProperties(KernelFunc)) { setKernelLaunchProperties(*prop); } #ifndef __SYCL_DEVICE_ONLY__ constexpr detail::string_view Name{detail::getKernelName()}; verifyUsedKernelBundleInternal(Name); - auto ProcessedProps = - detail::KernelLaunchPropertyWrapper::processProperties< - detail::isKernelESIMD(), decltype(this), PropertiesT>( - Props, this); + KernelLaunchPropertiesT ProcessedProps = + KernelLaunchPropertyWrapper::processProperties< + detail::isKernelESIMD(), PropertiesT>(Props); setKernelLaunchProperties(ProcessedProps); detail::checkValueRange(UserRange); setNDRangeDescriptor(std::move(UserRange)); @@ -1374,9 +1375,9 @@ class __SYCL_EXPORT handler { MKernel = detail::getSyclObjImpl(std::move(Kernel)); detail::checkValueRange(NumWorkItems); setNDRangeDescriptor(std::move(NumWorkItems)); - auto ParsedProp = - detail::KernelLaunchPropertyWrapper::processLaunchProperties< - PropertiesT>(Props, this); + KernelLaunchPropertiesT ParsedProp = + KernelLaunchPropertyWrapper::processLaunchProperties( + Props); setKernelLaunchProperties(ParsedProp); setType(detail::CGType::Kernel); extractArgsAndReqs(); @@ -1402,9 +1403,9 @@ class __SYCL_EXPORT handler { MKernel = detail::getSyclObjImpl(std::move(Kernel)); detail::checkValueRange(NDRange); setNDRangeDescriptor(std::move(NDRange)); - auto ParsedProp = - detail::KernelLaunchPropertyWrapper::processLaunchProperties< - PropertiesT>(Props, this); + KernelLaunchPropertiesT ParsedProp = + KernelLaunchPropertyWrapper::processLaunchProperties( + Props); setKernelLaunchProperties(ParsedProp); setType(detail::CGType::Kernel); extractArgsAndReqs(); @@ -1426,8 +1427,7 @@ class __SYCL_EXPORT handler { (void)Props; detail::KernelWrapper::wrap(KernelFunc); - if (auto prop = detail::KernelLaunchPropertyWrapper::parseProperties( - KernelFunc, this)) { + if (auto prop = parseProperties(KernelFunc)) { setKernelLaunchProperties(*prop); } #ifndef __SYCL_DEVICE_ONLY__ @@ -1448,10 +1448,9 @@ class __SYCL_EXPORT handler { } StoreLambda(std::move(KernelFunc)); - auto ProcessedProps = - detail::KernelLaunchPropertyWrapper::processProperties< - detail::isKernelESIMD(), decltype(this), PropertiesT>(Props, - this); + KernelLaunchPropertiesT ProcessedProps = + KernelLaunchPropertyWrapper::processProperties< + detail::isKernelESIMD(), PropertiesT>(Props); setKernelLaunchProperties(ProcessedProps); #endif } @@ -1475,8 +1474,7 @@ class __SYCL_EXPORT handler { (void)Kernel; detail::KernelWrapper::wrap(KernelFunc); - if (auto prop = detail::KernelLaunchPropertyWrapper::parseProperties( - KernelFunc, this)) { + if (auto prop = parseProperties(KernelFunc)) { setKernelLaunchProperties(*prop); } #ifndef __SYCL_DEVICE_ONLY__ @@ -1506,10 +1504,9 @@ class __SYCL_EXPORT handler { } else { StoreLambda(std::move(KernelFunc)); } - auto ProcessedProps = - detail::KernelLaunchPropertyWrapper::processProperties< - detail::isKernelESIMD(), decltype(this), PropertiesT>(Props, - this); + KernelLaunchPropertiesT ProcessedProps = + KernelLaunchPropertyWrapper::processProperties< + detail::isKernelESIMD(), PropertiesT>(Props); setKernelLaunchProperties(ProcessedProps); #endif } @@ -3539,9 +3536,8 @@ class __SYCL_EXPORT handler { void setKernelWorkGroupMem(size_t Size); #endif - void setKernelLaunchProperties( - detail::KernelLaunchPropertyWrapper::KernelLaunchPropertiesT - &KernelLaunchProperties); + void + setKernelLaunchProperties(KernelLaunchPropertiesT &KernelLaunchProperties); bool isKernelAssociatedWithGraph() const; bool deviceSupportForwardProgress( sycl::ext::oneapi::experimental::forward_progress_guarantee guarantee, @@ -3609,8 +3605,16 @@ class __SYCL_EXPORT handler { template < ext::oneapi::experimental::detail::UnsupportedGraphFeatures FeatureT> void throwIfGraphAssociated() const { - detail::KernelLaunchPropertyWrapper::throwIfGraphAssociated( - getCommandGraph() != nullptr); + + if (getCommandGraph()) { + std::string FeatureString = + ext::oneapi::experimental::detail::UnsupportedFeatureToString( + FeatureT); + throw sycl::exception(sycl::make_error_code(errc::invalid), + "The " + FeatureString + + " feature is not yet available " + "for use with the SYCL Graph extension."); + } } #endif @@ -3685,7 +3689,7 @@ class __SYCL_EXPORT handler { void instantiateKernelOnHost(void *InstantiateKernelOnHostPtr); friend class detail::HandlerAccess; - friend struct detail::KernelLaunchPropertyWrapper; + friend class detail::KernelLaunchPropertyWrapper; #ifdef __INTEL_PREVIEW_BREAKING_CHANGES __SYCL_DLL_LOCAL detail::handler_impl *get_impl() { return impl; } diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index a5f3614a39697..2f89093b61661 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -29,7 +29,7 @@ enum class HandlerSubmissionState : std::uint8_t { SPEC_CONST_SET_STATE, }; -class handler_impl : public KernelLaunchPropertyWrapper { +class handler_impl { public: handler_impl(queue_impl &Queue, queue_impl *SubmissionSecondaryQueue, bool EventNeeded) @@ -105,7 +105,7 @@ class handler_impl : public KernelLaunchPropertyWrapper { // If the pipe operation is read or write, 1 for read 0 for write. bool HostPipeRead = true; - KernelLaunchPropertyWrapper::KernelLaunchPropertiesT KLProps{ + KernelLaunchPropertyWrapper::KernelLaunchPropertiesT KLProps{ UR_KERNEL_CACHE_CONFIG_DEFAULT, // MCacheConfig false, // MIsCooperative 0, // MWorkGroupMemorySize diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 3b9f514454189..dc6f120bc600c 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -626,12 +626,11 @@ event handler::finalize() { impl->get_queue(), impl->MNDRDesc, impl->MArgs, KernelBundleImpPtr, MKernel.get(), toKernelNameStrT(MKernelName), impl->MKernelNameBasedCachePtr, RawEvents, ResultEvent.get(), - nullptr, *impl->KLProps.MCacheConfig, - *impl->KLProps.MIsCooperative, + nullptr, *impl->KLProps.MCacheConfig, *impl->KLProps.MIsCooperative, *impl->KLProps.MUsesClusterLaunch, - *impl->KLProps.MWorkGroupMemorySize, BinImage, - impl->MKernelFuncPtr, impl->MKernelNumArgs, - impl->MKernelParamDescGetter, impl->MKernelHasSpecialCaptures); + *impl->KLProps.MWorkGroupMemorySize, BinImage, impl->MKernelFuncPtr, + impl->MKernelNumArgs, impl->MKernelParamDescGetter, + impl->MKernelHasSpecialCaptures); #ifdef XPTI_ENABLE_INSTRUMENTATION if (xptiEnabled) { // Emit signal only when event is created @@ -691,8 +690,8 @@ event handler::finalize() { impl->MKernelNameBasedCachePtr, std::move(MStreamStorage), std::move(impl->MAuxiliaryResources), getType(), *impl->KLProps.MCacheConfig, *impl->KLProps.MIsCooperative, - *impl->KLProps.MUsesClusterLaunch, - *impl->KLProps.MWorkGroupMemorySize, MCodeLoc)); + *impl->KLProps.MUsesClusterLaunch, *impl->KLProps.MWorkGroupMemorySize, + MCodeLoc)); break; } case detail::CGType::CopyAccToPtr: @@ -2259,7 +2258,7 @@ void handler::setKernelLaunchProperties( KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &KernelLaunchProperties) { - impl->KLProps = KernelLaunchProperties; + impl->KLProps.takeUnionOfProperties(KernelLaunchProperties); if (KernelLaunchProperties.MUsesClusterLaunch) { if (KernelLaunchProperties.MClusterDims == 1) { diff --git a/sycl/test/abi/layout_handler.cpp b/sycl/test/abi/layout_handler.cpp index f313066acc6f4..302d68f429241 100644 --- a/sycl/test/abi/layout_handler.cpp +++ b/sycl/test/abi/layout_handler.cpp @@ -14,6 +14,7 @@ void foo() { // The order of field declarations and their types are important. // CHECK: 0 | class sycl::handler +// CHECK-NEXT: 0 | class sycl::detail::KernelLaunchPropertyWrapper (base) (empty) // CHECK-NEXT: 0 | class std::shared_ptr impl // CHECK-NEXT: 0 | class std::__shared_ptr (base) // CHECK-NEXT: 0 | class std::__shared_ptr_access (base) (empty) From 0856936b45077f89c88d609b7eb9b53cd4cb0dac Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Mon, 28 Jul 2025 23:48:35 +0200 Subject: [PATCH 14/25] Remove getDeviceImpl --- sycl/include/sycl/handler.hpp | 2 -- sycl/source/handler.cpp | 2 -- 2 files changed, 4 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index b893778ebd904..d368716cb0a13 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -1106,8 +1106,6 @@ class __SYCL_EXPORT handler // supported gcc version is bumped. std::tuple, bool> getMaxWorkGroups_v2(); - detail::device_impl &getDeviceImpl(); - template std::tuple, bool> getRoundedRange(range UserRange) { range RoundedRange = UserRange; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index dc6f120bc600c..f475096b5b946 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -2293,8 +2293,6 @@ bool handler::deviceSupportForwardProgress( coordinationScope); } -device_impl &handler::getDeviceImpl() { return impl->get_device(); } - #ifndef __INTEL_PREVIEW_BREAKING_CHANGES void handler::setKernelCacheConfig(handler::StableKernelCacheConfig Config) { From 6361a2954112641bed8e630f0da4feb3bb3ff35c Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Tue, 29 Jul 2025 17:35:12 +0200 Subject: [PATCH 15/25] Add comment on parseProperties --- sycl/include/sycl/detail/kernel_launch_helper.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index 48c48ddfca0b4..7fae8e9dad531 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -523,6 +523,8 @@ template class KernelLaunchPropertyWrapper { return processLaunchProperties(Props); } + // Returns KernelLaunchPropertiesT or std::nullopt based on whether the + // kernel functor has a get method that returns properties. template std::optional parseProperties([[maybe_unused]] const KernelType &KernelFunc) { From 1f9a31363c4fa63217c20528d5f90ddb2388eebc Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Tue, 29 Jul 2025 18:54:15 +0200 Subject: [PATCH 16/25] Update abi/sycl_symbols_linux.dump --- sycl/test/abi/sycl_symbols_linux.dump | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index dd392cf315b88..8cd1cf0b3b342 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3605,6 +3605,7 @@ _ZN4sycl3_V17handler24ext_oneapi_memset2d_implEPvmimm _ZN4sycl3_V17handler24registerDynamicParameterEPNS0_3ext6oneapi12experimental6detail22dynamic_parameter_implEi _ZN4sycl3_V17handler24registerDynamicParameterERNS0_3ext6oneapi12experimental6detail22dynamic_parameter_baseEi _ZN4sycl3_V17handler25ext_intel_write_host_pipeENS0_6detail11string_viewEPvmb +_ZN4sycl3_V17handler25setKernelLaunchPropertiesERNS0_6detail27KernelLaunchPropertyWrapperIS1_E23KernelLaunchPropertiesTE _ZN4sycl3_V17handler26associateWithHandlerCommonESt10shared_ptrINS0_6detail16AccessorImplHostEEi _ZN4sycl3_V17handler26setKernelNameBasedCachePtrEPNS0_6detail21KernelNameBasedCacheTE _ZN4sycl3_V17handler26setNDRangeDescriptorPaddedENS0_5rangeILi3EEENS0_2idILi3EEEi @@ -4111,7 +4112,9 @@ _ZNK4sycl3_V17handler15getKernelBundleEv _ZNK4sycl3_V17handler16getDeviceBackendEv _ZNK4sycl3_V17handler17getContextImplPtrEv _ZNK4sycl3_V17handler21HasAssociatedAccessorEPNS0_6detail16AccessorImplHostENS0_6access6targetE +_ZNK4sycl3_V17handler27isKernelAssociatedWithGraphEv _ZNK4sycl3_V17handler27isStateExplicitKernelBundleEv +_ZNK4sycl3_V17handler28deviceSupportForwardProgressENS0_3ext6oneapi12experimental26forward_progress_guaranteeENS4_15execution_scopeES6_ _ZNK4sycl3_V17handler30getOrInsertHandlerKernelBundleEb _ZNK4sycl3_V17handler33getOrInsertHandlerKernelBundlePtrEb _ZNK4sycl3_V17handler7getTypeEv From 297172a0c0150d4528ad8fbd35e75f410dc4d6d1 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Wed, 30 Jul 2025 01:27:19 +0200 Subject: [PATCH 17/25] NFC changes based on feedback from code review --- .../sycl/detail/kernel_launch_helper.hpp | 82 ++++++++----------- sycl/include/sycl/handler.hpp | 6 +- sycl/source/handler.cpp | 3 +- 3 files changed, 41 insertions(+), 50 deletions(-) diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index 7fae8e9dad531..bd5e59b5e20aa 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -255,7 +255,7 @@ struct KernelWrapper< }; // KernelWrapper struct // This struct is inherited by sycl::handler. -template class KernelLaunchPropertyWrapper { +template class KernelLaunchPropertyWrapper { public: // This struct is used to store kernel launch properties. // std::optional is used to indicate that the property is not set. @@ -263,17 +263,14 @@ template class KernelLaunchPropertyWrapper { // for the same kernel, that is why using std::optional to avoid overriding // previously set properties. struct KernelLaunchPropertiesT { - std::optional MCacheConfig; - std::optional MIsCooperative; - std::optional MWorkGroupMemorySize; - std::optional MUsesClusterLaunch; - size_t MClusterDims; - std::array MClusterSize; - - KernelLaunchPropertiesT() - : MCacheConfig(std::nullopt), MIsCooperative(std::nullopt), - MWorkGroupMemorySize(std::nullopt), MUsesClusterLaunch(std::nullopt), - MClusterDims(0), MClusterSize{0, 0, 0} {} + std::optional MCacheConfig = std::nullopt; + std::optional MIsCooperative = std::nullopt; + std::optional MWorkGroupMemorySize = std::nullopt; + std::optional MUsesClusterLaunch = std::nullopt; + size_t MClusterDims = 0; + std::array MClusterSize = {0, 0, 0}; + + KernelLaunchPropertiesT() = default; KernelLaunchPropertiesT(ur_kernel_cache_config_t _CacheConfig, bool _IsCooperative, uint32_t _WorkGroupMemorySize, @@ -284,7 +281,7 @@ template class KernelLaunchPropertyWrapper { MUsesClusterLaunch(_UsesClusterLaunch), MClusterDims(_ClusterDims), MClusterSize(_ClusterSize) {} - void takeUnionOfProperties(const KernelLaunchPropertiesT &Other) { + void copyInitializedFrom(const KernelLaunchPropertiesT &Other) { if (Other.MCacheConfig) MCacheConfig = Other.MCacheConfig; @@ -322,7 +319,7 @@ template class KernelLaunchPropertyWrapper { using forward_progress = sycl::ext::oneapi::experimental::forward_progress_guarantee; - PropertyProcessorT *pp = static_cast(this); + Self *pp = static_cast(this); bool supported = pp->deviceSupportForwardProgress(guarantee, threadScope, coordinationScope); @@ -372,11 +369,11 @@ template class KernelLaunchPropertyWrapper { /// Stores information about kernel properties into the handler. template KernelLaunchPropertiesT processLaunchProperties(PropertiesT Props) { + using namespace sycl::ext::oneapi::experimental; + using namespace sycl::ext::oneapi::experimental::detail; - namespace syclex = sycl::ext::oneapi::experimental; KernelLaunchPropertiesT retval; - - PropertyProcessorT *pp = static_cast(this); + Self *pp = static_cast(this); bool HasGraph = pp->isKernelAssociatedWithGraph(); // Process Kernel cache configuration property. @@ -398,7 +395,7 @@ template class KernelLaunchPropertyWrapper { // Process Kernel cooperative property. { constexpr bool UsesRootSync = - PropertiesT::template has_property(); + PropertiesT::template has_property(); if (UsesRootSync) retval.MIsCooperative = UsesRootSync; } @@ -406,27 +403,24 @@ template class KernelLaunchPropertyWrapper { // Process device progress properties. { if constexpr (PropertiesT::template has_property< - syclex::work_group_progress_key>()) { - auto prop = - Props.template get_property(); + work_group_progress_key>()) { + auto prop = Props.template get_property(); verifyDeviceHasProgressGuarantee(prop.guarantee, - syclex::execution_scope::work_group, + execution_scope::work_group, prop.coordinationScope, retval); } if constexpr (PropertiesT::template has_property< - syclex::sub_group_progress_key>()) { - auto prop = - Props.template get_property(); + sub_group_progress_key>()) { + auto prop = Props.template get_property(); verifyDeviceHasProgressGuarantee(prop.guarantee, - syclex::execution_scope::sub_group, + execution_scope::sub_group, prop.coordinationScope, retval); } if constexpr (PropertiesT::template has_property< - syclex::work_item_progress_key>()) { - auto prop = - Props.template get_property(); + work_item_progress_key>()) { + auto prop = Props.template get_property(); verifyDeviceHasProgressGuarantee(prop.guarantee, - syclex::execution_scope::work_item, + execution_scope::work_item, prop.coordinationScope, retval); } } @@ -434,12 +428,11 @@ template class KernelLaunchPropertyWrapper { // Process work group scratch memory property. { if constexpr (PropertiesT::template has_property< - syclex::work_group_scratch_size>()) { + work_group_scratch_size>()) { if (HasGraph) { - std::string FeatureString = - ext::oneapi::experimental::detail::UnsupportedFeatureToString( - syclex::detail::UnsupportedGraphFeatures:: - sycl_ext_oneapi_work_group_scratch_memory); + std::string FeatureString = UnsupportedFeatureToString( + UnsupportedGraphFeatures:: + sycl_ext_oneapi_work_group_scratch_memory); throw sycl::exception(sycl::make_error_code(errc::invalid), "The " + FeatureString + " feature is not yet available " @@ -447,31 +440,28 @@ template class KernelLaunchPropertyWrapper { } auto WorkGroupMemSize = - Props.template get_property(); + Props.template get_property(); retval.MWorkGroupMemorySize = WorkGroupMemSize.size; } } // Parse cluster properties. { - constexpr std::size_t ClusterDim = - syclex::detail::getClusterDim(); + constexpr std::size_t ClusterDim = getClusterDim(); if constexpr (ClusterDim > 0) { if (HasGraph) { - std::string FeatureString = - ext::oneapi::experimental::detail::UnsupportedFeatureToString( - syclex::detail::UnsupportedGraphFeatures:: - sycl_ext_oneapi_experimental_cuda_cluster_launch); + std::string FeatureString = UnsupportedFeatureToString( + UnsupportedGraphFeatures:: + sycl_ext_oneapi_experimental_cuda_cluster_launch); throw sycl::exception(sycl::make_error_code(errc::invalid), "The " + FeatureString + " feature is not yet available " "for use with the SYCL Graph extension."); } - auto ClusterSize = Props - .template get_property< - syclex::cuda::cluster_size_key>() - .get_cluster_size(); + auto ClusterSize = + Props.template get_property>() + .get_cluster_size(); retval.MUsesClusterLaunch = true; retval.MClusterDims = ClusterDim; if (ClusterDim == 1) { diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index d368716cb0a13..00c4bc111c3b2 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -419,9 +419,11 @@ template bool range_size_fits_in_size_t(const range &r) { class __SYCL_EXPORT handler : public detail::KernelLaunchPropertyWrapper { - static_assert(sizeof(detail::KernelLaunchPropertyWrapper) == 1, +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + static_assert(std::is_empty_v>, "KernelLaunchPropertyWrapper should not have any data members " - "to avoid increasing the size of handler."); + "to avoid ABI break."); +#endif // __INTEL_PREVIEW_BREAKING_CHANGES private: #ifdef __INTEL_PREVIEW_BREAKING_CHANGES diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index f475096b5b946..aa1fce3b4292f 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -2258,7 +2258,7 @@ void handler::setKernelLaunchProperties( KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &KernelLaunchProperties) { - impl->KLProps.takeUnionOfProperties(KernelLaunchProperties); + impl->KLProps.copyInitializedFrom(KernelLaunchProperties); if (KernelLaunchProperties.MUsesClusterLaunch) { if (KernelLaunchProperties.MClusterDims == 1) { @@ -2636,7 +2636,6 @@ void handler::copyCodeLoc(const handler &other) { queue handler::getQueue() { return createSyclObjFromImpl(impl->get_queue()); } - namespace detail { __SYCL_EXPORT void HandlerAccess::preProcess(handler &CGH, type_erased_cgfo_ty F) { From 60db900008060b45cd52625cdeaff7982020278c Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Wed, 30 Jul 2025 01:32:52 +0200 Subject: [PATCH 18/25] Updated Windows ABI symbol dump --- sycl/test/abi/sycl_symbols_windows.dump | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 748d74482a1a5..21da9862af6ec 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -229,9 +229,9 @@ ??$get_info_impl@Unative_vector_width_int@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Unative_vector_width_long@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Unative_vector_width_short@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ +??$get_info_impl@Unode_mask@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Unum_args@kernel@info@_V1@sycl@@@kernel@_V1@sycl@@AEBAIXZ ??$get_info_impl@Unum_compute_units@device@info@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA_KXZ -??$get_info_impl@Unode_mask@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Uopencl_c_version@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AVstring@detail@12@XZ ??$get_info_impl@Uparent_device@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV012@XZ ??$get_info_impl@Upartition_affinity_domains@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4partition_affinity_domain@info@_V1@sycl@@V?$allocator@W4partition_affinity_domain@info@_V1@sycl@@@std@@@std@@XZ @@ -549,6 +549,8 @@ ??1sampler@_V1@sycl@@QEAA@XZ ??1stream@_V1@sycl@@QEAA@XZ ??1tls_code_loc_t@detail@_V1@sycl@@QEAA@XZ +??4?$KernelLaunchPropertyWrapper@Vhandler@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z +??4?$KernelLaunchPropertyWrapper@Vhandler@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4?$OwnerLessBase@Vcontext@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4?$OwnerLessBase@Vcontext@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4?$OwnerLessBase@Vdevice@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z @@ -3861,6 +3863,7 @@ ?destroy_image_handle@experimental@oneapi@ext@_V1@sycl@@YAXAEAUsampled_image_handle@12345@AEBVqueue@45@@Z ?destroy_image_handle@experimental@oneapi@ext@_V1@sycl@@YAXAEAUunsampled_image_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z ?destroy_image_handle@experimental@oneapi@ext@_V1@sycl@@YAXAEAUunsampled_image_handle@12345@AEBVqueue@45@@Z +?deviceSupportForwardProgress@handler@_V1@sycl@@AEBA_NW4forward_progress_guarantee@experimental@oneapi@ext@23@W4execution_scope@56723@1@Z ?device_has@queue@_V1@sycl@@AEBA_NW4aspect@23@@Z ?empty@kernel_bundle_plain@detail@_V1@sycl@@QEBA_NXZ ?enable_ext_oneapi_default_context@detail@_V1@sycl@@YAX_N@Z @@ -4187,8 +4190,8 @@ ?get_impl@handler@_V1@sycl@@AEAAPEAVhandler_impl@detail@23@XZ ?get_kernel@kernel_bundle_plain@detail@_V1@sycl@@IEBA?AVkernel@34@AEBVkernel_id@34@@Z ?get_kernel_bundle@kernel@_V1@sycl@@QEBA?AV?$kernel_bundle@$01@23@XZ -?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBV?$vector@Vkernel_id@_V1@sycl@@V?$allocator@Vkernel_id@_V1@sycl@@@std@@@5@W4bundle_state@23@@Z ?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBV?$span@D$0?0@23@W4bundle_state@23@@Z +?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBV?$vector@Vkernel_id@_V1@sycl@@V?$allocator@Vkernel_id@_V1@sycl@@@std@@@5@W4bundle_state@23@@Z ?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@W4bundle_state@23@@Z ?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@W4bundle_state@23@AEBV?$function@$$A6A_NAEBV?$shared_ptr@Vdevice_image_impl@detail@_V1@sycl@@@std@@@Z@5@@Z ?get_kernel_id_impl@detail@_V1@sycl@@YA?AVkernel_id@23@Vstring_view@123@@Z @@ -4260,6 +4263,7 @@ ?isConstOrGlobal@handler@_V1@sycl@@CA_NW4target@access@23@@Z ?isDeviceGlobalUsedInKernel@detail@_V1@sycl@@YA_NPEBX@Z ?isImageOrImageArray@handler@_V1@sycl@@CA_NW4target@access@23@@Z +?isKernelAssociatedWithGraph@handler@_V1@sycl@@AEBA_NXZ ?isMemoryObjectUsedByGraph@AccessorBaseHost@detail@_V1@sycl@@QEBA_NXZ ?isOutOfRange@detail@_V1@sycl@@YA_NV?$vec@H$03@23@W4addressing_mode@23@V?$range@$02@23@@Z ?isPathPresent@OSUtil@detail@_V1@sycl@@SA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z @@ -4415,6 +4419,7 @@ ?setKernelClusterLaunch@handler@_V1@sycl@@AEAAXV?$range@$02@23@H@Z ?setKernelInfo@handler@_V1@sycl@@AEAAXPEAXHP6A?AUkernel_param_desc_t@detail@23@H@Z_N2@Z ?setKernelIsCooperative@handler@_V1@sycl@@AEAAX_N@Z +?setKernelLaunchProperties@handler@_V1@sycl@@AEAAXAEAUKernelLaunchPropertiesT@?$KernelLaunchPropertyWrapper@Vhandler@_V1@sycl@@@detail@23@@Z ?setKernelNameBasedCachePtr@handler@_V1@sycl@@AEAAXPEAUKernelNameBasedCacheT@detail@23@@Z ?setKernelWorkGroupMem@handler@_V1@sycl@@AEAAX_K@Z ?setLocalAccessorArgHelper@handler@_V1@sycl@@AEAAXHAEAVLocalAccessorBaseHost@detail@23@@Z @@ -4427,8 +4432,8 @@ ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@0V?$id@$02@23@@Z ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@V?$id@$02@23@@Z ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@_N@Z -?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@V?$id@$02@23@H@Z ?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@0V?$id@$02@23@H@Z +?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@V?$id@$02@23@H@Z ?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@_NH@Z ?setNDRangeUsed@handler@_V1@sycl@@AEAAX_N@Z ?setStateExplicitKernelBundle@handler@_V1@sycl@@AEAAXXZ @@ -4502,6 +4507,7 @@ ?updateValue@dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXPEBX_K@Z ?updateWorkGroupMem@dynamic_work_group_memory_base@detail@experimental@oneapi@ext@_V1@sycl@@IEAAX_K@Z ?use_kernel_bundle@handler@_V1@sycl@@QEAAXAEBV?$kernel_bundle@$01@23@@Z +?verifyDeviceHasProgressGuarantee@?$KernelLaunchPropertyWrapper@Vhandler@_V1@sycl@@@detail@_V1@sycl@@AEAAXW4forward_progress_guarantee@experimental@oneapi@ext@34@W4execution_scope@67834@1AEAUKernelLaunchPropertiesT@1234@@Z ?verifyDeviceHasProgressGuarantee@handler@_V1@sycl@@AEAAXW4forward_progress_guarantee@experimental@oneapi@ext@23@W4execution_scope@56723@1@Z ?verifyReductionProps@detail@_V1@sycl@@YAXAEBVproperty_list@23@@Z ?verifyUSMAllocatorProperties@_V1@sycl@@YAXAEBVproperty_list@12@@Z From 9e06813d3630566d2ff973c203c3a9bf0ea2c3b8 Mon Sep 17 00:00:00 2001 From: Udit Kumar Agarwal Date: Tue, 29 Jul 2025 16:35:21 -0700 Subject: [PATCH 19/25] Apply suggestions from code review Co-authored-by: Andrei Elovikov --- sycl/include/sycl/detail/kernel_launch_helper.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index bd5e59b5e20aa..a5c58b88a1bb4 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -371,7 +371,6 @@ template class KernelLaunchPropertyWrapper { KernelLaunchPropertiesT processLaunchProperties(PropertiesT Props) { using namespace sycl::ext::oneapi::experimental; using namespace sycl::ext::oneapi::experimental::detail; - KernelLaunchPropertiesT retval; Self *pp = static_cast(this); bool HasGraph = pp->isKernelAssociatedWithGraph(); From 7ffacc7c7948e82a1f598fbb93acb9b88a240b3d Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Wed, 30 Jul 2025 02:00:52 +0200 Subject: [PATCH 20/25] Simply check for use_root_sync_key property --- sycl/include/sycl/detail/kernel_launch_helper.hpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index bd5e59b5e20aa..d4ce0b078d52b 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -394,10 +394,8 @@ template class KernelLaunchPropertyWrapper { // Process Kernel cooperative property. { - constexpr bool UsesRootSync = - PropertiesT::template has_property(); - if (UsesRootSync) - retval.MIsCooperative = UsesRootSync; + if constexpr (PropertiesT::template has_property()) + retval.MIsCooperative = true; } // Process device progress properties. From 8bc01473ccc4d7dc881cf53d1d7eb874562be9f6 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Wed, 30 Jul 2025 22:17:49 +0200 Subject: [PATCH 21/25] Rename processProperties to processKernelProperties, and remove redudant copy of StableKernelCacheConfig --- .../sycl/detail/kernel_launch_helper.hpp | 17 ++++------------ sycl/include/sycl/handler.hpp | 20 +++++++++---------- 2 files changed, 13 insertions(+), 24 deletions(-) diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index a310b953a602e..8594cbaebda6d 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -300,15 +300,6 @@ template class KernelLaunchPropertyWrapper { }; // struct KernelLaunchPropertiesT private: -#ifndef __INTEL_PREVIEW_BREAKING_CHANGES - // Modeled after ur_kernel_cache_config_t - enum class StableKernelCacheConfig : int32_t { - Default = 0, - LargeSLM = 1, - LargeData = 2 - }; -#endif // __INTEL_PREVIEW_BREAKING_CHANGES - void verifyDeviceHasProgressGuarantee( sycl::ext::oneapi::experimental::forward_progress_guarantee guarantee, sycl::ext::oneapi::experimental::execution_scope threadScope, @@ -368,7 +359,7 @@ template class KernelLaunchPropertyWrapper { /// /// Stores information about kernel properties into the handler. template - KernelLaunchPropertiesT processLaunchProperties(PropertiesT Props) { + KernelLaunchPropertiesT processKernelLaunchProperties(PropertiesT Props) { using namespace sycl::ext::oneapi::experimental; using namespace sycl::ext::oneapi::experimental::detail; KernelLaunchPropertiesT retval; @@ -491,7 +482,7 @@ template class KernelLaunchPropertyWrapper { template < bool IsESIMDKernel, typename PropertiesT = ext::oneapi::experimental::empty_properties_t> - KernelLaunchPropertiesT processProperties(PropertiesT Props) { + KernelLaunchPropertiesT processKernelProperties(PropertiesT Props) { static_assert( ext::oneapi::experimental::is_property_list::value, "Template type is not a property list."); @@ -507,7 +498,7 @@ template class KernelLaunchPropertyWrapper { sycl::ext::oneapi::experimental::indirectly_callable_key>(), "indirectly_callable property cannot be applied to SYCL kernels"); - return processLaunchProperties(Props); + return processKernelLaunchProperties(Props); } // Returns KernelLaunchPropertiesT or std::nullopt based on whether the @@ -520,7 +511,7 @@ template class KernelLaunchPropertyWrapper { if constexpr (ext::oneapi::experimental::detail:: HasKernelPropertiesGetMethod::value) { - return processProperties()>( + return processKernelProperties()>( KernelFunc.get(ext::oneapi::experimental::properties_tag{})); } #endif diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 00c4bc111c3b2..80c0091a6a2de 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -997,7 +997,7 @@ class __SYCL_EXPORT handler sycl::ext::oneapi::experimental::indirectly_callable_key>(), "indirectly_callable property cannot be applied to SYCL kernels"); - processLaunchProperties(Props); + processKernelLaunchProperties(Props); } #endif // INTEL_PREVIEW_BREAKING_CHANGES @@ -1343,8 +1343,8 @@ class __SYCL_EXPORT handler verifyUsedKernelBundleInternal(Name); KernelLaunchPropertiesT ProcessedProps = - KernelLaunchPropertyWrapper::processProperties< - detail::isKernelESIMD(), PropertiesT>(Props); + processKernelProperties(), PropertiesT>( + Props); setKernelLaunchProperties(ProcessedProps); detail::checkValueRange(UserRange); setNDRangeDescriptor(std::move(UserRange)); @@ -1376,8 +1376,7 @@ class __SYCL_EXPORT handler detail::checkValueRange(NumWorkItems); setNDRangeDescriptor(std::move(NumWorkItems)); KernelLaunchPropertiesT ParsedProp = - KernelLaunchPropertyWrapper::processLaunchProperties( - Props); + processKernelLaunchProperties(Props); setKernelLaunchProperties(ParsedProp); setType(detail::CGType::Kernel); extractArgsAndReqs(); @@ -1404,8 +1403,7 @@ class __SYCL_EXPORT handler detail::checkValueRange(NDRange); setNDRangeDescriptor(std::move(NDRange)); KernelLaunchPropertiesT ParsedProp = - KernelLaunchPropertyWrapper::processLaunchProperties( - Props); + processKernelLaunchProperties(Props); setKernelLaunchProperties(ParsedProp); setType(detail::CGType::Kernel); extractArgsAndReqs(); @@ -1449,8 +1447,8 @@ class __SYCL_EXPORT handler StoreLambda(std::move(KernelFunc)); KernelLaunchPropertiesT ProcessedProps = - KernelLaunchPropertyWrapper::processProperties< - detail::isKernelESIMD(), PropertiesT>(Props); + processKernelProperties(), PropertiesT>( + Props); setKernelLaunchProperties(ProcessedProps); #endif } @@ -1505,8 +1503,8 @@ class __SYCL_EXPORT handler StoreLambda(std::move(KernelFunc)); } KernelLaunchPropertiesT ProcessedProps = - KernelLaunchPropertyWrapper::processProperties< - detail::isKernelESIMD(), PropertiesT>(Props); + processKernelProperties(), PropertiesT>( + Props); setKernelLaunchProperties(ProcessedProps); #endif } From 39ee8ac04e55a6532e2520b5c9e7bfe7924f2caa Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Thu, 31 Jul 2025 00:33:16 +0200 Subject: [PATCH 22/25] Make handler::processLaunchProperties call KernelLaunchPropertyWrapper:: func --- sycl/include/sycl/handler.hpp | 83 ++++------------------------------- 1 file changed, 8 insertions(+), 75 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 80c0091a6a2de..3b94ea6dfbfca 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -910,64 +910,9 @@ class __SYCL_EXPORT handler /// Stores information about kernel properties into the handler. template void processLaunchProperties(PropertiesT Props) { - if constexpr (PropertiesT::template has_property< - sycl::ext::intel::experimental::cache_config_key>()) { - auto Config = Props.template get_property< - sycl::ext::intel::experimental::cache_config_key>(); - if (Config == sycl::ext::intel::experimental::large_slm) { - setKernelCacheConfig(StableKernelCacheConfig::LargeSLM); - } else if (Config == sycl::ext::intel::experimental::large_data) { - setKernelCacheConfig(StableKernelCacheConfig::LargeData); - } - } else { - std::ignore = Props; - } - - constexpr bool UsesRootSync = PropertiesT::template has_property< - sycl::ext::oneapi::experimental::use_root_sync_key>(); - if (UsesRootSync) { - setKernelIsCooperative(UsesRootSync); - } - if constexpr (PropertiesT::template has_property< - sycl::ext::oneapi::experimental:: - work_group_progress_key>()) { - auto prop = Props.template get_property< - sycl::ext::oneapi::experimental::work_group_progress_key>(); - verifyDeviceHasProgressGuarantee( - prop.guarantee, - sycl::ext::oneapi::experimental::execution_scope::work_group, - prop.coordinationScope); - } - if constexpr (PropertiesT::template has_property< - sycl::ext::oneapi::experimental:: - sub_group_progress_key>()) { - auto prop = Props.template get_property< - sycl::ext::oneapi::experimental::sub_group_progress_key>(); - verifyDeviceHasProgressGuarantee( - prop.guarantee, - sycl::ext::oneapi::experimental::execution_scope::sub_group, - prop.coordinationScope); - } - if constexpr (PropertiesT::template has_property< - sycl::ext::oneapi::experimental:: - work_item_progress_key>()) { - auto prop = Props.template get_property< - sycl::ext::oneapi::experimental::work_item_progress_key>(); - verifyDeviceHasProgressGuarantee( - prop.guarantee, - sycl::ext::oneapi::experimental::execution_scope::work_item, - prop.coordinationScope); - } - - if constexpr (PropertiesT::template has_property< - sycl::ext::oneapi::experimental:: - work_group_scratch_size>()) { - auto WorkGroupMemSize = Props.template get_property< - sycl::ext::oneapi::experimental::work_group_scratch_size>(); - setKernelWorkGroupMem(WorkGroupMemSize.size); - } - - checkAndSetClusterRange(Props); + KernelLaunchPropertiesT ParsedProp = + processKernelLaunchProperties(Props); + setKernelLaunchProperties(ParsedProp); } /// Process kernel properties. @@ -982,22 +927,9 @@ class __SYCL_EXPORT handler bool IsESIMDKernel, typename PropertiesT = ext::oneapi::experimental::empty_properties_t> void processProperties(PropertiesT Props) { - static_assert( - ext::oneapi::experimental::is_property_list::value, - "Template type is not a property list."); - static_assert( - !PropertiesT::template has_property< - sycl::ext::intel::experimental::fp_control_key>() || - (PropertiesT::template has_property< - sycl::ext::intel::experimental::fp_control_key>() && - IsESIMDKernel), - "Floating point control property is supported for ESIMD kernels only."); - static_assert( - !PropertiesT::template has_property< - sycl::ext::oneapi::experimental::indirectly_callable_key>(), - "indirectly_callable property cannot be applied to SYCL kernels"); - - processKernelLaunchProperties(Props); + KernelLaunchPropertiesT ParsedProp = + processKernelProperties(Props); + setKernelLaunchProperties(ParsedProp); } #endif // INTEL_PREVIEW_BREAKING_CHANGES @@ -3512,7 +3444,8 @@ class __SYCL_EXPORT handler size_t Offset); #ifndef __INTEL_PREVIEW_BREAKING_CHANGES - // Changing values in this will break ABI/API. + // Used by sycl::handler::setKernelCacheConfig. + // Modeled after ur_kernel_cache_config_t enum class StableKernelCacheConfig : int32_t { Default = 0, LargeSLM = 1, From 07884d693b653c852857b05a6e96c030d6c5c5d8 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Thu, 31 Jul 2025 00:42:15 +0200 Subject: [PATCH 23/25] Update comment on setKernelCacheConfig --- sycl/include/sycl/handler.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 3b94ea6dfbfca..2d23d5b545b92 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -3444,8 +3444,8 @@ class __SYCL_EXPORT handler size_t Offset); #ifndef __INTEL_PREVIEW_BREAKING_CHANGES - // Used by sycl::handler::setKernelCacheConfig. // Modeled after ur_kernel_cache_config_t + // Used as an argument to setKernelCacheConfig that's part of the ABI. enum class StableKernelCacheConfig : int32_t { Default = 0, LargeSLM = 1, From d39d4bbb0179358da00ed807dbb81dada107446d Mon Sep 17 00:00:00 2001 From: Udit Kumar Agarwal Date: Thu, 31 Jul 2025 07:31:21 -0700 Subject: [PATCH 24/25] Apply suggestions from code review Co-authored-by: Andrei Elovikov --- sycl/include/sycl/handler.hpp | 5 ++--- sycl/source/handler.cpp | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 2d23d5b545b92..5fc487dc2e4bb 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -419,11 +419,10 @@ template bool range_size_fits_in_size_t(const range &r) { class __SYCL_EXPORT handler : public detail::KernelLaunchPropertyWrapper { -#ifndef __INTEL_PREVIEW_BREAKING_CHANGES static_assert(std::is_empty_v>, "KernelLaunchPropertyWrapper should not have any data members " - "to avoid ABI break."); -#endif // __INTEL_PREVIEW_BREAKING_CHANGES + "to avoid ABI effects."); + private: #ifdef __INTEL_PREVIEW_BREAKING_CHANGES diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index aa1fce3b4292f..049707f3a2ae3 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -2257,7 +2257,6 @@ detail::context_impl &handler::getContextImpl() const { void handler::setKernelLaunchProperties( KernelLaunchPropertyWrapper::KernelLaunchPropertiesT &KernelLaunchProperties) { - impl->KLProps.copyInitializedFrom(KernelLaunchProperties); if (KernelLaunchProperties.MUsesClusterLaunch) { From ed746bec4b3b98430cc9fd519e9d6250fd3f8e89 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Thu, 31 Jul 2025 16:42:47 +0200 Subject: [PATCH 25/25] Delegate verifyDeviceHasProgressGuarantee to new code --- .../sycl/detail/kernel_launch_helper.hpp | 22 +++++----- sycl/include/sycl/handler.hpp | 1 - sycl/source/handler.cpp | 44 ++----------------- 3 files changed, 14 insertions(+), 53 deletions(-) diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index 8594cbaebda6d..ccd801ea5cec8 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -299,8 +299,7 @@ template class KernelLaunchPropertyWrapper { } }; // struct KernelLaunchPropertiesT -private: - void verifyDeviceHasProgressGuarantee( + void verifyIfDeviceHasProgressGuarantee( sycl::ext::oneapi::experimental::forward_progress_guarantee guarantee, sycl::ext::oneapi::experimental::execution_scope threadScope, sycl::ext::oneapi::experimental::execution_scope coordinationScope, @@ -354,7 +353,6 @@ template class KernelLaunchPropertyWrapper { } } -public: /// Process runtime kernel properties. /// /// Stores information about kernel properties into the handler. @@ -393,23 +391,23 @@ template class KernelLaunchPropertyWrapper { if constexpr (PropertiesT::template has_property< work_group_progress_key>()) { auto prop = Props.template get_property(); - verifyDeviceHasProgressGuarantee(prop.guarantee, - execution_scope::work_group, - prop.coordinationScope, retval); + verifyIfDeviceHasProgressGuarantee(prop.guarantee, + execution_scope::work_group, + prop.coordinationScope, retval); } if constexpr (PropertiesT::template has_property< sub_group_progress_key>()) { auto prop = Props.template get_property(); - verifyDeviceHasProgressGuarantee(prop.guarantee, - execution_scope::sub_group, - prop.coordinationScope, retval); + verifyIfDeviceHasProgressGuarantee(prop.guarantee, + execution_scope::sub_group, + prop.coordinationScope, retval); } if constexpr (PropertiesT::template has_property< work_item_progress_key>()) { auto prop = Props.template get_property(); - verifyDeviceHasProgressGuarantee(prop.guarantee, - execution_scope::work_item, - prop.coordinationScope, retval); + verifyIfDeviceHasProgressGuarantee(prop.guarantee, + execution_scope::work_item, + prop.coordinationScope, retval); } } diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 5fc487dc2e4bb..e1f59bb9c3175 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -423,7 +423,6 @@ class __SYCL_EXPORT handler "KernelLaunchPropertyWrapper should not have any data members " "to avoid ABI effects."); - private: #ifdef __INTEL_PREVIEW_BREAKING_CHANGES /// Constructs SYCL handler from the pre-constructed stack-allocated diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 049707f3a2ae3..1daa397ae5300 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -2082,46 +2082,10 @@ void handler::verifyDeviceHasProgressGuarantee( sycl::ext::oneapi::experimental::execution_scope threadScope, sycl::ext::oneapi::experimental::execution_scope coordinationScope) { - using execution_scope = sycl::ext::oneapi::experimental::execution_scope; - using forward_progress = - sycl::ext::oneapi::experimental::forward_progress_guarantee; - const bool supported = impl->get_device().supportsForwardProgress( - guarantee, threadScope, coordinationScope); - if (threadScope == execution_scope::work_group) { - if (!supported) { - throw sycl::exception( - sycl::errc::feature_not_supported, - "Required progress guarantee for work groups is not " - "supported by this device."); - } - // If we are here, the device supports the guarantee required but there is a - // caveat in that if the guarantee required is a concurrent guarantee, then - // we most likely also need to enable cooperative launch of the kernel. That - // is, although the device supports the required guarantee, some setup work - // is needed to truly make the device provide that guarantee at runtime. - // Otherwise, we will get the default guarantee which is weaker than - // concurrent. Same reasoning applies for sub_group but not for work_item. - // TODO: Further design work is probably needed to reflect this behavior in - // Unified Runtime. - if (guarantee == forward_progress::concurrent) - setKernelIsCooperative(true); - } else if (threadScope == execution_scope::sub_group) { - if (!supported) { - throw sycl::exception(sycl::errc::feature_not_supported, - "Required progress guarantee for sub groups is not " - "supported by this device."); - } - // Same reasoning as above. - if (guarantee == forward_progress::concurrent) - setKernelIsCooperative(true); - } else { // threadScope is execution_scope::work_item otherwise undefined - // behavior - if (!supported) { - throw sycl::exception(sycl::errc::feature_not_supported, - "Required progress guarantee for work items is not " - "supported by this device."); - } - } + KernelLaunchPropertiesT Kprop; + verifyIfDeviceHasProgressGuarantee(guarantee, threadScope, coordinationScope, + Kprop); + impl->KLProps.copyInitializedFrom(Kprop); } #endif