diff --git a/sycl/include/sycl/detail/kernel_launch_helper.hpp b/sycl/include/sycl/detail/kernel_launch_helper.hpp index f5af740ad8751..a310b953a602e 100644 --- a/sycl/include/sycl/detail/kernel_launch_helper.hpp +++ b/sycl/include/sycl/detail/kernel_launch_helper.hpp @@ -12,6 +12,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -251,20 +254,279 @@ struct KernelWrapper< } }; // KernelWrapper struct -struct KernelLaunchPropertyWrapper { - template - static void parseProperties([[maybe_unused]] PropertyProcessor h, - [[maybe_unused]] const KernelType &KernelFunc) { +// This struct is inherited by sycl::handler. +template class KernelLaunchPropertyWrapper { +public: + // This struct is used to store kernel launch properties. + // std::optional is used to indicate that the property is not set. + // In some code paths, kernel launch properties are set multiple times + // for the same kernel, that is why using std::optional to avoid overriding + // previously set properties. + struct KernelLaunchPropertiesT { + std::optional MCacheConfig = std::nullopt; + std::optional MIsCooperative = std::nullopt; + std::optional MWorkGroupMemorySize = std::nullopt; + std::optional MUsesClusterLaunch = std::nullopt; + size_t MClusterDims = 0; + std::array MClusterSize = {0, 0, 0}; + + KernelLaunchPropertiesT() = default; + + KernelLaunchPropertiesT(ur_kernel_cache_config_t _CacheConfig, + bool _IsCooperative, uint32_t _WorkGroupMemorySize, + bool _UsesClusterLaunch, size_t _ClusterDims, + std::array _ClusterSize) + : MCacheConfig(_CacheConfig), MIsCooperative(_IsCooperative), + MWorkGroupMemorySize(_WorkGroupMemorySize), + MUsesClusterLaunch(_UsesClusterLaunch), MClusterDims(_ClusterDims), + MClusterSize(_ClusterSize) {} + + void copyInitializedFrom(const KernelLaunchPropertiesT &Other) { + if (Other.MCacheConfig) + MCacheConfig = Other.MCacheConfig; + + if (Other.MIsCooperative) + MIsCooperative = Other.MIsCooperative; + + if (Other.MWorkGroupMemorySize) + MWorkGroupMemorySize = Other.MWorkGroupMemorySize; + + if (Other.MUsesClusterLaunch) { + MUsesClusterLaunch = Other.MUsesClusterLaunch; + MClusterDims = Other.MClusterDims; + MClusterSize = Other.MClusterSize; + } + } + }; // struct KernelLaunchPropertiesT + +private: +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + // Modeled after ur_kernel_cache_config_t + enum class StableKernelCacheConfig : int32_t { + Default = 0, + LargeSLM = 1, + LargeData = 2 + }; +#endif // __INTEL_PREVIEW_BREAKING_CHANGES + + void verifyDeviceHasProgressGuarantee( + sycl::ext::oneapi::experimental::forward_progress_guarantee guarantee, + sycl::ext::oneapi::experimental::execution_scope threadScope, + sycl::ext::oneapi::experimental::execution_scope coordinationScope, + KernelLaunchPropertiesT &prop) { + + using execution_scope = sycl::ext::oneapi::experimental::execution_scope; + using forward_progress = + sycl::ext::oneapi::experimental::forward_progress_guarantee; + + Self *pp = static_cast(this); + bool supported = pp->deviceSupportForwardProgress(guarantee, threadScope, + coordinationScope); + + if (threadScope == execution_scope::work_group) { + if (!supported) { + throw sycl::exception( + sycl::errc::feature_not_supported, + "Required progress guarantee for work groups is not " + "supported by this device."); + } + // If we are here, the device supports the guarantee required but there is + // a caveat in that if the guarantee required is a concurrent guarantee, + // then we most likely also need to enable cooperative launch of the + // kernel. That is, although the device supports the required guarantee, + // some setup work is needed to truly make the device provide that + // guarantee at runtime. Otherwise, we will get the default guarantee + // which is weaker than concurrent. Same reasoning applies for sub_group + // but not for work_item. + // TODO: Further design work is probably needed to reflect this behavior + // in Unified Runtime. + if (guarantee == forward_progress::concurrent) + prop.MIsCooperative = true; + } else if (threadScope == execution_scope::sub_group) { + if (!supported) { + throw sycl::exception( + sycl::errc::feature_not_supported, + "Required progress guarantee for sub groups is not " + "supported by this device."); + } + // Same reasoning as above. + if (guarantee == forward_progress::concurrent) + prop.MIsCooperative = true; + } else { // threadScope is execution_scope::work_item otherwise undefined + // behavior + if (!supported) { + throw sycl::exception( + sycl::errc::feature_not_supported, + "Required progress guarantee for work items is not " + "supported by this device."); + } + } + } + +public: + /// Process runtime kernel properties. + /// + /// Stores information about kernel properties into the handler. + template + KernelLaunchPropertiesT processLaunchProperties(PropertiesT Props) { + using namespace sycl::ext::oneapi::experimental; + using namespace sycl::ext::oneapi::experimental::detail; + KernelLaunchPropertiesT retval; + Self *pp = static_cast(this); + bool HasGraph = pp->isKernelAssociatedWithGraph(); + + // Process Kernel cache configuration property. + { + if constexpr (PropertiesT::template has_property< + sycl::ext::intel::experimental::cache_config_key>()) { + auto Config = Props.template get_property< + sycl::ext::intel::experimental::cache_config_key>(); + if (Config == sycl::ext::intel::experimental::large_slm) { + retval.MCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_SLM; + } else if (Config == sycl::ext::intel::experimental::large_data) { + retval.MCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_DATA; + } + } else { + std::ignore = Props; + } + } + + // Process Kernel cooperative property. + { + if constexpr (PropertiesT::template has_property()) + retval.MIsCooperative = true; + } + + // Process device progress properties. + { + if constexpr (PropertiesT::template has_property< + work_group_progress_key>()) { + auto prop = Props.template get_property(); + verifyDeviceHasProgressGuarantee(prop.guarantee, + execution_scope::work_group, + prop.coordinationScope, retval); + } + if constexpr (PropertiesT::template has_property< + sub_group_progress_key>()) { + auto prop = Props.template get_property(); + verifyDeviceHasProgressGuarantee(prop.guarantee, + execution_scope::sub_group, + prop.coordinationScope, retval); + } + if constexpr (PropertiesT::template has_property< + work_item_progress_key>()) { + auto prop = Props.template get_property(); + verifyDeviceHasProgressGuarantee(prop.guarantee, + execution_scope::work_item, + prop.coordinationScope, retval); + } + } + + // Process work group scratch memory property. + { + if constexpr (PropertiesT::template has_property< + work_group_scratch_size>()) { + if (HasGraph) { + std::string FeatureString = UnsupportedFeatureToString( + UnsupportedGraphFeatures:: + sycl_ext_oneapi_work_group_scratch_memory); + throw sycl::exception(sycl::make_error_code(errc::invalid), + "The " + FeatureString + + " feature is not yet available " + "for use with the SYCL Graph extension."); + } + + auto WorkGroupMemSize = + Props.template get_property(); + retval.MWorkGroupMemorySize = WorkGroupMemSize.size; + } + } + + // Parse cluster properties. + { + constexpr std::size_t ClusterDim = getClusterDim(); + if constexpr (ClusterDim > 0) { + if (HasGraph) { + std::string FeatureString = UnsupportedFeatureToString( + UnsupportedGraphFeatures:: + sycl_ext_oneapi_experimental_cuda_cluster_launch); + throw sycl::exception(sycl::make_error_code(errc::invalid), + "The " + FeatureString + + " feature is not yet available " + "for use with the SYCL Graph extension."); + } + + auto ClusterSize = + Props.template get_property>() + .get_cluster_size(); + retval.MUsesClusterLaunch = true; + retval.MClusterDims = ClusterDim; + if (ClusterDim == 1) { + retval.MClusterSize[0] = ClusterSize[0]; + } else if (ClusterDim == 2) { + retval.MClusterSize[0] = ClusterSize[0]; + retval.MClusterSize[1] = ClusterSize[1]; + } else if (ClusterDim == 3) { + retval.MClusterSize[0] = ClusterSize[0]; + retval.MClusterSize[1] = ClusterSize[1]; + retval.MClusterSize[2] = ClusterSize[2]; + } else { + assert(ClusterDim <= 3 && + "Only 1D, 2D, and 3D cluster launch is supported."); + } + } + } + + return retval; + } + + /// Process kernel properties. + /// + /// Stores information about kernel properties into the handler. + /// + /// Note: it is important that this function *does not* depend on kernel + /// name or kernel type, because then it will be instantiated for every + /// kernel, even though body of those instantiated functions could be almost + /// the same, thus unnecessary increasing compilation time. + template < + bool IsESIMDKernel, + typename PropertiesT = ext::oneapi::experimental::empty_properties_t> + KernelLaunchPropertiesT processProperties(PropertiesT Props) { + static_assert( + ext::oneapi::experimental::is_property_list::value, + "Template type is not a property list."); + static_assert( + !PropertiesT::template has_property< + sycl::ext::intel::experimental::fp_control_key>() || + (PropertiesT::template has_property< + sycl::ext::intel::experimental::fp_control_key>() && + IsESIMDKernel), + "Floating point control property is supported for ESIMD kernels only."); + static_assert( + !PropertiesT::template has_property< + sycl::ext::oneapi::experimental::indirectly_callable_key>(), + "indirectly_callable property cannot be applied to SYCL kernels"); + + return processLaunchProperties(Props); + } + + // Returns KernelLaunchPropertiesT or std::nullopt based on whether the + // kernel functor has a get method that returns properties. + template + std::optional + parseProperties([[maybe_unused]] const KernelType &KernelFunc) { #ifndef __SYCL_DEVICE_ONLY__ // If there are properties provided by get method then process them. if constexpr (ext::oneapi::experimental::detail:: HasKernelPropertiesGetMethod::value) { - h->template processProperties()>( + return processProperties()>( KernelFunc.get(ext::oneapi::experimental::properties_tag{})); } #endif + // If there are no properties provided by get method then return empty + // optional. + return std::nullopt; } }; // KernelLaunchPropertyWrapper struct diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index bd91b9dc755ec..00c4bc111c3b2 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -416,7 +416,15 @@ template bool range_size_fits_in_size_t(const range &r) { /// \sa kernel /// /// \ingroup sycl_api -class __SYCL_EXPORT handler { +class __SYCL_EXPORT handler + : public detail::KernelLaunchPropertyWrapper { + +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + static_assert(std::is_empty_v>, + "KernelLaunchPropertyWrapper should not have any data members " + "to avoid ABI break."); +#endif // __INTEL_PREVIEW_BREAKING_CHANGES + private: #ifdef __INTEL_PREVIEW_BREAKING_CHANGES /// Constructs SYCL handler from the pre-constructed stack-allocated @@ -877,6 +885,7 @@ class __SYCL_EXPORT handler { } } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES void verifyDeviceHasProgressGuarantee( sycl::ext::oneapi::experimental::forward_progress_guarantee guarantee, sycl::ext::oneapi::experimental::execution_scope threadScope, @@ -990,6 +999,7 @@ class __SYCL_EXPORT handler { processLaunchProperties(Props); } +#endif // INTEL_PREVIEW_BREAKING_CHANGES /// Checks whether it is possible to copy the source shape to the destination /// shape(the shapes are described by the accessor ranges) by using @@ -1296,8 +1306,10 @@ class __SYCL_EXPORT handler { decltype(Wrapper), TransformedArgType, PropertiesT>::wrap(Wrapper); - detail::KernelLaunchPropertyWrapper::parseProperties(this, - Wrapper); + if (auto prop = parseProperties(Wrapper)) { + setKernelLaunchProperties(*prop); + } + #ifndef __SYCL_DEVICE_ONLY__ constexpr detail::string_view Name{detail::getKernelName()}; verifyUsedKernelBundleInternal(Name); @@ -1323,13 +1335,17 @@ class __SYCL_EXPORT handler { // kernel is generated detail::KernelWrapper::wrap(KernelFunc); - detail::KernelLaunchPropertyWrapper::parseProperties(this, - KernelFunc); + if (auto prop = parseProperties(KernelFunc)) { + setKernelLaunchProperties(*prop); + } #ifndef __SYCL_DEVICE_ONLY__ constexpr detail::string_view Name{detail::getKernelName()}; verifyUsedKernelBundleInternal(Name); - processProperties(), PropertiesT>(Props); + KernelLaunchPropertiesT ProcessedProps = + KernelLaunchPropertyWrapper::processProperties< + detail::isKernelESIMD(), PropertiesT>(Props); + setKernelLaunchProperties(ProcessedProps); detail::checkValueRange(UserRange); setNDRangeDescriptor(std::move(UserRange)); StoreLambda( @@ -1359,7 +1375,10 @@ class __SYCL_EXPORT handler { MKernel = detail::getSyclObjImpl(std::move(Kernel)); detail::checkValueRange(NumWorkItems); setNDRangeDescriptor(std::move(NumWorkItems)); - processLaunchProperties(Props); + KernelLaunchPropertiesT ParsedProp = + KernelLaunchPropertyWrapper::processLaunchProperties( + Props); + setKernelLaunchProperties(ParsedProp); setType(detail::CGType::Kernel); extractArgsAndReqs(); MKernelName = getKernelName(); @@ -1384,7 +1403,10 @@ class __SYCL_EXPORT handler { MKernel = detail::getSyclObjImpl(std::move(Kernel)); detail::checkValueRange(NDRange); setNDRangeDescriptor(std::move(NDRange)); - processLaunchProperties(Props); + KernelLaunchPropertiesT ParsedProp = + KernelLaunchPropertyWrapper::processLaunchProperties( + Props); + setKernelLaunchProperties(ParsedProp); setType(detail::CGType::Kernel); extractArgsAndReqs(); MKernelName = getKernelName(); @@ -1405,8 +1427,9 @@ class __SYCL_EXPORT handler { (void)Props; detail::KernelWrapper::wrap(KernelFunc); - detail::KernelLaunchPropertyWrapper::parseProperties(this, - KernelFunc); + if (auto prop = parseProperties(KernelFunc)) { + setKernelLaunchProperties(*prop); + } #ifndef __SYCL_DEVICE_ONLY__ if constexpr (WrapAsVal == detail::WrapAs::single_task) { throwOnKernelParameterMisuse(); @@ -1425,7 +1448,10 @@ class __SYCL_EXPORT handler { } StoreLambda(std::move(KernelFunc)); - processProperties(), PropertiesT>(Props); + KernelLaunchPropertiesT ProcessedProps = + KernelLaunchPropertyWrapper::processProperties< + detail::isKernelESIMD(), PropertiesT>(Props); + setKernelLaunchProperties(ProcessedProps); #endif } @@ -1448,8 +1474,9 @@ class __SYCL_EXPORT handler { (void)Kernel; detail::KernelWrapper::wrap(KernelFunc); - detail::KernelLaunchPropertyWrapper::parseProperties(this, - KernelFunc); + if (auto prop = parseProperties(KernelFunc)) { + setKernelLaunchProperties(*prop); + } #ifndef __SYCL_DEVICE_ONLY__ if constexpr (WrapAsVal == detail::WrapAs::single_task) { throwOnKernelParameterMisuse(); @@ -1477,7 +1504,10 @@ class __SYCL_EXPORT handler { } else { StoreLambda(std::move(KernelFunc)); } - processProperties(), PropertiesT>(Props); + KernelLaunchPropertiesT ProcessedProps = + KernelLaunchPropertyWrapper::processProperties< + detail::isKernelESIMD(), PropertiesT>(Props); + setKernelLaunchProperties(ProcessedProps); #endif } #endif // __INTEL_PREVIEW_BREAKING_CHANGES @@ -3483,6 +3513,7 @@ class __SYCL_EXPORT handler { bool IsDeviceImageScoped, size_t NumBytes, size_t Offset); +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES // Changing values in this will break ABI/API. enum class StableKernelCacheConfig : int32_t { Default = 0, @@ -3496,15 +3527,22 @@ class __SYCL_EXPORT handler { void setKernelIsCooperative(bool); // Set using cuda thread block cluster launch flag and set the launch bounds. -#ifndef __INTEL_PREVIEW_BREAKING_CHANGES void setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims); -#endif void setKernelClusterLaunch(sycl::range<3> ClusterSize); void setKernelClusterLaunch(sycl::range<2> ClusterSize); void setKernelClusterLaunch(sycl::range<1> ClusterSize); // Set the request work group memory size (work_group_static ext). void setKernelWorkGroupMem(size_t Size); +#endif + + void + setKernelLaunchProperties(KernelLaunchPropertiesT &KernelLaunchProperties); + bool isKernelAssociatedWithGraph() const; + bool deviceSupportForwardProgress( + sycl::ext::oneapi::experimental::forward_progress_guarantee guarantee, + sycl::ext::oneapi::experimental::execution_scope threadScope, + sycl::ext::oneapi::experimental::execution_scope coordinationScope) const; // Various checks that are only meaningful for host compilation, because they // result in runtime errors (i.e. exceptions being thrown). To save time @@ -3651,7 +3689,7 @@ class __SYCL_EXPORT handler { void instantiateKernelOnHost(void *InstantiateKernelOnHostPtr); friend class detail::HandlerAccess; - friend struct detail::KernelLaunchPropertyWrapper; + friend class detail::KernelLaunchPropertyWrapper; #ifdef __INTEL_PREVIEW_BREAKING_CHANGES __SYCL_DLL_LOCAL detail::handler_impl *get_impl() { return impl; } diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index 0fda3dd4f2769..2f89093b61661 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -12,6 +12,7 @@ #include #include #include +#include namespace sycl { inline namespace _V1 { @@ -104,11 +105,14 @@ class handler_impl { // If the pipe operation is read or write, 1 for read 0 for write. bool HostPipeRead = true; - ur_kernel_cache_config_t MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_DEFAULT; - - bool MKernelIsCooperative = false; - bool MKernelUsesClusterLaunch = false; - uint32_t MKernelWorkGroupMemorySize = 0; + KernelLaunchPropertyWrapper::KernelLaunchPropertiesT KLProps{ + UR_KERNEL_CACHE_CONFIG_DEFAULT, // MCacheConfig + false, // MIsCooperative + 0, // MWorkGroupMemorySize + false, // MUsesClusterLaunch + 0, // MClusterDims + {0, 0, 0} // MClusterSize + }; // Extra information for bindless image copy ur_image_desc_t MSrcImageDesc = {}; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 33f50af815a5b..aa1fce3b4292f 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -313,7 +313,6 @@ fill_copy_args(detail::handler_impl *impl, 0 /*DestPitch*/, SrcOffset, SrcExtent, DestOffset, DestExtent, CopyExtent); } - } // namespace detail #ifdef __INTEL_PREVIEW_BREAKING_CHANGES @@ -627,10 +626,11 @@ event handler::finalize() { impl->get_queue(), impl->MNDRDesc, impl->MArgs, KernelBundleImpPtr, MKernel.get(), toKernelNameStrT(MKernelName), impl->MKernelNameBasedCachePtr, RawEvents, ResultEvent.get(), - nullptr, impl->MKernelCacheConfig, impl->MKernelIsCooperative, - impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, - BinImage, impl->MKernelFuncPtr, impl->MKernelNumArgs, - impl->MKernelParamDescGetter, impl->MKernelHasSpecialCaptures); + nullptr, *impl->KLProps.MCacheConfig, *impl->KLProps.MIsCooperative, + *impl->KLProps.MUsesClusterLaunch, + *impl->KLProps.MWorkGroupMemorySize, BinImage, impl->MKernelFuncPtr, + impl->MKernelNumArgs, impl->MKernelParamDescGetter, + impl->MKernelHasSpecialCaptures); #ifdef XPTI_ENABLE_INSTRUMENTATION if (xptiEnabled) { // Emit signal only when event is created @@ -689,8 +689,8 @@ event handler::finalize() { std::move(impl->MArgs), toKernelNameStrT(MKernelName), impl->MKernelNameBasedCachePtr, std::move(MStreamStorage), std::move(impl->MAuxiliaryResources), getType(), - impl->MKernelCacheConfig, impl->MKernelIsCooperative, - impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, + *impl->KLProps.MCacheConfig, *impl->KLProps.MIsCooperative, + *impl->KLProps.MUsesClusterLaunch, *impl->KLProps.MWorkGroupMemorySize, MCodeLoc)); break; } @@ -2076,10 +2076,12 @@ static bool checkContextSupports(detail::context_impl &ContextImpl, return SupportsOp; } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES void handler::verifyDeviceHasProgressGuarantee( sycl::ext::oneapi::experimental::forward_progress_guarantee guarantee, sycl::ext::oneapi::experimental::execution_scope threadScope, sycl::ext::oneapi::experimental::execution_scope coordinationScope) { + using execution_scope = sycl::ext::oneapi::experimental::execution_scope; using forward_progress = sycl::ext::oneapi::experimental::forward_progress_guarantee; @@ -2121,6 +2123,7 @@ void handler::verifyDeviceHasProgressGuarantee( } } } +#endif bool handler::supportsUSMMemcpy2D() { if (impl->get_graph_or_null()) @@ -2251,30 +2254,70 @@ detail::context_impl &handler::getContextImpl() const { return impl->get_queue().getContextImpl(); } +void handler::setKernelLaunchProperties( + KernelLaunchPropertyWrapper::KernelLaunchPropertiesT + &KernelLaunchProperties) { + + impl->KLProps.copyInitializedFrom(KernelLaunchProperties); + + if (KernelLaunchProperties.MUsesClusterLaunch) { + if (KernelLaunchProperties.MClusterDims == 1) { + sycl::range<1> ClusterSizeTrimmed = { + KernelLaunchProperties.MClusterSize[0]}; + impl->MNDRDesc.setClusterDimensions(ClusterSizeTrimmed); + } else if (KernelLaunchProperties.MClusterDims == 2) { + sycl::range<2> ClusterSizeTrimmed = { + KernelLaunchProperties.MClusterSize[0], + KernelLaunchProperties.MClusterSize[1]}; + impl->MNDRDesc.setClusterDimensions(ClusterSizeTrimmed); + } else if (KernelLaunchProperties.MClusterDims == 3) { + sycl::range<3> ClusterSizeTrimmed = { + KernelLaunchProperties.MClusterSize[0], + KernelLaunchProperties.MClusterSize[1], + KernelLaunchProperties.MClusterSize[2]}; + impl->MNDRDesc.setClusterDimensions(ClusterSizeTrimmed); + } + } +} + +bool handler::isKernelAssociatedWithGraph() const { + return getCommandGraph() != nullptr; +} + +bool handler::deviceSupportForwardProgress( + sycl::ext::oneapi::experimental::forward_progress_guarantee guarantee, + sycl::ext::oneapi::experimental::execution_scope threadScope, + sycl::ext::oneapi::experimental::execution_scope coordinationScope) const { + + return impl->get_device().supportsForwardProgress(guarantee, threadScope, + coordinationScope); +} + +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + void handler::setKernelCacheConfig(handler::StableKernelCacheConfig Config) { switch (Config) { case handler::StableKernelCacheConfig::Default: - impl->MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_DEFAULT; + impl->KLProps.MCacheConfig = UR_KERNEL_CACHE_CONFIG_DEFAULT; break; case handler::StableKernelCacheConfig::LargeSLM: - impl->MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_SLM; + impl->KLProps.MCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_SLM; break; case handler::StableKernelCacheConfig::LargeData: - impl->MKernelCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_DATA; + impl->KLProps.MCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_DATA; break; } } -void handler::setKernelIsCooperative(bool KernelIsCooperative) { - impl->MKernelIsCooperative = KernelIsCooperative; +void handler::setKernelIsCooperative(bool IsCooperative) { + impl->KLProps.MIsCooperative = IsCooperative; } -#ifndef __INTEL_PREVIEW_BREAKING_CHANGES void handler::setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims) { throwIfGraphAssociated< syclex::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_experimental_cuda_cluster_launch>(); - impl->MKernelUsesClusterLaunch = true; + impl->KLProps.MUsesClusterLaunch = true; if (Dims == 1) { sycl::range<1> ClusterSizeTrimmed = {ClusterSize[0]}; @@ -2286,13 +2329,12 @@ void handler::setKernelClusterLaunch(sycl::range<3> ClusterSize, int Dims) { impl->MNDRDesc.setClusterDimensions(ClusterSize); } } -#endif void handler::setKernelClusterLaunch(sycl::range<3> ClusterSize) { throwIfGraphAssociated< syclex::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_experimental_cuda_cluster_launch>(); - impl->MKernelUsesClusterLaunch = true; + impl->KLProps.MUsesClusterLaunch = true; impl->MNDRDesc.setClusterDimensions(ClusterSize); } @@ -2300,7 +2342,7 @@ void handler::setKernelClusterLaunch(sycl::range<2> ClusterSize) { throwIfGraphAssociated< syclex::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_experimental_cuda_cluster_launch>(); - impl->MKernelUsesClusterLaunch = true; + impl->KLProps.MUsesClusterLaunch = true; impl->MNDRDesc.setClusterDimensions(ClusterSize); } @@ -2308,15 +2350,16 @@ void handler::setKernelClusterLaunch(sycl::range<1> ClusterSize) { throwIfGraphAssociated< syclex::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_experimental_cuda_cluster_launch>(); - impl->MKernelUsesClusterLaunch = true; + impl->KLProps.MUsesClusterLaunch = true; impl->MNDRDesc.setClusterDimensions(ClusterSize); } void handler::setKernelWorkGroupMem(size_t Size) { throwIfGraphAssociated(); - impl->MKernelWorkGroupMemorySize = Size; + impl->KLProps.MWorkGroupMemorySize = Size; } +#endif // __INTEL_PREVIEW_BREAKING_CHANGES void handler::ext_oneapi_graph( ext::oneapi::experimental::command_graph< diff --git a/sycl/test/abi/layout_handler.cpp b/sycl/test/abi/layout_handler.cpp index f313066acc6f4..302d68f429241 100644 --- a/sycl/test/abi/layout_handler.cpp +++ b/sycl/test/abi/layout_handler.cpp @@ -14,6 +14,7 @@ void foo() { // The order of field declarations and their types are important. // CHECK: 0 | class sycl::handler +// CHECK-NEXT: 0 | class sycl::detail::KernelLaunchPropertyWrapper (base) (empty) // CHECK-NEXT: 0 | class std::shared_ptr impl // CHECK-NEXT: 0 | class std::__shared_ptr (base) // CHECK-NEXT: 0 | class std::__shared_ptr_access (base) (empty) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index dd392cf315b88..8cd1cf0b3b342 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3605,6 +3605,7 @@ _ZN4sycl3_V17handler24ext_oneapi_memset2d_implEPvmimm _ZN4sycl3_V17handler24registerDynamicParameterEPNS0_3ext6oneapi12experimental6detail22dynamic_parameter_implEi _ZN4sycl3_V17handler24registerDynamicParameterERNS0_3ext6oneapi12experimental6detail22dynamic_parameter_baseEi _ZN4sycl3_V17handler25ext_intel_write_host_pipeENS0_6detail11string_viewEPvmb +_ZN4sycl3_V17handler25setKernelLaunchPropertiesERNS0_6detail27KernelLaunchPropertyWrapperIS1_E23KernelLaunchPropertiesTE _ZN4sycl3_V17handler26associateWithHandlerCommonESt10shared_ptrINS0_6detail16AccessorImplHostEEi _ZN4sycl3_V17handler26setKernelNameBasedCachePtrEPNS0_6detail21KernelNameBasedCacheTE _ZN4sycl3_V17handler26setNDRangeDescriptorPaddedENS0_5rangeILi3EEENS0_2idILi3EEEi @@ -4111,7 +4112,9 @@ _ZNK4sycl3_V17handler15getKernelBundleEv _ZNK4sycl3_V17handler16getDeviceBackendEv _ZNK4sycl3_V17handler17getContextImplPtrEv _ZNK4sycl3_V17handler21HasAssociatedAccessorEPNS0_6detail16AccessorImplHostENS0_6access6targetE +_ZNK4sycl3_V17handler27isKernelAssociatedWithGraphEv _ZNK4sycl3_V17handler27isStateExplicitKernelBundleEv +_ZNK4sycl3_V17handler28deviceSupportForwardProgressENS0_3ext6oneapi12experimental26forward_progress_guaranteeENS4_15execution_scopeES6_ _ZNK4sycl3_V17handler30getOrInsertHandlerKernelBundleEb _ZNK4sycl3_V17handler33getOrInsertHandlerKernelBundlePtrEb _ZNK4sycl3_V17handler7getTypeEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 748d74482a1a5..21da9862af6ec 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -229,9 +229,9 @@ ??$get_info_impl@Unative_vector_width_int@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Unative_vector_width_long@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Unative_vector_width_short@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ +??$get_info_impl@Unode_mask@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Unum_args@kernel@info@_V1@sycl@@@kernel@_V1@sycl@@AEBAIXZ ??$get_info_impl@Unum_compute_units@device@info@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA_KXZ -??$get_info_impl@Unode_mask@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Uopencl_c_version@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AVstring@detail@12@XZ ??$get_info_impl@Uparent_device@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV012@XZ ??$get_info_impl@Upartition_affinity_domains@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4partition_affinity_domain@info@_V1@sycl@@V?$allocator@W4partition_affinity_domain@info@_V1@sycl@@@std@@@std@@XZ @@ -549,6 +549,8 @@ ??1sampler@_V1@sycl@@QEAA@XZ ??1stream@_V1@sycl@@QEAA@XZ ??1tls_code_loc_t@detail@_V1@sycl@@QEAA@XZ +??4?$KernelLaunchPropertyWrapper@Vhandler@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z +??4?$KernelLaunchPropertyWrapper@Vhandler@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4?$OwnerLessBase@Vcontext@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4?$OwnerLessBase@Vcontext@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4?$OwnerLessBase@Vdevice@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z @@ -3861,6 +3863,7 @@ ?destroy_image_handle@experimental@oneapi@ext@_V1@sycl@@YAXAEAUsampled_image_handle@12345@AEBVqueue@45@@Z ?destroy_image_handle@experimental@oneapi@ext@_V1@sycl@@YAXAEAUunsampled_image_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z ?destroy_image_handle@experimental@oneapi@ext@_V1@sycl@@YAXAEAUunsampled_image_handle@12345@AEBVqueue@45@@Z +?deviceSupportForwardProgress@handler@_V1@sycl@@AEBA_NW4forward_progress_guarantee@experimental@oneapi@ext@23@W4execution_scope@56723@1@Z ?device_has@queue@_V1@sycl@@AEBA_NW4aspect@23@@Z ?empty@kernel_bundle_plain@detail@_V1@sycl@@QEBA_NXZ ?enable_ext_oneapi_default_context@detail@_V1@sycl@@YAX_N@Z @@ -4187,8 +4190,8 @@ ?get_impl@handler@_V1@sycl@@AEAAPEAVhandler_impl@detail@23@XZ ?get_kernel@kernel_bundle_plain@detail@_V1@sycl@@IEBA?AVkernel@34@AEBVkernel_id@34@@Z ?get_kernel_bundle@kernel@_V1@sycl@@QEBA?AV?$kernel_bundle@$01@23@XZ -?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBV?$vector@Vkernel_id@_V1@sycl@@V?$allocator@Vkernel_id@_V1@sycl@@@std@@@5@W4bundle_state@23@@Z ?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBV?$span@D$0?0@23@W4bundle_state@23@@Z +?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBV?$vector@Vkernel_id@_V1@sycl@@V?$allocator@Vkernel_id@_V1@sycl@@@std@@@5@W4bundle_state@23@@Z ?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@W4bundle_state@23@@Z ?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@W4bundle_state@23@AEBV?$function@$$A6A_NAEBV?$shared_ptr@Vdevice_image_impl@detail@_V1@sycl@@@std@@@Z@5@@Z ?get_kernel_id_impl@detail@_V1@sycl@@YA?AVkernel_id@23@Vstring_view@123@@Z @@ -4260,6 +4263,7 @@ ?isConstOrGlobal@handler@_V1@sycl@@CA_NW4target@access@23@@Z ?isDeviceGlobalUsedInKernel@detail@_V1@sycl@@YA_NPEBX@Z ?isImageOrImageArray@handler@_V1@sycl@@CA_NW4target@access@23@@Z +?isKernelAssociatedWithGraph@handler@_V1@sycl@@AEBA_NXZ ?isMemoryObjectUsedByGraph@AccessorBaseHost@detail@_V1@sycl@@QEBA_NXZ ?isOutOfRange@detail@_V1@sycl@@YA_NV?$vec@H$03@23@W4addressing_mode@23@V?$range@$02@23@@Z ?isPathPresent@OSUtil@detail@_V1@sycl@@SA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z @@ -4415,6 +4419,7 @@ ?setKernelClusterLaunch@handler@_V1@sycl@@AEAAXV?$range@$02@23@H@Z ?setKernelInfo@handler@_V1@sycl@@AEAAXPEAXHP6A?AUkernel_param_desc_t@detail@23@H@Z_N2@Z ?setKernelIsCooperative@handler@_V1@sycl@@AEAAX_N@Z +?setKernelLaunchProperties@handler@_V1@sycl@@AEAAXAEAUKernelLaunchPropertiesT@?$KernelLaunchPropertyWrapper@Vhandler@_V1@sycl@@@detail@23@@Z ?setKernelNameBasedCachePtr@handler@_V1@sycl@@AEAAXPEAUKernelNameBasedCacheT@detail@23@@Z ?setKernelWorkGroupMem@handler@_V1@sycl@@AEAAX_K@Z ?setLocalAccessorArgHelper@handler@_V1@sycl@@AEAAXHAEAVLocalAccessorBaseHost@detail@23@@Z @@ -4427,8 +4432,8 @@ ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@0V?$id@$02@23@@Z ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@V?$id@$02@23@@Z ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@_N@Z -?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@V?$id@$02@23@H@Z ?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@0V?$id@$02@23@H@Z +?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@V?$id@$02@23@H@Z ?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@_NH@Z ?setNDRangeUsed@handler@_V1@sycl@@AEAAX_N@Z ?setStateExplicitKernelBundle@handler@_V1@sycl@@AEAAXXZ @@ -4502,6 +4507,7 @@ ?updateValue@dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXPEBX_K@Z ?updateWorkGroupMem@dynamic_work_group_memory_base@detail@experimental@oneapi@ext@_V1@sycl@@IEAAX_K@Z ?use_kernel_bundle@handler@_V1@sycl@@QEAAXAEBV?$kernel_bundle@$01@23@@Z +?verifyDeviceHasProgressGuarantee@?$KernelLaunchPropertyWrapper@Vhandler@_V1@sycl@@@detail@_V1@sycl@@AEAAXW4forward_progress_guarantee@experimental@oneapi@ext@34@W4execution_scope@67834@1AEAUKernelLaunchPropertiesT@1234@@Z ?verifyDeviceHasProgressGuarantee@handler@_V1@sycl@@AEAAXW4forward_progress_guarantee@experimental@oneapi@ext@23@W4execution_scope@56723@1@Z ?verifyReductionProps@detail@_V1@sycl@@YAXAEBVproperty_list@23@@Z ?verifyUSMAllocatorProperties@_V1@sycl@@YAXAEBVproperty_list@12@@Z diff --git a/sycl/test/extensions/properties/non_esimd_kernel_fp_control.cpp b/sycl/test/extensions/properties/non_esimd_kernel_fp_control.cpp index 46d11eccdfe54..e6910484bf52f 100644 --- a/sycl/test/extensions/properties/non_esimd_kernel_fp_control.cpp +++ b/sycl/test/extensions/properties/non_esimd_kernel_fp_control.cpp @@ -20,7 +20,7 @@ struct ESIMDKernel { int main(void) { queue q; - // expected-error-re@sycl/handler.hpp:* {{static assertion failed due to requirement {{.+}}: Floating point control property is supported for ESIMD kernels only.}} + // expected-error-re@sycl/detail/kernel_launch_helper.hpp:* {{static assertion failed due to requirement {{.+}}: Floating point control property is supported for ESIMD kernels only.}} syclex::properties properties7{ intelex::fp_control}; @@ -28,7 +28,7 @@ int main(void) { cgh.single_task(properties7, [=]() {}); }); - // expected-error-re@sycl/handler.hpp:* {{static assertion failed due to requirement {{.+}}: Floating point control property is supported for ESIMD kernels only.}} + // expected-error-re@sycl/detail/kernel_launch_helper.hpp:* {{static assertion failed due to requirement {{.+}}: Floating point control property is supported for ESIMD kernels only.}} ESIMDKernel Kern; q.submit([&](handler &cgh) { cgh.parallel_for(range<1>(1), Kern); }); diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index 4e991e78b06c5..377c883a57f21 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -133,6 +133,8 @@ // CHECK-NEXT: detail/kernel_launch_helper.hpp // CHECK-NEXT: ext/intel/experimental/fp_control_kernel_properties.hpp // CHECK-NEXT: ext/intel/experimental/kernel_execution_properties.hpp +// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp +// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp // CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp // CHECK-NEXT: ext/oneapi/kernel_properties/properties.hpp // CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp @@ -145,9 +147,7 @@ // CHECK-NEXT: ext/oneapi/bindless_images_interop.hpp // CHECK-NEXT: ext/oneapi/interop_common.hpp // CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp -// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp // CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp -// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp // CHECK-NEXT: kernel.hpp // CHECK-NEXT: sampler.hpp // CHECK-NEXT: feature_test.hpp diff --git a/sycl/test/virtual-functions/properties-negative.cpp b/sycl/test/virtual-functions/properties-negative.cpp index b8e1b75f1d9a9..0ef06b3652ad1 100644 --- a/sycl/test/virtual-functions/properties-negative.cpp +++ b/sycl/test/virtual-functions/properties-negative.cpp @@ -17,15 +17,15 @@ int main() { oneapi::properties props_int{oneapi::indirectly_callable_in}; oneapi::properties props_user{oneapi::indirectly_callable_in}; - // expected-error-re@sycl/handler.hpp:* {{static assertion failed due to requirement {{.*}} indirectly_callable property cannot be applied to SYCL kernels}} + // expected-error-re@sycl/detail/kernel_launch_helper.hpp:* {{static assertion failed due to requirement {{.*}} indirectly_callable property cannot be applied to SYCL kernels}} q.single_task(props_empty, [=]() {}); // When both "props_empty" and "props_void" are in use, we won't see the // static assert firing for the second one, because there will be only one // instantiation of handler::processProperties. q.single_task(props_void, [=]() {}); - // expected-error-re@sycl/handler.hpp:* {{static assertion failed due to requirement {{.*}} indirectly_callable property cannot be applied to SYCL kernels}} + // expected-error-re@sycl/detail/kernel_launch_helper.hpp:* {{static assertion failed due to requirement {{.*}} indirectly_callable property cannot be applied to SYCL kernels}} q.single_task(props_int, [=]() {}); - // expected-error-re@sycl/handler.hpp:* {{static assertion failed due to requirement {{.*}} indirectly_callable property cannot be applied to SYCL kernels}} + // expected-error-re@sycl/detail/kernel_launch_helper.hpp:* {{static assertion failed due to requirement {{.*}} indirectly_callable property cannot be applied to SYCL kernels}} q.single_task(props_user, [=]() {}); return 0; diff --git a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp index ab322bce9022b..9870e0c3c49ce 100644 --- a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp @@ -149,9 +149,9 @@ class MockHandler : public sycl::handler { std::move(impl->CGData), std::move(impl->MArgs), CGH->MKernelName.data(), impl->MKernelNameBasedCachePtr, std::move(CGH->MStreamStorage), std::move(impl->MAuxiliaryResources), - impl->MCGType, {}, impl->MKernelIsCooperative, - impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, - CGH->MCodeLoc)); + impl->MCGType, {}, *impl->KLProps.MIsCooperative, + *impl->KLProps.MUsesClusterLaunch, + *impl->KLProps.MWorkGroupMemorySize, CGH->MCodeLoc)); break; } default: diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index 61ed99dd87cff..d929a3b320b22 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -303,9 +303,9 @@ class MockHandlerCustomFinalize : public MockHandler { getNDRDesc(), std::move(getHostKernel()), getKernel(), std::move(impl->MKernelBundle), std::move(CGData), getArgs(), getKernelName(), impl->MKernelNameBasedCachePtr, getStreamStorage(), - impl->MAuxiliaryResources, getType(), {}, impl->MKernelIsCooperative, - impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, - getCodeLoc())); + impl->MAuxiliaryResources, getType(), {}, + *impl->KLProps.MIsCooperative, *impl->KLProps.MUsesClusterLaunch, + *impl->KLProps.MWorkGroupMemorySize, getCodeLoc())); break; } case sycl::detail::CGType::CodeplayHostTask: { diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index d038004b1e1e4..05d30c163096a 100644 --- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -35,8 +35,8 @@ class MockHandlerStreamInit : public MockHandler { getRequirements(), getEvents()), getArgs(), getKernelName(), impl->MKernelNameBasedCachePtr, getStreamStorage(), std::move(impl->MAuxiliaryResources), getType(), - {}, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, - impl->MKernelWorkGroupMemorySize, getCodeLoc())); + {}, *impl->KLProps.MIsCooperative, *impl->KLProps.MUsesClusterLaunch, + *impl->KLProps.MWorkGroupMemorySize, getCodeLoc())); break; } default: