Skip to content

[SYCL][NFC] Move kernel launch property parsing out of sycl::handler #19589

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 20 commits into
base: sycl
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
ba2e6c4
Seperate out property parsing in a seperate DS
uditagarwal97 Jul 23, 2025
a09cbd0
[SYCL][NFC] Decouple property parsing from KernelWrapper
uditagarwal97 Jul 23, 2025
364708d
Move property pasring into a seperate DS
uditagarwal97 Jul 24, 2025
8386380
Merge remote-tracking branch 'origin/sycl' into private/udit/property…
uditagarwal97 Jul 24, 2025
bb8875b
Fix conditional property parsing
uditagarwal97 Jul 25, 2025
681258f
Fix root_sync property parsing
uditagarwal97 Jul 25, 2025
2089858
Merge remote-tracking branch 'origin/sycl' into private/udit/property…
uditagarwal97 Jul 25, 2025
3c3d3f6
Fix sycl_detail_core.hpp.cpp after merge
uditagarwal97 Jul 25, 2025
70137e1
Beautify code
uditagarwal97 Jul 27, 2025
26660c8
Remove TODOs
uditagarwal97 Jul 27, 2025
4afcd33
Fix coverity comments
uditagarwal97 Jul 27, 2025
feb3bd3
Replace sycl::ext::oneapi::experimental:: with syclex::
uditagarwal97 Jul 28, 2025
d281749
Rename MKernelCacheConfig to MCacheConfig, and similar renaming
uditagarwal97 Jul 28, 2025
5ffc25f
Make KernelLaunchPropertyWrapper empty
uditagarwal97 Jul 28, 2025
17fb4f9
Make sycl::handler inherit KernelLaunchPropertyWrapper
uditagarwal97 Jul 28, 2025
0856936
Remove getDeviceImpl
uditagarwal97 Jul 28, 2025
eae037d
Merge remote-tracking branch 'origin/sycl' into private/udit/property…
uditagarwal97 Jul 28, 2025
6361a29
Add comment on parseProperties
uditagarwal97 Jul 29, 2025
5fb9e4a
Merge remote-tracking branch 'origin/sycl' into private/udit/property…
uditagarwal97 Jul 29, 2025
1f9a313
Update abi/sycl_symbols_linux.dump
uditagarwal97 Jul 29, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
287 changes: 281 additions & 6 deletions sycl/include/sycl/detail/kernel_launch_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
#include <sycl/detail/helpers.hpp>
#include <sycl/ext/intel/experimental/fp_control_kernel_properties.hpp>
#include <sycl/ext/intel/experimental/kernel_execution_properties.hpp>
#include <sycl/ext/oneapi/experimental/cluster_group_prop.hpp>
#include <sycl/ext/oneapi/experimental/graph.hpp>
#include <sycl/ext/oneapi/experimental/use_root_sync_prop.hpp>
#include <sycl/ext/oneapi/experimental/virtual_functions.hpp>
#include <sycl/ext/oneapi/kernel_properties/properties.hpp>
#include <sycl/ext/oneapi/work_group_scratch_memory.hpp>
Expand Down Expand Up @@ -251,20 +254,292 @@ struct KernelWrapper<
}
}; // KernelWrapper struct

struct KernelLaunchPropertyWrapper {
template <typename KernelName, typename PropertyProcessor,
typename KernelType>
static void parseProperties([[maybe_unused]] PropertyProcessor h,
[[maybe_unused]] const KernelType &KernelFunc) {
// This struct is inherited by sycl::handler.
template <typename PropertyProcessorT> class KernelLaunchPropertyWrapper {
public:
// This struct is used to store kernel launch properties.
// std::optional is used to indicate that the property is not set.
// In some code paths, kernel launch properties are set multiple times
// for the same kernel, that is why using std::optional to avoid overriding
// previously set properties.
struct KernelLaunchPropertiesT {
std::optional<ur_kernel_cache_config_t> MCacheConfig;
std::optional<bool> MIsCooperative;
std::optional<uint32_t> MWorkGroupMemorySize;
std::optional<bool> MUsesClusterLaunch;
size_t MClusterDims;
std::array<size_t, 3> MClusterSize;

KernelLaunchPropertiesT()
: MCacheConfig(std::nullopt), MIsCooperative(std::nullopt),
MWorkGroupMemorySize(std::nullopt), MUsesClusterLaunch(std::nullopt),
MClusterDims(0), MClusterSize{0, 0, 0} {}

KernelLaunchPropertiesT(ur_kernel_cache_config_t _CacheConfig,
bool _IsCooperative, uint32_t _WorkGroupMemorySize,
bool _UsesClusterLaunch, size_t _ClusterDims,
std::array<size_t, 3> _ClusterSize)
: MCacheConfig(_CacheConfig), MIsCooperative(_IsCooperative),
MWorkGroupMemorySize(_WorkGroupMemorySize),
MUsesClusterLaunch(_UsesClusterLaunch), MClusterDims(_ClusterDims),
MClusterSize(_ClusterSize) {}

void takeUnionOfProperties(const KernelLaunchPropertiesT &Other) {
if (Other.MCacheConfig)
MCacheConfig = Other.MCacheConfig;

if (Other.MIsCooperative)
MIsCooperative = Other.MIsCooperative;

if (Other.MWorkGroupMemorySize)
MWorkGroupMemorySize = Other.MWorkGroupMemorySize;

if (Other.MUsesClusterLaunch) {
MUsesClusterLaunch = Other.MUsesClusterLaunch;
MClusterDims = Other.MClusterDims;
MClusterSize = Other.MClusterSize;
}
}
}; // struct KernelLaunchPropertiesT

private:
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
// Modeled after ur_kernel_cache_config_t
enum class StableKernelCacheConfig : int32_t {
Default = 0,
LargeSLM = 1,
LargeData = 2
};
#endif // __INTEL_PREVIEW_BREAKING_CHANGES

void verifyDeviceHasProgressGuarantee(
sycl::ext::oneapi::experimental::forward_progress_guarantee guarantee,
sycl::ext::oneapi::experimental::execution_scope threadScope,
sycl::ext::oneapi::experimental::execution_scope coordinationScope,
KernelLaunchPropertiesT &prop) {

using execution_scope = sycl::ext::oneapi::experimental::execution_scope;
using forward_progress =
sycl::ext::oneapi::experimental::forward_progress_guarantee;

PropertyProcessorT *pp = static_cast<PropertyProcessorT *>(this);
bool supported = pp->deviceSupportForwardProgress(guarantee, threadScope,
coordinationScope);

if (threadScope == execution_scope::work_group) {
if (!supported) {
throw sycl::exception(
sycl::errc::feature_not_supported,
"Required progress guarantee for work groups is not "
"supported by this device.");
}
// If we are here, the device supports the guarantee required but there is
// a caveat in that if the guarantee required is a concurrent guarantee,
// then we most likely also need to enable cooperative launch of the
// kernel. That is, although the device supports the required guarantee,
// some setup work is needed to truly make the device provide that
// guarantee at runtime. Otherwise, we will get the default guarantee
// which is weaker than concurrent. Same reasoning applies for sub_group
// but not for work_item.
// TODO: Further design work is probably needed to reflect this behavior
// in Unified Runtime.
if (guarantee == forward_progress::concurrent)
prop.MIsCooperative = true;
} else if (threadScope == execution_scope::sub_group) {
if (!supported) {
throw sycl::exception(
sycl::errc::feature_not_supported,
"Required progress guarantee for sub groups is not "
"supported by this device.");
}
// Same reasoning as above.
if (guarantee == forward_progress::concurrent)
prop.MIsCooperative = true;
} else { // threadScope is execution_scope::work_item otherwise undefined
// behavior
if (!supported) {
throw sycl::exception(
sycl::errc::feature_not_supported,
"Required progress guarantee for work items is not "
"supported by this device.");
}
}
}

public:
/// Process runtime kernel properties.
///
/// Stores information about kernel properties into the handler.
template <typename PropertiesT>
KernelLaunchPropertiesT processLaunchProperties(PropertiesT Props) {

namespace syclex = sycl::ext::oneapi::experimental;
KernelLaunchPropertiesT retval;

PropertyProcessorT *pp = static_cast<PropertyProcessorT *>(this);
bool HasGraph = pp->isKernelAssociatedWithGraph();

// Process Kernel cache configuration property.
{
if constexpr (PropertiesT::template has_property<
sycl::ext::intel::experimental::cache_config_key>()) {
auto Config = Props.template get_property<
sycl::ext::intel::experimental::cache_config_key>();
if (Config == sycl::ext::intel::experimental::large_slm) {
retval.MCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_SLM;
} else if (Config == sycl::ext::intel::experimental::large_data) {
retval.MCacheConfig = UR_KERNEL_CACHE_CONFIG_LARGE_DATA;
}
} else {
std::ignore = Props;
}
}

// Process Kernel cooperative property.
{
constexpr bool UsesRootSync =
PropertiesT::template has_property<syclex::use_root_sync_key>();
if (UsesRootSync)
retval.MIsCooperative = UsesRootSync;
}

// Process device progress properties.
{
if constexpr (PropertiesT::template has_property<
syclex::work_group_progress_key>()) {
auto prop =
Props.template get_property<syclex::work_group_progress_key>();
verifyDeviceHasProgressGuarantee(prop.guarantee,
syclex::execution_scope::work_group,
prop.coordinationScope, retval);
}
if constexpr (PropertiesT::template has_property<
syclex::sub_group_progress_key>()) {
auto prop =
Props.template get_property<syclex::sub_group_progress_key>();
verifyDeviceHasProgressGuarantee(prop.guarantee,
syclex::execution_scope::sub_group,
prop.coordinationScope, retval);
}
if constexpr (PropertiesT::template has_property<
syclex::work_item_progress_key>()) {
auto prop =
Props.template get_property<syclex::work_item_progress_key>();
verifyDeviceHasProgressGuarantee(prop.guarantee,
syclex::execution_scope::work_item,
prop.coordinationScope, retval);
}
}

// Process work group scratch memory property.
{
if constexpr (PropertiesT::template has_property<
syclex::work_group_scratch_size>()) {
if (HasGraph) {
std::string FeatureString =
ext::oneapi::experimental::detail::UnsupportedFeatureToString(
syclex::detail::UnsupportedGraphFeatures::
sycl_ext_oneapi_work_group_scratch_memory);
throw sycl::exception(sycl::make_error_code(errc::invalid),
"The " + FeatureString +
" feature is not yet available "
"for use with the SYCL Graph extension.");
}

auto WorkGroupMemSize =
Props.template get_property<syclex::work_group_scratch_size>();
retval.MWorkGroupMemorySize = WorkGroupMemSize.size;
}
}

// Parse cluster properties.
{
constexpr std::size_t ClusterDim =
syclex::detail::getClusterDim<PropertiesT>();
if constexpr (ClusterDim > 0) {
if (HasGraph) {
std::string FeatureString =
ext::oneapi::experimental::detail::UnsupportedFeatureToString(
syclex::detail::UnsupportedGraphFeatures::
sycl_ext_oneapi_experimental_cuda_cluster_launch);
throw sycl::exception(sycl::make_error_code(errc::invalid),
"The " + FeatureString +
" feature is not yet available "
"for use with the SYCL Graph extension.");
}

auto ClusterSize = Props
.template get_property<
syclex::cuda::cluster_size_key<ClusterDim>>()
.get_cluster_size();
retval.MUsesClusterLaunch = true;
retval.MClusterDims = ClusterDim;
if (ClusterDim == 1) {
retval.MClusterSize[0] = ClusterSize[0];
} else if (ClusterDim == 2) {
retval.MClusterSize[0] = ClusterSize[0];
retval.MClusterSize[1] = ClusterSize[1];
} else if (ClusterDim == 3) {
retval.MClusterSize[0] = ClusterSize[0];
retval.MClusterSize[1] = ClusterSize[1];
retval.MClusterSize[2] = ClusterSize[2];
} else {
assert(ClusterDim <= 3 &&
"Only 1D, 2D, and 3D cluster launch is supported.");
}
}
}

return retval;
}

/// Process kernel properties.
///
/// Stores information about kernel properties into the handler.
///
/// Note: it is important that this function *does not* depend on kernel
/// name or kernel type, because then it will be instantiated for every
/// kernel, even though body of those instantiated functions could be almost
/// the same, thus unnecessary increasing compilation time.
template <
bool IsESIMDKernel,
typename PropertiesT = ext::oneapi::experimental::empty_properties_t>
KernelLaunchPropertiesT processProperties(PropertiesT Props) {
static_assert(
ext::oneapi::experimental::is_property_list<PropertiesT>::value,
"Template type is not a property list.");
static_assert(
!PropertiesT::template has_property<
sycl::ext::intel::experimental::fp_control_key>() ||
(PropertiesT::template has_property<
sycl::ext::intel::experimental::fp_control_key>() &&
IsESIMDKernel),
"Floating point control property is supported for ESIMD kernels only.");
static_assert(
!PropertiesT::template has_property<
sycl::ext::oneapi::experimental::indirectly_callable_key>(),
"indirectly_callable property cannot be applied to SYCL kernels");

return processLaunchProperties(Props);
}

// Returns KernelLaunchPropertiesT or std::nullopt based on whether the
// kernel functor has a get method that returns properties.
template <typename KernelName, typename KernelType>
std::optional<KernelLaunchPropertiesT>
parseProperties([[maybe_unused]] const KernelType &KernelFunc) {
#ifndef __SYCL_DEVICE_ONLY__
// If there are properties provided by get method then process them.
if constexpr (ext::oneapi::experimental::detail::
HasKernelPropertiesGetMethod<const KernelType &>::value) {

h->template processProperties<detail::isKernelESIMD<KernelName>()>(
return processProperties<detail::isKernelESIMD<KernelName>()>(
KernelFunc.get(ext::oneapi::experimental::properties_tag{}));
}
#endif
// If there are no properties provided by get method then return empty
// optional.
return std::nullopt;
}
}; // KernelLaunchPropertyWrapper struct

Expand Down
Loading
Loading