diff --git a/sycl/source/backend.cpp b/sycl/source/backend.cpp index 4866266c0dd72..5f7ac8af1e1fb 100644 --- a/sycl/source/backend.cpp +++ b/sycl/source/backend.cpp @@ -231,7 +231,7 @@ make_kernel_bundle(ur_native_handle_t NativeHandle, case (UR_PROGRAM_BINARY_TYPE_NONE): if (State == bundle_state::object) { auto Res = Adapter.call_nocheck( - UrProgram, 1u, &Dev, nullptr); + UrProgram, 1u, &Dev, ur_exp_program_flags_t{}, nullptr); if (Res == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { Res = Adapter.call_nocheck( ContextImpl.getHandleRef(), UrProgram, nullptr); @@ -241,7 +241,7 @@ make_kernel_bundle(ur_native_handle_t NativeHandle, else if (State == bundle_state::executable) { auto Res = Adapter.call_nocheck( - UrProgram, 1u, &Dev, nullptr); + UrProgram, 1u, &Dev, ur_exp_program_flags_t{}, nullptr); if (Res == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { Res = Adapter.call_nocheck( ContextImpl.getHandleRef(), UrProgram, nullptr); @@ -261,8 +261,8 @@ make_kernel_bundle(ur_native_handle_t NativeHandle, Managed UrLinkedProgram{Adapter}; ur_program_handle_t ProgramsToLink[] = {UrProgram}; auto Res = Adapter.call_nocheck( - ContextImpl.getHandleRef(), 1u, &Dev, 1u, ProgramsToLink, nullptr, - &UrLinkedProgram); + ContextImpl.getHandleRef(), 1u, &Dev, ur_exp_program_flags_t{}, 1u, + ProgramsToLink, nullptr, &UrLinkedProgram); if (Res == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { Res = Adapter.call_nocheck( ContextImpl.getHandleRef(), 1u, ProgramsToLink, nullptr, diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index f11782237db9b..9bd9088067dd6 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -761,7 +761,8 @@ class device_image_impl std::string XsFlags = extractXsFlags(BuildOptions, MRTCBinInfo->MLanguage); auto Res = Adapter.call_nocheck( - UrProgram, DeviceVec.size(), DeviceVec.data(), XsFlags.c_str()); + UrProgram, DeviceVec.size(), DeviceVec.data(), ur_exp_program_flags_t{}, + 
XsFlags.c_str()); if (Res == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { Res = Adapter.call_nocheck( ContextImpl.getHandleRef(), UrProgram, XsFlags.c_str()); diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 07ed72c0df423..2ca4420dc0549 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1302,7 +1302,7 @@ static ur_result_t doCompile(adapter_impl &Adapter, ur_program_handle_t Program, // Try to compile with given devices, fall back to compiling with the program // context if unsupported by the adapter auto Result = Adapter.call_nocheck( - Program, NumDevs, Devs, Opts); + Program, NumDevs, Devs, ur_exp_program_flags_t{}, Opts); if (Result == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { return Adapter.call_nocheck(Ctx, Program, Opts); @@ -1723,7 +1723,8 @@ Managed ProgramManager::build( ? CompileOptions : (CompileOptions + " " + LinkOptions); ur_result_t Error = Adapter.call_nocheck( - Program, Devices.size(), Devices.data(), Options.c_str()); + Program, Devices.size(), Devices.data(), ur_exp_program_flags_t{}, + Options.c_str()); if (Error == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { Error = Adapter.call_nocheck( Context.getHandleRef(), Program, Options.c_str()); @@ -1759,8 +1760,8 @@ Managed ProgramManager::build( auto doLink = [&] { auto Res = Adapter.call_nocheck( Context.getHandleRef(), Devices.size(), Devices.data(), - LinkPrograms.size(), LinkPrograms.data(), LinkOptions.c_str(), - &LinkedProg); + ur_exp_program_flags_t{}, LinkPrograms.size(), LinkPrograms.data(), + LinkOptions.c_str(), &LinkedProg); if (Res == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { Res = Adapter.call_nocheck( Context.getHandleRef(), LinkPrograms.size(), LinkPrograms.data(), @@ -3001,8 +3002,8 @@ ProgramManager::link(const std::vector &Imgs, auto doLink = [&] { auto Res = Adapter.call_nocheck( ContextImpl.getHandleRef(), URDevices.size(), 
URDevices.data(), - URPrograms.size(), URPrograms.data(), LinkOptionsStr.c_str(), - &LinkedProg); + ur_exp_program_flags_t{}, URPrograms.size(), URPrograms.data(), + LinkOptionsStr.c_str(), &LinkedProg); if (Res == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { Res = Adapter.call_nocheck( ContextImpl.getHandleRef(), URPrograms.size(), URPrograms.data(), diff --git a/unified-runtime/include/ur_api.h b/unified-runtime/include/ur_api.h index 4ad894880db07..38b6a73144e73 100644 --- a/unified-runtime/include/ur_api.h +++ b/unified-runtime/include/ur_api.h @@ -485,6 +485,8 @@ typedef enum ur_function_t { UR_FUNCTION_IPC_CLOSE_MEM_HANDLE_EXP = 292, /// Enumerator for ::urDeviceWaitExp UR_FUNCTION_DEVICE_WAIT_EXP = 293, + /// Enumerator for ::urProgramDynamicLinkExp + UR_FUNCTION_PROGRAM_DYNAMIC_LINK_EXP = 294, /// @cond UR_FUNCTION_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -2462,6 +2464,9 @@ typedef enum ur_device_info_t { /// [::ur_bool_t] Returns true if the device supports the device-wide /// synchronization experimental feature. UR_DEVICE_INFO_DEVICE_WAIT_SUPPORT_EXP = 0x6002, + /// [::ur_bool_t] Returns true if the device supports the dynamic linking + /// experimental feature. + UR_DEVICE_INFO_DYNAMIC_LINK_SUPPORT_EXP = 0x6003, /// [::ur_bool_t] returns true if the device supports /// ::urUSMContextMemcpyExp UR_DEVICE_INFO_USM_CONTEXT_MEMCPY_SUPPORT_EXP = 0x7000, @@ -12384,6 +12389,49 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceWaitExp( /// [in] handle of the device instance. ur_device_handle_t hDevice); +#if !defined(__GNUC__) +#pragma endregion +#endif +// Intel 'oneAPI' Unified Runtime Experimental APIs for dynamic linking +#if !defined(__GNUC__) +#pragma region dynamic_link_(experimental) +#endif +/////////////////////////////////////////////////////////////////////////////// +/// @brief Creates dynamic links between exported and imported symbols in one or +/// more programs. +/// +/// @details +/// - The application may call this function from simultaneous threads. 
+/// - Following a successful call to this entry point the programs in +/// `phPrograms` will have all external symbols resolved and kernels +/// inside these programs would be ready for use. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hContext` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == phPrograms` +/// - ::UR_RESULT_ERROR_INVALID_PROGRAM +/// + If one of the programs in `phPrograms` isn't a valid program +/// object. +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `count == 0` +/// - ::UR_RESULT_ERROR_PROGRAM_LINK_FAILURE +/// + If an error occurred while linking `phPrograms`. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the adapter has no means to support the operation. +UR_APIEXPORT ur_result_t UR_APICALL urProgramDynamicLinkExp( + /// [in] handle of the context instance. + ur_context_handle_t hContext, + /// [in] number of program handles in `phPrograms`. + uint32_t count, + /// [in][range(0, count)] pointer to array of program handles. 
+ const ur_program_handle_t *phPrograms); + #if !defined(__GNUC__) #pragma endregion #endif @@ -12672,6 +12720,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemoryExportExportMemoryHandleExp( #if !defined(__GNUC__) #pragma region multi_device_compile_(experimental) #endif +/////////////////////////////////////////////////////////////////////////////// +/// @brief Program operation behavior control flags +typedef uint32_t ur_exp_program_flags_t; +typedef enum ur_exp_program_flag_t { + /// Allow unresolved symbols in the program resulting from the + /// corresponding operation + UR_EXP_PROGRAM_FLAG_ALLOW_UNRESOLVED_SYMBOLS = UR_BIT(0), + /// @cond + UR_EXP_PROGRAM_FLAG_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_exp_program_flag_t; +/// @brief Bit Mask for validating ur_exp_program_flags_t +#define UR_EXP_PROGRAM_FLAGS_MASK 0xfffffffe + /////////////////////////////////////////////////////////////////////////////// /// @brief Produces an executable program from one program, negates need for the /// linking step. @@ -12695,6 +12758,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemoryExportExportMemoryHandleExp( /// + `NULL == hProgram` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phDevices` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_PROGRAM_FLAGS_MASK & flags` /// - ::UR_RESULT_ERROR_INVALID_PROGRAM /// + If `hProgram` isn't a valid program object. /// - ::UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE @@ -12706,6 +12771,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in][optional] pointer to build options null-terminated string. 
const char *pOptions); @@ -12731,6 +12798,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp( /// + `NULL == hProgram` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phDevices` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_PROGRAM_FLAGS_MASK & flags` /// - ::UR_RESULT_ERROR_INVALID_PROGRAM /// + If `hProgram` isn't a valid program object. /// - ::UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE @@ -12742,6 +12811,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in][optional] pointer to build options null-terminated string. const char *pOptions); @@ -12775,6 +12846,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp( /// + `NULL == phDevices` /// + `NULL == phPrograms` /// + `NULL == phProgram` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_PROGRAM_FLAGS_MASK & flags` /// - ::UR_RESULT_ERROR_INVALID_PROGRAM /// + If one of the programs in `phPrograms` isn't a valid program /// object. @@ -12789,6 +12862,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in] number of program handles in `phPrograms`. uint32_t count, /// [in][range(0, count)] pointer to array of program handles. 
@@ -13616,6 +13691,16 @@ typedef struct ur_program_build_params_t { const char **ppOptions; } ur_program_build_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urProgramDynamicLinkExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_program_dynamic_link_exp_params_t { + ur_context_handle_t *phContext; + uint32_t *pcount; + const ur_program_handle_t **pphPrograms; +} ur_program_dynamic_link_exp_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urProgramBuildExp /// @details Each entry is a pointer to the parameter passed to the function; @@ -13624,6 +13709,7 @@ typedef struct ur_program_build_exp_params_t { ur_program_handle_t *phProgram; uint32_t *pnumDevices; ur_device_handle_t **pphDevices; + ur_exp_program_flags_t *pflags; const char **ppOptions; } ur_program_build_exp_params_t; @@ -13645,6 +13731,7 @@ typedef struct ur_program_compile_exp_params_t { ur_program_handle_t *phProgram; uint32_t *pnumDevices; ur_device_handle_t **pphDevices; + ur_exp_program_flags_t *pflags; const char **ppOptions; } ur_program_compile_exp_params_t; @@ -13668,6 +13755,7 @@ typedef struct ur_program_link_exp_params_t { ur_context_handle_t *phContext; uint32_t *pnumDevices; ur_device_handle_t **pphDevices; + ur_exp_program_flags_t *pflags; uint32_t *pcount; const ur_program_handle_t **pphPrograms; const char **ppOptions; diff --git a/unified-runtime/include/ur_api_funcs.def b/unified-runtime/include/ur_api_funcs.def index d4c071530b2cf..770743c524d74 100644 --- a/unified-runtime/include/ur_api_funcs.def +++ b/unified-runtime/include/ur_api_funcs.def @@ -59,6 +59,7 @@ _UR_API(urProgramGetBuildInfo) _UR_API(urProgramSetSpecializationConstants) _UR_API(urProgramGetNativeHandle) _UR_API(urProgramCreateWithNativeHandle) 
+_UR_API(urProgramDynamicLinkExp) _UR_API(urProgramBuildExp) _UR_API(urProgramCompileExp) _UR_API(urProgramLinkExp) diff --git a/unified-runtime/include/ur_ddi.h b/unified-runtime/include/ur_ddi.h index 8b5664149a429..0cb139a2e93da 100644 --- a/unified-runtime/include/ur_ddi.h +++ b/unified-runtime/include/ur_ddi.h @@ -418,29 +418,35 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( typedef ur_result_t(UR_APICALL *ur_pfnGetProgramProcAddrTable_t)( ur_api_version_t, ur_program_dditable_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urProgramDynamicLinkExp +typedef ur_result_t(UR_APICALL *ur_pfnProgramDynamicLinkExp_t)( + ur_context_handle_t, uint32_t, const ur_program_handle_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urProgramBuildExp typedef ur_result_t(UR_APICALL *ur_pfnProgramBuildExp_t)(ur_program_handle_t, uint32_t, ur_device_handle_t *, + ur_exp_program_flags_t, const char *); /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urProgramCompileExp -typedef ur_result_t(UR_APICALL *ur_pfnProgramCompileExp_t)(ur_program_handle_t, - uint32_t, - ur_device_handle_t *, - const char *); +typedef ur_result_t(UR_APICALL *ur_pfnProgramCompileExp_t)( + ur_program_handle_t, uint32_t, ur_device_handle_t *, ur_exp_program_flags_t, + const char *); /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urProgramLinkExp typedef ur_result_t(UR_APICALL *ur_pfnProgramLinkExp_t)( - ur_context_handle_t, uint32_t, ur_device_handle_t *, uint32_t, - const ur_program_handle_t *, const char *, ur_program_handle_t *); + ur_context_handle_t, uint32_t, ur_device_handle_t *, ur_exp_program_flags_t, + uint32_t, const ur_program_handle_t *, const char *, ur_program_handle_t *); 
/////////////////////////////////////////////////////////////////////////////// /// @brief Table of ProgramExp functions pointers typedef struct ur_program_exp_dditable_t { + ur_pfnProgramDynamicLinkExp_t pfnDynamicLinkExp; ur_pfnProgramBuildExp_t pfnBuildExp; ur_pfnProgramCompileExp_t pfnCompileExp; ur_pfnProgramLinkExp_t pfnLinkExp; diff --git a/unified-runtime/include/ur_print.h b/unified-runtime/include/ur_print.h index f14d6f02329c2..645b6b40d078b 100644 --- a/unified-runtime/include/ur_print.h +++ b/unified-runtime/include/ur_print.h @@ -1415,6 +1415,16 @@ urPrintExpCommandBufferUpdateKernelLaunchDesc( const struct ur_exp_command_buffer_update_kernel_launch_desc_t params, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_program_flag_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL +urPrintExpProgramFlags(enum ur_exp_program_flag_t value, char *buffer, + const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_exp_peer_info_t enum /// @returns @@ -1846,6 +1856,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintProgramBuildParams( const struct ur_program_build_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_program_dynamic_link_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintProgramDynamicLinkExpParams( + const struct ur_program_dynamic_link_exp_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief 
Print ur_program_build_exp_params_t struct /// @returns diff --git a/unified-runtime/include/ur_print.hpp b/unified-runtime/include/ur_print.hpp index 0dd7abde00ff6..23bb5bf89c654 100644 --- a/unified-runtime/include/ur_print.hpp +++ b/unified-runtime/include/ur_print.hpp @@ -259,6 +259,10 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_command_buffer_command_info_t value, size_t size); +template <> +inline ur_result_t printFlag(std::ostream &os, + uint32_t flag); + template <> inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_peer_info_t value, size_t size); @@ -592,6 +596,8 @@ inline std::ostream &operator<<( inline std::ostream & operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_kernel_launch_desc_t params); +inline std::ostream &operator<<(std::ostream &os, + enum ur_exp_program_flag_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_peer_info_t value); inline std::ostream &operator<<(std::ostream &os, @@ -1291,6 +1297,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { case UR_FUNCTION_DEVICE_WAIT_EXP: os << "UR_FUNCTION_DEVICE_WAIT_EXP"; break; + case UR_FUNCTION_PROGRAM_DYNAMIC_LINK_EXP: + os << "UR_FUNCTION_PROGRAM_DYNAMIC_LINK_EXP"; + break; default: os << "unknown enumerator"; break; @@ -3160,6 +3169,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) { case UR_DEVICE_INFO_DEVICE_WAIT_SUPPORT_EXP: os << "UR_DEVICE_INFO_DEVICE_WAIT_SUPPORT_EXP"; break; + case UR_DEVICE_INFO_DYNAMIC_LINK_SUPPORT_EXP: + os << "UR_DEVICE_INFO_DYNAMIC_LINK_SUPPORT_EXP"; + break; case UR_DEVICE_INFO_USM_CONTEXT_MEMCPY_SUPPORT_EXP: os << "UR_DEVICE_INFO_USM_CONTEXT_MEMCPY_SUPPORT_EXP"; break; @@ -5396,6 +5408,19 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, os << ")"; } break; + case UR_DEVICE_INFO_DYNAMIC_LINK_SUPPORT_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if 
(sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size + << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; case UR_DEVICE_INFO_USM_CONTEXT_MEMCPY_SUPPORT_EXP: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { @@ -12362,6 +12387,54 @@ inline std::ostream &operator<<( return os; } /////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_program_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, + enum ur_exp_program_flag_t value) { + switch (value) { + case UR_EXP_PROGRAM_FLAG_ALLOW_UNRESOLVED_SYMBOLS: + os << "UR_EXP_PROGRAM_FLAG_ALLOW_UNRESOLVED_SYMBOLS"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} + +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_program_flag_t flag +template <> +inline ur_result_t printFlag(std::ostream &os, + uint32_t flag) { + uint32_t val = flag; + bool first = true; + + if ((val & UR_EXP_PROGRAM_FLAG_ALLOW_UNRESOLVED_SYMBOLS) == + (uint32_t)UR_EXP_PROGRAM_FLAG_ALLOW_UNRESOLVED_SYMBOLS) { + val ^= (uint32_t)UR_EXP_PROGRAM_FLAG_ALLOW_UNRESOLVED_SYMBOLS; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_PROGRAM_FLAG_ALLOW_UNRESOLVED_SYMBOLS; + } + if (val != 0) { + std::bitset<32> bits(val); + if (!first) { + os << " | "; + } + os << "unknown bit flags " << bits; + } else if (first) { + os << "0"; + } + return UR_RESULT_SUCCESS; +} +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_exp_peer_info_t type /// @returns /// std::ostream & @@ -13642,6 +13715,43 @@ operator<<(std::ostream &os, return os; } 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_program_dynamic_link_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream & +operator<<(std::ostream &os, + [[maybe_unused]] const struct ur_program_dynamic_link_exp_params_t + *params) { + + os << ".hContext = "; + + ur::details::printPtr(os, *(params->phContext)); + + os << ", "; + os << ".count = "; + + os << *(params->pcount); + + os << ", "; + os << ".phPrograms = "; + ur::details::printPtr(os, + reinterpret_cast(*(params->pphPrograms))); + if (*(params->pphPrograms) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pcount; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, (*(params->pphPrograms))[i]); + } + os << "}"; + } + + return os; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_program_build_exp_params_t type /// @returns @@ -13675,6 +13785,11 @@ inline std::ostream &operator<<( os << "}"; } + os << ", "; + os << ".flags = "; + + ur::details::printFlag(os, *(params->pflags)); + os << ", "; os << ".pOptions = "; @@ -13741,6 +13856,11 @@ inline std::ostream &operator<<( os << "}"; } + os << ", "; + os << ".flags = "; + + ur::details::printFlag(os, *(params->pflags)); + os << ", "; os << ".pOptions = "; @@ -13828,6 +13948,11 @@ operator<<(std::ostream &os, os << "}"; } + os << ", "; + os << ".flags = "; + + ur::details::printFlag(os, *(params->pflags)); + os << ", "; os << ".count = "; @@ -21461,6 +21586,9 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, case UR_FUNCTION_PROGRAM_BUILD: { os << (const struct ur_program_build_params_t *)params; } break; + case UR_FUNCTION_PROGRAM_DYNAMIC_LINK_EXP: { + os << (const struct ur_program_dynamic_link_exp_params_t *)params; + } break; case UR_FUNCTION_PROGRAM_BUILD_EXP: { os << (const struct ur_program_build_exp_params_t *)params; } break; diff --git 
a/unified-runtime/scripts/core/EXP-DYNAMIC-LINK.rst b/unified-runtime/scripts/core/EXP-DYNAMIC-LINK.rst new file mode 100644 index 0000000000000..71f11ae33dc2b --- /dev/null +++ b/unified-runtime/scripts/core/EXP-DYNAMIC-LINK.rst @@ -0,0 +1,65 @@ +<% + OneApi=tags['$OneApi'] + x=tags['$x'] + X=x.upper() +%> + +.. _experimental-dynamic-link: + +================================================================================ +Dynamic Link +================================================================================ + +.. warning:: + + Experimental features: + + * May be replaced, updated, or removed at any time. + * Do not require maintaining API/ABI stability of their own additions over + time. + * Do not require conformance testing of their own additions. + + + +Motivation +-------------------------------------------------------------------------------- + +Some adapters support the ability to do dynamic linking between programs, +resolving external symbols through that. This may allow AOT compiled binaries to +be linked, despite already having been built. 
+ +API +-------------------------------------------------------------------------------- + +Enums +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* ${x}_device_info_t + * ${X}_DEVICE_INFO_DYNAMIC_LINK_SUPPORT_EXP + +Functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* ${x}ProgramDynamicLinkExp + +Changelog +-------------------------------------------------------------------------------- + ++-----------+---------------------------------------------+ +| Revision | Changes | ++===========+=============================================+ +| 1.0 | Initial Draft | ++-----------+---------------------------------------------+ + +Support +-------------------------------------------------------------------------------- + +Adapters which support this experimental feature *must* return ``true`` when +queried for ${X}_DEVICE_INFO_DYNAMIC_LINK_SUPPORT_EXP via +${x}DeviceGetInfo. Conversely, before using any of the functionality defined +in this experimental feature the user *must* use the device query to determine +if the adapter supports this feature. 
+ +Contributors +-------------------------------------------------------------------------------- + +* Steffen Larsen `steffen.larsen@intel.com <steffen.larsen@intel.com>`_ diff --git a/unified-runtime/scripts/core/EXP-MULTI-DEVICE-COMPILE.rst b/unified-runtime/scripts/core/EXP-MULTI-DEVICE-COMPILE.rst index 19dc26268b318..9f1bb7e912fbb 100644 --- a/unified-runtime/scripts/core/EXP-MULTI-DEVICE-COMPILE.rst +++ b/unified-runtime/scripts/core/EXP-MULTI-DEVICE-COMPILE.rst @@ -36,6 +36,8 @@ Enums ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * ${x}_device_info_t * ${X}_DEVICE_INFO_MULTI_DEVICE_COMPILE_SUPPORT_EXP +* ${x}_exp_program_flags_t + * ${X}_EXP_PROGRAM_FLAG_ALLOW_UNRESOLVED_SYMBOLS Functions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/unified-runtime/scripts/core/exp-dynamic-link.yml b/unified-runtime/scripts/core/exp-dynamic-link.yml new file mode 100644 index 0000000000000..fbd694fb6a6b3 --- /dev/null +++ b/unified-runtime/scripts/core/exp-dynamic-link.yml @@ -0,0 +1,52 @@ +# +# Copyright (C) 2023 Intel Corporation +# +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# See YaML.md for syntax definition +# +--- #-------------------------------------------------------------------------- +type: header +desc: "Intel $OneApi Unified Runtime Experimental APIs for dynamic linking" +ordinal: "99" +--- #-------------------------------------------------------------------------- +type: enum +extend: true +typed_etors: true +desc: "Extension enums for $x_device_info_t to support dynamic linking." +name: $x_device_info_t +etors: + - name: DYNAMIC_LINK_SUPPORT_EXP + value: "0x6003" + desc: "[$x_bool_t] Returns true if the device supports the dynamic linking experimental feature." 
+--- #-------------------------------------------------------------------------- +type: function +desc: "Creates dynamic links between exported and imported symbols in one or more programs." +class: $xProgram +name: DynamicLinkExp +decl: static +ordinal: "2" +details: + - "The application may call this function from simultaneous threads." + - "Following a successful call to this entry point the programs in `phPrograms` will have all external symbols resolved and kernels inside these programs would be ready for use." +params: + - type: $x_context_handle_t + name: hContext + desc: "[in] handle of the context instance." + - type: uint32_t + name: count + desc: "[in] number of program handles in `phPrograms`." + - type: const $x_program_handle_t* + name: phPrograms + desc: "[in][range(0, count)] pointer to array of program handles." +returns: + - $X_RESULT_ERROR_INVALID_PROGRAM: + - "If one of the programs in `phPrograms` isn't a valid program object." + - $X_RESULT_ERROR_INVALID_SIZE: + - "`count == 0`" + - $X_RESULT_ERROR_PROGRAM_LINK_FAILURE: + - "If an error occurred while linking `phPrograms`." + - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: + - "If the adapter has no means to support the operation." diff --git a/unified-runtime/scripts/core/exp-multi-device-compile.yml b/unified-runtime/scripts/core/exp-multi-device-compile.yml index 06309cb019dfc..39bdaa407cfa8 100644 --- a/unified-runtime/scripts/core/exp-multi-device-compile.yml +++ b/unified-runtime/scripts/core/exp-multi-device-compile.yml @@ -22,6 +22,15 @@ etors: value: "0x6000" desc: "[$x_bool_t] Returns true if the device supports the multi device compile experimental feature." 
--- #-------------------------------------------------------------------------- +type: enum +desc: "Program operation behavior control flags" +class: $xProgram +name: $x_exp_program_flags_t +etors: + - name: ALLOW_UNRESOLVED_SYMBOLS + desc: "Allow unresolved symbols in the program resulting from the corresponding operation" + value: "$X_BIT(0)" +--- #-------------------------------------------------------------------------- type: function desc: "Produces an executable program from one program, negates need for the linking step." class: $xProgram @@ -43,6 +52,9 @@ params: - type: $x_device_handle_t* name: phDevices desc: "[in][range(0, numDevices)] pointer to array of device handles" + - type: $x_exp_program_flags_t + name: flags + desc: "[in] program information flags" - type: const char* name: pOptions desc: "[in][optional] pointer to build options null-terminated string." @@ -74,6 +86,9 @@ params: - type: $x_device_handle_t* name: phDevices desc: "[in][range(0, numDevices)] pointer to array of device handles" + - type: $x_exp_program_flags_t + name: flags + desc: "[in] program information flags" - type: const char* name: pOptions desc: "[in][optional] pointer to build options null-terminated string." @@ -106,6 +121,9 @@ params: - type: $x_device_handle_t* name: phDevices desc: "[in][range(0, numDevices)] pointer to array of device handles" + - type: $x_exp_program_flags_t + name: flags + desc: "[in] program information flags" - type: uint32_t name: count desc: "[in] number of program handles in `phPrograms`." 
diff --git a/unified-runtime/scripts/core/registry.yml b/unified-runtime/scripts/core/registry.yml index 9134cbf5e6090..7bee8e9efb798 100644 --- a/unified-runtime/scripts/core/registry.yml +++ b/unified-runtime/scripts/core/registry.yml @@ -685,7 +685,10 @@ etors: - name: DEVICE_WAIT_EXP desc: Enumerator for $xDeviceWaitExp value: '293' -max_id: '293' +- name: PROGRAM_DYNAMIC_LINK_EXP + desc: Enumerator for $xProgramDynamicLinkExp + value: '294' +max_id: '294' --- type: enum desc: Defines structure types diff --git a/unified-runtime/source/adapters/cuda/device.cpp b/unified-runtime/source/adapters/cuda/device.cpp index fed6666621e20..b357a595b6b3e 100644 --- a/unified-runtime/source/adapters/cuda/device.cpp +++ b/unified-runtime/source/adapters/cuda/device.cpp @@ -1180,6 +1180,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(false); case UR_DEVICE_INFO_DEVICE_WAIT_SUPPORT_EXP: return ReturnValue(true); + case UR_DEVICE_INFO_DYNAMIC_LINK_SUPPORT_EXP: + return ReturnValue(false); case UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP: return ReturnValue(true); case UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES: { diff --git a/unified-runtime/source/adapters/cuda/program.cpp b/unified-runtime/source/adapters/cuda/program.cpp index 15afa125a80a2..9cea38ab02e0c 100644 --- a/unified-runtime/source/adapters/cuda/program.cpp +++ b/unified-runtime/source/adapters/cuda/program.cpp @@ -201,6 +201,7 @@ urProgramCompile(ur_context_handle_t hContext, ur_program_handle_t hProgram, UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp(ur_program_handle_t, uint32_t, ur_device_handle_t *, + ur_exp_program_flags_t, const char *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -208,6 +209,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp(ur_program_handle_t, UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp(ur_program_handle_t, uint32_t, ur_device_handle_t *, + ur_exp_program_flags_t, const char *) { return 
UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -229,15 +231,21 @@ urProgramBuild(ur_context_handle_t /*hContext*/, ur_program_handle_t hProgram, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( - ur_context_handle_t, uint32_t, ur_device_handle_t *, uint32_t, - const ur_program_handle_t *, const char *, ur_program_handle_t *phProgram) { +UR_APIEXPORT ur_result_t UR_APICALL +urProgramLinkExp(ur_context_handle_t, uint32_t, ur_device_handle_t *, + ur_exp_program_flags_t, uint32_t, const ur_program_handle_t *, + const char *, ur_program_handle_t *phProgram) { if (nullptr != phProgram) { *phProgram = nullptr; } return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +UR_APIEXPORT ur_result_t UR_APICALL urProgramDynamicLinkExp( + ur_context_handle_t, uint32_t, const ur_program_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + /// Creates a new UR program object that is the outcome of linking all input /// programs. /// \TODO Implement linker options, requires mapping of OpenCL to CUDA diff --git a/unified-runtime/source/adapters/cuda/ur_interface_loader.cpp b/unified-runtime/source/adapters/cuda/ur_interface_loader.cpp index 0974f93315c4b..63e7d87134935 100644 --- a/unified-runtime/source/adapters/cuda/ur_interface_loader.cpp +++ b/unified-runtime/source/adapters/cuda/ur_interface_loader.cpp @@ -493,6 +493,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( pDdiTable->pfnBuildExp = urProgramBuildExp; pDdiTable->pfnCompileExp = urProgramCompileExp; pDdiTable->pfnLinkExp = urProgramLinkExp; + pDdiTable->pfnDynamicLinkExp = urProgramDynamicLinkExp; return UR_RESULT_SUCCESS; } diff --git a/unified-runtime/source/adapters/hip/device.cpp b/unified-runtime/source/adapters/hip/device.cpp index cd1672770aa2b..c1f38094d04b6 100644 --- a/unified-runtime/source/adapters/hip/device.cpp +++ b/unified-runtime/source/adapters/hip/device.cpp @@ -1045,6 +1045,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t 
hDevice, return ReturnValue(false); case UR_DEVICE_INFO_DEVICE_WAIT_SUPPORT_EXP: return ReturnValue(false); + case UR_DEVICE_INFO_DYNAMIC_LINK_SUPPORT_EXP: + return ReturnValue(false); case UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES: return ReturnValue(0); case UR_DEVICE_INFO_MEMORY_EXPORT_EXPORTABLE_DEVICE_MEM_EXP: diff --git a/unified-runtime/source/adapters/hip/program.cpp b/unified-runtime/source/adapters/hip/program.cpp index 94451730c66ca..cd45448fa7ceb 100644 --- a/unified-runtime/source/adapters/hip/program.cpp +++ b/unified-runtime/source/adapters/hip/program.cpp @@ -284,6 +284,7 @@ urProgramCompile(ur_context_handle_t hContext, ur_program_handle_t hProgram, UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp(ur_program_handle_t, uint32_t, ur_device_handle_t *, + ur_exp_program_flags_t, const char *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -291,6 +292,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp(ur_program_handle_t, UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp(ur_program_handle_t, uint32_t, ur_device_handle_t *, + ur_exp_program_flags_t, const char *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -313,15 +315,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild(ur_context_handle_t, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( - ur_context_handle_t, uint32_t, ur_device_handle_t *, uint32_t, - const ur_program_handle_t *, const char *, ur_program_handle_t *phProgram) { +UR_APIEXPORT ur_result_t UR_APICALL +urProgramLinkExp(ur_context_handle_t, uint32_t, ur_device_handle_t *, + ur_exp_program_flags_t, uint32_t, const ur_program_handle_t *, + const char *, ur_program_handle_t *phProgram) { if (nullptr != phProgram) { *phProgram = nullptr; } return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +UR_APIEXPORT ur_result_t UR_APICALL urProgramDynamicLinkExp( + ur_context_handle_t, uint32_t, const ur_program_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + UR_APIEXPORT 
ur_result_t UR_APICALL urProgramLink(ur_context_handle_t, uint32_t, const ur_program_handle_t *, const char *, ur_program_handle_t *phProgram) { diff --git a/unified-runtime/source/adapters/hip/ur_interface_loader.cpp b/unified-runtime/source/adapters/hip/ur_interface_loader.cpp index c56914e46101d..e09ce61d1f099 100644 --- a/unified-runtime/source/adapters/hip/ur_interface_loader.cpp +++ b/unified-runtime/source/adapters/hip/ur_interface_loader.cpp @@ -486,6 +486,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( pDdiTable->pfnBuildExp = urProgramBuildExp; pDdiTable->pfnCompileExp = urProgramCompileExp; pDdiTable->pfnLinkExp = urProgramLinkExp; + pDdiTable->pfnDynamicLinkExp = urProgramDynamicLinkExp; return UR_RESULT_SUCCESS; } diff --git a/unified-runtime/source/adapters/level_zero/device.cpp b/unified-runtime/source/adapters/level_zero/device.cpp index d4b4d8a98f0c5..90309b9b61765 100644 --- a/unified-runtime/source/adapters/level_zero/device.cpp +++ b/unified-runtime/source/adapters/level_zero/device.cpp @@ -1322,6 +1322,8 @@ ur_result_t urDeviceGetInfo( return ReturnValue(true); case UR_DEVICE_INFO_DEVICE_WAIT_SUPPORT_EXP: return ReturnValue(true); + case UR_DEVICE_INFO_DYNAMIC_LINK_SUPPORT_EXP: + return ReturnValue(true); case UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP: return ReturnValue(true); case UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS: { diff --git a/unified-runtime/source/adapters/level_zero/program.cpp b/unified-runtime/source/adapters/level_zero/program.cpp index b77a972541877..7b791fd567f39 100644 --- a/unified-runtime/source/adapters/level_zero/program.cpp +++ b/unified-runtime/source/adapters/level_zero/program.cpp @@ -161,7 +161,8 @@ ur_result_t urProgramBuild( const char *Options) { std::vector Devices = Context->getDevices(); return ur::level_zero::urProgramBuildExp(Program, Devices.size(), - Devices.data(), Options); + Devices.data(), + ur_exp_program_flags_t{}, Options); } ur_result_t urProgramBuildExp( @@ 
-171,6 +172,8 @@ ur_result_t urProgramBuildExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in][optional] pointer to build options null-terminated string. const char *pOptions) { // TODO @@ -251,10 +254,11 @@ ur_result_t urProgramBuildExp( } else { // The call to zeModuleCreate does not report an error if there are // unresolved symbols because it thinks these could be resolved later via - // a call to zeModuleDynamicLink. However, modules created with - // urProgramBuild are supposed to be fully linked and ready to use. - // Therefore, do an extra check now for unresolved symbols. - ZeResult = checkUnresolvedSymbols(ZeModuleHandle, &ZeBuildLog); + // a call to zeModuleDynamicLink. However, unless explicitly allowed, + // modules created with urProgramBuild are supposed to be fully linked and + // ready to use. Therefore, do an extra check now for unresolved symbols. + if (!(flags & UR_EXP_PROGRAM_FLAG_ALLOW_UNRESOLVED_SYMBOLS)) + ZeResult = checkUnresolvedSymbols(ZeModuleHandle, &ZeBuildLog); if (ZeResult != ZE_RESULT_SUCCESS) { hProgram->setState(ZeDevice, ur_program_handle_t_::Invalid); Result = (ZeResult == ZE_RESULT_ERROR_MODULE_LINK_FAILURE) @@ -280,6 +284,8 @@ ur_result_t urProgramCompileExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + [[maybe_unused]] ur_exp_program_flags_t flags, /// [in][optional] pointer to build options null-terminated string. 
const char *pOptions) { std::scoped_lock Guard(hProgram->Mutex); @@ -325,7 +331,8 @@ ur_result_t urProgramCompile( const char *Options) { auto devices = Context->getDevices(); return ur::level_zero::urProgramCompileExp(Program, devices.size(), - devices.data(), Options); + devices.data(), + ur_exp_program_flags_t{}, Options); } ur_result_t urProgramLink( @@ -340,9 +347,9 @@ ur_result_t urProgramLink( /// [out] pointer to handle of program object created. ur_program_handle_t *Program) { std::vector Devices = Context->getDevices(); - return ur::level_zero::urProgramLinkExp(Context, Devices.size(), - Devices.data(), Count, Programs, - Options, Program); + return ur::level_zero::urProgramLinkExp( + Context, Devices.size(), Devices.data(), ur_exp_program_flags_t{}, Count, + Programs, Options, Program); } ur_result_t urProgramLinkExp( @@ -352,6 +359,8 @@ ur_result_t urProgramLinkExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in] number of program handles in `phPrograms`. uint32_t count, /// [in][range(0, count)] pointer to array of program handles. @@ -383,17 +392,18 @@ ur_result_t urProgramLinkExp( ur_result_t UrResult = UR_RESULT_SUCCESS; try { - // Acquire a "shared" lock on each of the input programs, and also validate - // that they are all in Object state for each device in the input list. + // Acquire a "shared" lock on each of the input programs, and also + // validate that they are all in Object state for each device in the input + // list. // // There is no danger of deadlock here even if two threads call - // urProgramLink simultaneously with the same input programs in a different - // order. If we were acquiring these with "exclusive" access, this could - // lead to a classic lock ordering deadlock. However, there is no such - // deadlock potential with "shared" access. 
There could also be a deadlock - // potential if there was some other code that holds more than one of these - // locks simultaneously with "exclusive" access. However, there is no such - // code like that, so this is also not a danger. + // urProgramLink simultaneously with the same input programs in a + // different order. If we were acquiring these with "exclusive" access, + // this could lead to a classic lock ordering deadlock. However, there is + // no such deadlock potential with "shared" access. There could also be a + // deadlock potential if there was some other code that holds more than + // one of these locks simultaneously with "exclusive" access. However, + // there is no such code like that, so this is also not a danger. std::vector> Guards(count); const ur_program_handle_t_::CodeFormat CommonCodeFormat = phPrograms[0]->getCodeFormat(); @@ -418,11 +428,11 @@ ur_result_t urProgramLinkExp( // Previous calls to urProgramCompile did not actually compile the SPIR-V. // Instead, we postpone compilation until this point, when all the modules - // are linked together. By doing compilation and linking together, the JIT - // compiler is able see all modules and do cross-module optimizations. + // are linked together. By doing compilation and linking together, the + // JIT compiler is able see all modules and do cross-module optimizations. // - // Construct a ze_module_program_exp_desc_t which contains information about - // all of the modules that will be linked together. + // Construct a ze_module_program_exp_desc_t which contains information + // about all of the modules that will be linked together. ZeStruct ZeExtModuleDesc; std::vector CodeSizes(count); std::vector CodeBufs(count); @@ -466,15 +476,15 @@ ur_result_t urProgramLinkExp( ZeModuleDesc.pInputModule = reinterpret_cast(1); ZeModuleDesc.inputSize = 1; - // We need a Level Zero extension to compile multiple programs together into - // a single Level Zero module. 
However, we don't need that extension if - // there happens to be only one input program. + // We need a Level Zero extension to compile multiple programs together + // into a single Level Zero module. However, we don't need that extension + // if there happens to be only one input program. // // The "|| (NumInputPrograms == 1)" term is a workaround for a bug in the // Level Zero driver. The driver's "ze_module_program_exp_desc_t" // extension should work even in the case when there is just one input - // module. However, there is currently a bug in the driver that leads to a - // crash. As a workaround, do not use the extension when there is one + // module. However, there is currently a bug in the driver that leads to + // a crash. As a workaround, do not use the extension when there is one // input module. // // TODO: Remove this workaround when the driver is fixed. @@ -519,8 +529,9 @@ ur_result_t urProgramLinkExp( &ZeModule, &ZeBuildLog)); // We still create a ur_program_handle_t_ object even if there is a - // BUILD_FAILURE because we need the object to hold the ZeBuildLog. There - // is no build log created for other errors, so we don't create an object. + // BUILD_FAILURE because we need the object to hold the ZeBuildLog. + // There is no build log created for other errors, so we don't create an + // object. UrResult = ze2urResult(ZeResult); if (ZeResult != ZE_RESULT_SUCCESS && ZeResult != ZE_RESULT_ERROR_MODULE_BUILD_FAILURE) { @@ -528,13 +539,14 @@ ur_result_t urProgramLinkExp( } // The call to zeModuleCreate does not report an error if there are - // unresolved symbols because it thinks these could be resolved later via - // a call to zeModuleDynamicLink. However, modules created with + // unresolved symbols because it thinks these could be resolved later + // via a call to zeModuleDynamicLink. However, modules created with // piProgramLink are supposed to be fully linked and ready to use. - // Therefore, do an extra check now for unresolved symbols. 
Note that we - // still create a ur_program_handle_t_ if there are unresolved symbols - // because the ZeBuildLog tells which symbols are unresolved. - if (ZeResult == ZE_RESULT_SUCCESS) { + // Therefore, do an extra check now for unresolved symbols. Note that + // we still create a ur_program_handle_t_ if there are unresolved + // symbols because the ZeBuildLog tells which symbols are unresolved. + if (ZeResult == ZE_RESULT_SUCCESS && + !(flags & UR_EXP_PROGRAM_FLAG_ALLOW_UNRESOLVED_SYMBOLS)) { ZeResult = checkUnresolvedSymbols(ZeModule, &ZeBuildLog); UrResult = ze2urResult(ZeResult); } @@ -555,6 +567,51 @@ ur_result_t urProgramLinkExp( return UrResult; } +ur_result_t urProgramDynamicLinkExp( + /// [in] handle of the context instance. + ur_context_handle_t hContext, + /// [in] number of program handles in `phPrograms`. + uint32_t count, + /// [in][range(0, count)] pointer to array of program handles. + const ur_program_handle_t *phPrograms) { + ur_result_t UrResult = UR_RESULT_SUCCESS; + + try { + // Reserve room for all modules. It may be too much on some devices, which + // is why we do not resize. + std::vector ZeModules; + ZeModules.reserve(count); + + for (ur_device_handle_t Device : hContext->getDevices()) { + for (uint32_t I = 0; I < count; ++I) { + if (phPrograms[I]->hasZeModuleForDevice(Device->ZeDevice)) { + ZeModules.push_back( + phPrograms[I]->getZeModuleHandle(Device->ZeDevice)); + } + } + + if (ZeModules.empty()) + continue; + + // TODO: What should be done with the log? Since there is no result + // program, what can it be attached to? + ze_result_t ZeResult = ZE_CALL_NOCHECK( + zeModuleDynamicLink, (ZeModules.size(), ZeModules.data(), nullptr)); + + if (ZeResult != ZE_RESULT_SUCCESS) + return ze2urResult(ZeResult); + + // Clear so the storage stays allocated, but the size is reset to 0. + ZeModules.clear(); + } + } catch (const std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } catch (...) 
{ + return UR_RESULT_ERROR_UNKNOWN; + } + return UrResult; +} + ur_result_t urProgramRetain( /// [in] handle for the Program to retain ur_program_handle_t Program) { @@ -641,7 +698,8 @@ ur_result_t urProgramGetFunctionPointer( ur_program_handle_t Program, /// [in] A null-terminates string denoting the mangled function name. const char *FunctionName, - /// [out] Returns the pointer to the function if it is found in the program. + /// [out] Returns the pointer to the function if it is found in the + /// program. void **FunctionPointerRet) { std::shared_lock Guard(Program->Mutex); if (Program->getState(Device->ZeDevice) != ur_program_handle_t_::Exe) { @@ -729,10 +787,11 @@ ur_result_t urProgramGetInfo( ur_program_info_t PropName, /// [in] the size of the Program property. size_t PropSize, - /// [in,out][optional] array of bytes of holding the program info property. - /// If propSize is not equal to or greater than the real number of bytes - /// needed to return the info then the ::UR_RESULT_ERROR_INVALID_SIZE error - /// is returned and pProgramInfo is not used. + /// [in,out][optional] array of bytes of holding the program info + /// property. If propSize is not equal to or greater than the real number + /// of bytes needed to return the info then the + /// ::UR_RESULT_ERROR_INVALID_SIZE error is returned and pProgramInfo is + /// not used. void *ProgramInfo, /// [out][optional] pointer to the actual size in bytes of data copied to /// propName. @@ -782,12 +841,13 @@ ur_result_t urProgramGetInfo( std::shared_lock Guard(Program->Mutex); size_t NumDevices = Program->AssociatedDevices.size(); if (PropSizeRet) { - // Return the size of the array of pointers to binaries (for each device). + // Return the size of the array of pointers to binaries (for each + // device). *PropSizeRet = NumDevices * sizeof(uint8_t *); } - // If the caller did not provide an array of pointers to copy binaries into, - // return early. 
+ // If the caller did not provide an array of pointers to copy binaries + // into, return early. if (!ProgramInfo) break; @@ -1038,8 +1098,9 @@ ur_result_t urProgramSetSpecializationConstants( std::scoped_lock Guard(Program->Mutex); // Remember the value of this specialization constant until the program is - // built. Note that we only save the pointer to the buffer that contains the - // value. The caller is responsible for maintaining storage for this buffer. + // built. Note that we only save the pointer to the buffer that contains + // the value. The caller is responsible for maintaining storage for this + // buffer. // // NOTE: SpecSize is unused in Level Zero, the size is known from SPIR-V by // SpecID. diff --git a/unified-runtime/source/adapters/level_zero/program.hpp b/unified-runtime/source/adapters/level_zero/program.hpp index fb2cc1f12ee5e..91bdd8c64d19a 100644 --- a/unified-runtime/source/adapters/level_zero/program.hpp +++ b/unified-runtime/source/adapters/level_zero/program.hpp @@ -113,11 +113,15 @@ struct ur_program_handle_t_ : ur_object { return DeviceDataMap[ZeDevice].State; } - ze_module_handle_t getZeModuleHandle(ze_device_handle_t ZeDevice) { + bool hasZeModuleForDevice(ze_device_handle_t ZeDevice) const { + return DeviceDataMap.find(ZeDevice) != DeviceDataMap.end(); + } + + ze_module_handle_t getZeModuleHandle(ze_device_handle_t ZeDevice) const { if (DeviceDataMap.find(ZeDevice) == DeviceDataMap.end()) return InteropZeModule; - return DeviceDataMap[ZeDevice].ZeModule; + return DeviceDataMap.at(ZeDevice).ZeModule; } CodeFormat getCodeFormat(ze_device_handle_t ZeDevice = nullptr) const { diff --git a/unified-runtime/source/adapters/level_zero/ur_interface_loader.cpp b/unified-runtime/source/adapters/level_zero/ur_interface_loader.cpp index ada7e5fe89aa5..c343fe4cb3236 100644 --- a/unified-runtime/source/adapters/level_zero/ur_interface_loader.cpp +++ b/unified-runtime/source/adapters/level_zero/ur_interface_loader.cpp @@ -413,6 +413,7 @@ 
UR_APIEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( return result; } + pDdiTable->pfnDynamicLinkExp = ur::level_zero::urProgramDynamicLinkExp; pDdiTable->pfnBuildExp = ur::level_zero::urProgramBuildExp; pDdiTable->pfnCompileExp = ur::level_zero::urProgramCompileExp; pDdiTable->pfnLinkExp = ur::level_zero::urProgramLinkExp; diff --git a/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp b/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp index 85545ed589173..160e0ea77f188 100644 --- a/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp +++ b/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp @@ -768,6 +768,9 @@ ur_result_t urCommandBufferGetNativeHandleExp(ur_exp_command_buffer_handle_t hCommandBuffer, ur_native_handle_t *phNativeCommandBuffer); ur_result_t urDeviceWaitExp(ur_device_handle_t hDevice); +ur_result_t urProgramDynamicLinkExp(ur_context_handle_t hContext, + uint32_t count, + const ur_program_handle_t *phPrograms); ur_result_t urEnqueueTimestampRecordingExp( ur_queue_handle_t hQueue, bool blocking, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); @@ -793,13 +796,16 @@ ur_result_t urMemoryExportExportMemoryHandleExp( void *pMemHandleRet); ur_result_t urProgramBuildExp(ur_program_handle_t hProgram, uint32_t numDevices, ur_device_handle_t *phDevices, + ur_exp_program_flags_t flags, const char *pOptions); ur_result_t urProgramCompileExp(ur_program_handle_t hProgram, uint32_t numDevices, ur_device_handle_t *phDevices, + ur_exp_program_flags_t flags, const char *pOptions); ur_result_t urProgramLinkExp(ur_context_handle_t hContext, uint32_t numDevices, - ur_device_handle_t *phDevices, uint32_t count, + ur_device_handle_t *phDevices, + ur_exp_program_flags_t flags, uint32_t count, const ur_program_handle_t *phPrograms, const char *pOptions, ur_program_handle_t *phProgram); diff --git 
a/unified-runtime/source/adapters/mock/ur_mockddi.cpp b/unified-runtime/source/adapters/mock/ur_mockddi.cpp index 569570bf89117..cbb5cbc26e1f6 100644 --- a/unified-runtime/source/adapters/mock/ur_mockddi.cpp +++ b/unified-runtime/source/adapters/mock/ur_mockddi.cpp @@ -11281,6 +11281,53 @@ __urdlllocal ur_result_t UR_APICALL urDeviceWaitExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramDynamicLinkExp +__urdlllocal ur_result_t UR_APICALL urProgramDynamicLinkExp( + /// [in] handle of the context instance. + ur_context_handle_t hContext, + /// [in] number of program handles in `phPrograms`. + uint32_t count, + /// [in][range(0, count)] pointer to array of program handles. + const ur_program_handle_t *phPrograms) try { + ur_result_t result = UR_RESULT_SUCCESS; + + ur_program_dynamic_link_exp_params_t params = {&hContext, &count, + &phPrograms}; + + auto beforeCallback = reinterpret_cast( + mock::getCallbacks().get_before_callback("urProgramDynamicLinkExp")); + if (beforeCallback) { + result = beforeCallback(¶ms); + if (result != UR_RESULT_SUCCESS) { + return result; + } + } + + auto replaceCallback = reinterpret_cast( + mock::getCallbacks().get_replace_callback("urProgramDynamicLinkExp")); + if (replaceCallback) { + result = replaceCallback(¶ms); + } else { + + result = UR_RESULT_SUCCESS; + } + + if (result != UR_RESULT_SUCCESS) { + return result; + } + + auto afterCallback = reinterpret_cast( + mock::getCallbacks().get_after_callback("urProgramDynamicLinkExp")); + if (afterCallback) { + return afterCallback(¶ms); + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueTimestampRecordingExp __urdlllocal ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( @@ -11710,12 +11757,14 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuildExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in][optional] pointer to build options null-terminated string. const char *pOptions) try { ur_result_t result = UR_RESULT_SUCCESS; ur_program_build_exp_params_t params = {&hProgram, &numDevices, &phDevices, - &pOptions}; + &flags, &pOptions}; auto beforeCallback = reinterpret_cast( mock::getCallbacks().get_before_callback("urProgramBuildExp")); @@ -11759,12 +11808,14 @@ __urdlllocal ur_result_t UR_APICALL urProgramCompileExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in][optional] pointer to build options null-terminated string. const char *pOptions) try { ur_result_t result = UR_RESULT_SUCCESS; ur_program_compile_exp_params_t params = {&hProgram, &numDevices, &phDevices, - &pOptions}; + &flags, &pOptions}; auto beforeCallback = reinterpret_cast( mock::getCallbacks().get_before_callback("urProgramCompileExp")); @@ -11808,6 +11859,8 @@ __urdlllocal ur_result_t UR_APICALL urProgramLinkExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in] number of program handles in `phPrograms`. uint32_t count, /// [in][range(0, count)] pointer to array of program handles. 
@@ -11822,8 +11875,8 @@ __urdlllocal ur_result_t UR_APICALL urProgramLinkExp( } ur_program_link_exp_params_t params = {&hContext, &numDevices, &phDevices, - &count, &phPrograms, &pOptions, - &phProgram}; + &flags, &count, &phPrograms, + &pOptions, &phProgram}; auto beforeCallback = reinterpret_cast( mock::getCallbacks().get_before_callback("urProgramLinkExp")); @@ -13047,6 +13100,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; + pDdiTable->pfnDynamicLinkExp = driver::urProgramDynamicLinkExp; + pDdiTable->pfnBuildExp = driver::urProgramBuildExp; pDdiTable->pfnCompileExp = driver::urProgramCompileExp; diff --git a/unified-runtime/source/adapters/native_cpu/device.cpp b/unified-runtime/source/adapters/native_cpu/device.cpp index 8d0e43a50eee5..c65d9742d7c48 100644 --- a/unified-runtime/source/adapters/native_cpu/device.cpp +++ b/unified-runtime/source/adapters/native_cpu/device.cpp @@ -451,6 +451,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_DEVICE_WAIT_SUPPORT_EXP: return ReturnValue(false); + case UR_DEVICE_INFO_DYNAMIC_LINK_SUPPORT_EXP: + return ReturnValue(true); + case UR_DEVICE_INFO_GLOBAL_VARIABLE_SUPPORT: return ReturnValue(false); diff --git a/unified-runtime/source/adapters/native_cpu/program.cpp b/unified-runtime/source/adapters/native_cpu/program.cpp index fee72f8a6bc3c..be7dc0764fb6d 100644 --- a/unified-runtime/source/adapters/native_cpu/program.cpp +++ b/unified-runtime/source/adapters/native_cpu/program.cpp @@ -143,6 +143,7 @@ urProgramLink(ur_context_handle_t /*hContext*/, uint32_t /*count*/, UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp(ur_program_handle_t, uint32_t, ur_device_handle_t *, + ur_exp_program_flags_t, const char *) { // Currently for Native CPU the program is offline compiled, so // urProgramCompile is a no-op. 
@@ -152,15 +153,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp(ur_program_handle_t, UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp(ur_program_handle_t, uint32_t, ur_device_handle_t *, + ur_exp_program_flags_t, const char *) { // Currently for Native CPU the program is offline compiled and linked, // so urProgramBuild is a no-op. return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( - ur_context_handle_t, uint32_t, ur_device_handle_t *, uint32_t, - const ur_program_handle_t *, const char *, ur_program_handle_t *phProgram) { +UR_APIEXPORT ur_result_t UR_APICALL +urProgramLinkExp(ur_context_handle_t, uint32_t, ur_device_handle_t *, + ur_exp_program_flags_t, uint32_t, const ur_program_handle_t *, + const char *, ur_program_handle_t *phProgram) { if (nullptr != phProgram) { *phProgram = nullptr; } @@ -169,6 +172,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL urProgramDynamicLinkExp( + ur_context_handle_t, uint32_t, const ur_program_handle_t *) { + // Currently for Native CPU the program is already linked and all its + // symbols are resolved, so this is a no-op. 
+ return UR_RESULT_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urProgramRetain(ur_program_handle_t hProgram) { hProgram->incrementReferenceCount(); diff --git a/unified-runtime/source/adapters/native_cpu/ur_interface_loader.cpp b/unified-runtime/source/adapters/native_cpu/ur_interface_loader.cpp index 6510e26e3d2d3..3bea6e1f3511a 100644 --- a/unified-runtime/source/adapters/native_cpu/ur_interface_loader.cpp +++ b/unified-runtime/source/adapters/native_cpu/ur_interface_loader.cpp @@ -470,6 +470,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( pDdiTable->pfnBuildExp = urProgramBuildExp; pDdiTable->pfnCompileExp = urProgramCompileExp; pDdiTable->pfnLinkExp = urProgramLinkExp; + pDdiTable->pfnDynamicLinkExp = urProgramDynamicLinkExp; return UR_RESULT_SUCCESS; } diff --git a/unified-runtime/source/adapters/offload/device.cpp b/unified-runtime/source/adapters/offload/device.cpp index 649d74516608c..5ebe7170dcc48 100644 --- a/unified-runtime/source/adapters/offload/device.cpp +++ b/unified-runtime/source/adapters/offload/device.cpp @@ -241,6 +241,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, ur_queue_flags_t{UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE}); case UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: return ReturnValue(0); + case UR_DEVICE_INFO_DYNAMIC_LINK_SUPPORT_EXP: + return ReturnValue(false); case UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES: return ReturnValue(0); case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: { diff --git a/unified-runtime/source/adapters/offload/ur_interface_loader.cpp b/unified-runtime/source/adapters/offload/ur_interface_loader.cpp index 4843ef5be7d66..1925cd383df1c 100644 --- a/unified-runtime/source/adapters/offload/ur_interface_loader.cpp +++ b/unified-runtime/source/adapters/offload/ur_interface_loader.cpp @@ -424,6 +424,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( pDdiTable->pfnBuildExp = urProgramBuildExp; pDdiTable->pfnCompileExp = nullptr; 
pDdiTable->pfnLinkExp = nullptr; + pDdiTable->pfnDynamicLinkExp = nullptr; return UR_RESULT_SUCCESS; } diff --git a/unified-runtime/source/adapters/opencl/device.cpp b/unified-runtime/source/adapters/opencl/device.cpp index 36471fff34bf7..c05c18db921cb 100644 --- a/unified-runtime/source/adapters/opencl/device.cpp +++ b/unified-runtime/source/adapters/opencl/device.cpp @@ -1429,6 +1429,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(true); case UR_DEVICE_INFO_DEVICE_WAIT_SUPPORT_EXP: return ReturnValue(false); + case UR_DEVICE_INFO_DYNAMIC_LINK_SUPPORT_EXP: + return ReturnValue(false); case UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES: return ReturnValue(0); case UR_DEVICE_INFO_LUID: { diff --git a/unified-runtime/source/adapters/opencl/program.cpp b/unified-runtime/source/adapters/opencl/program.cpp index dffd9aed9074b..dbd963debfd27 100644 --- a/unified-runtime/source/adapters/opencl/program.cpp +++ b/unified-runtime/source/adapters/opencl/program.cpp @@ -294,6 +294,7 @@ urProgramLink(ur_context_handle_t hContext, uint32_t count, UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp(ur_program_handle_t, uint32_t, ur_device_handle_t *, + ur_exp_program_flags_t, const char *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -301,19 +302,26 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp(ur_program_handle_t, UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp(ur_program_handle_t, uint32_t, ur_device_handle_t *, + ur_exp_program_flags_t, const char *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( - ur_context_handle_t, uint32_t, ur_device_handle_t *, uint32_t, - const ur_program_handle_t *, const char *, ur_program_handle_t *phProgram) { +UR_APIEXPORT ur_result_t UR_APICALL +urProgramLinkExp(ur_context_handle_t, uint32_t, ur_device_handle_t *, + ur_exp_program_flags_t, uint32_t, const ur_program_handle_t *, + const char *, ur_program_handle_t 
*phProgram) { if (nullptr != phProgram) { *phProgram = nullptr; } return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +UR_APIEXPORT ur_result_t UR_APICALL urProgramDynamicLinkExp( + ur_context_handle_t, uint32_t, const ur_program_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + static cl_int mapURProgramBuildInfoToCL(ur_program_build_info_t URPropName) { switch (static_cast(URPropName)) { diff --git a/unified-runtime/source/adapters/opencl/ur_interface_loader.cpp b/unified-runtime/source/adapters/opencl/ur_interface_loader.cpp index 96b627aea3f53..6dd875ec3019b 100644 --- a/unified-runtime/source/adapters/opencl/ur_interface_loader.cpp +++ b/unified-runtime/source/adapters/opencl/ur_interface_loader.cpp @@ -473,6 +473,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( pDdiTable->pfnBuildExp = urProgramBuildExp; pDdiTable->pfnCompileExp = urProgramCompileExp; pDdiTable->pfnLinkExp = urProgramLinkExp; + pDdiTable->pfnDynamicLinkExp = urProgramDynamicLinkExp; return UR_RESULT_SUCCESS; } diff --git a/unified-runtime/source/loader/layers/sanitizer/asan/asan_ddi.cpp b/unified-runtime/source/loader/layers/sanitizer/asan/asan_ddi.cpp index 012aa1422cfa1..90e26e5566aa7 100644 --- a/unified-runtime/source/loader/layers/sanitizer/asan/asan_ddi.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/asan/asan_ddi.cpp @@ -345,6 +345,8 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuildExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in][optional] pointer to build options null-terminated string. 
const char *pOptions) { auto pfnBuildExp = getContext()->urDdiTable.ProgramExp.pfnBuildExp; @@ -355,7 +357,7 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuildExp( UR_LOG_L(getContext()->logger, DEBUG, "==== urProgramBuildExp"); - auto UrRes = pfnBuildExp(hProgram, numDevices, phDevices, pOptions); + auto UrRes = pfnBuildExp(hProgram, numDevices, phDevices, flags, pOptions); if (UrRes != UR_RESULT_SUCCESS) { PrintUrBuildLogIfError(UrRes, hProgram, phDevices, numDevices); return UrRes; @@ -409,6 +411,8 @@ ur_result_t UR_APICALL urProgramLinkExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in] number of program handles in `phPrograms`. uint32_t count, /// [in][range(0, count)] pointer to array of program handles. @@ -425,7 +429,7 @@ ur_result_t UR_APICALL urProgramLinkExp( UR_LOG_L(getContext()->logger, DEBUG, "==== urProgramLinkExp"); - auto UrRes = pfnProgramLinkExp(hContext, numDevices, phDevices, count, + auto UrRes = pfnProgramLinkExp(hContext, numDevices, phDevices, flags, count, phPrograms, pOptions, phProgram); if (UrRes != UR_RESULT_SUCCESS) { PrintUrBuildLogIfError(UrRes, *phProgram, phDevices, numDevices); @@ -438,6 +442,27 @@ ur_result_t UR_APICALL urProgramLinkExp( return UR_RESULT_SUCCESS; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramDynamicLinkExp +ur_result_t UR_APICALL urProgramDynamicLinkExp( + /// [in] handle of the context instance. + ur_context_handle_t hContext, + /// [in] number of program handles in `phPrograms`. + uint32_t count, + /// [in][range(0, count)] pointer to array of program handles. 
+ const ur_program_handle_t *phPrograms) { + auto pfnProgramDynamicLinkExp = + getContext()->urDdiTable.ProgramExp.pfnDynamicLinkExp; + + if (nullptr == pfnProgramDynamicLinkExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + UR_LOG_L(getContext()->logger, DEBUG, "==== urProgramDynamicLinkExp"); + + return pfnProgramDynamicLinkExp(hContext, count, phPrograms); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urProgramRelease ur_result_t UR_APICALL urProgramRelease( @@ -1848,6 +1873,8 @@ __urdlllocal ur_result_t UR_APICALL urGetProgramExpProcAddrTable( pDdiTable->pfnBuildExp = ur_sanitizer_layer::asan::urProgramBuildExp; pDdiTable->pfnLinkExp = ur_sanitizer_layer::asan::urProgramLinkExp; + pDdiTable->pfnDynamicLinkExp = + ur_sanitizer_layer::asan::urProgramDynamicLinkExp; return result; } diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp index 810fd76120b2d..ebd1b197988b3 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -329,13 +329,15 @@ ur_result_t urProgramBuildExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in][optional] pointer to build options null-terminated string. 
const char *pOptions) { auto pfnBuildExp = getContext()->urDdiTable.ProgramExp.pfnBuildExp; UR_LOG_L(getContext()->logger, DEBUG, "==== urProgramBuildExp"); - auto UrRes = pfnBuildExp(hProgram, numDevices, phDevices, pOptions); + auto UrRes = pfnBuildExp(hProgram, numDevices, phDevices, flags, pOptions); if (UrRes != UR_RESULT_SUCCESS) { PrintUrBuildLogIfError(UrRes, hProgram, phDevices, numDevices); return UrRes; @@ -385,6 +387,8 @@ ur_result_t urProgramLinkExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in] number of program handles in `phPrograms`. uint32_t count, /// [in][range(0, count)] pointer to array of program handles. @@ -397,7 +401,7 @@ ur_result_t urProgramLinkExp( UR_LOG_L(getContext()->logger, DEBUG, "==== urProgramLinkExp"); - auto UrRes = pfnProgramLinkExp(hContext, numDevices, phDevices, count, + auto UrRes = pfnProgramLinkExp(hContext, numDevices, phDevices, flags, count, phPrograms, pOptions, phProgram); if (UrRes != UR_RESULT_SUCCESS) { PrintUrBuildLogIfError(UrRes, *phProgram, phDevices, numDevices); @@ -410,6 +414,23 @@ ur_result_t urProgramLinkExp( return UR_RESULT_SUCCESS; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramDynamicLinkExp +ur_result_t urProgramDynamicLinkExp( + /// [in] handle of the context instance. + ur_context_handle_t hContext, + /// [in] number of program handles in `phPrograms`. + uint32_t count, + /// [in][range(0, count)] pointer to array of program handles. 
+ const ur_program_handle_t *phPrograms) { + auto pfnProgramDynamicLinkExp = + getContext()->urDdiTable.ProgramExp.pfnDynamicLinkExp; + + UR_LOG_L(getContext()->logger, DEBUG, "==== urProgramDynamicLinkExp"); + + return pfnProgramDynamicLinkExp(hContext, count, phPrograms); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urProgramRelease ur_result_t urProgramRelease( @@ -1931,6 +1952,8 @@ ur_result_t urGetProgramExpProcAddrTable( pDdiTable->pfnBuildExp = ur_sanitizer_layer::msan::urProgramBuildExp; pDdiTable->pfnLinkExp = ur_sanitizer_layer::msan::urProgramLinkExp; + pDdiTable->pfnDynamicLinkExp = + ur_sanitizer_layer::msan::urProgramDynamicLinkExp; return result; } diff --git a/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_ddi.cpp b/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_ddi.cpp index 61849ac0b363a..48ab96effd63a 100644 --- a/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_ddi.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_ddi.cpp @@ -322,12 +322,14 @@ ur_result_t urProgramBuildExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in][optional] pointer to build options null-terminated string. 
const char *pOptions) { UR_LOG_L(getContext()->logger, DEBUG, "==== urProgramBuildExp"); auto UrRes = getContext()->urDdiTable.ProgramExp.pfnBuildExp( - hProgram, numDevices, phDevices, pOptions); + hProgram, numDevices, phDevices, flags, pOptions); if (UrRes != UR_RESULT_SUCCESS) { PrintUrBuildLogIfError(UrRes, hProgram, phDevices, numDevices); return UrRes; @@ -345,6 +347,8 @@ ur_result_t urProgramLinkExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in] number of program handles in `phPrograms`. uint32_t count, /// [in][range(0, count)] pointer to array of program handles. @@ -356,7 +360,8 @@ ur_result_t urProgramLinkExp( UR_LOG_L(getContext()->logger, DEBUG, "==== urProgramLinkExp"); auto UrRes = getContext()->urDdiTable.ProgramExp.pfnLinkExp( - hContext, numDevices, phDevices, count, phPrograms, pOptions, phProgram); + hContext, numDevices, phDevices, flags, count, phPrograms, pOptions, + phProgram); if (UrRes != UR_RESULT_SUCCESS) { PrintUrBuildLogIfError(UrRes, *phProgram, phDevices, numDevices); return UrRes; @@ -368,6 +373,20 @@ ur_result_t urProgramLinkExp( return UR_RESULT_SUCCESS; } +/// @brief Intercept function for urProgramDynamicLinkExp +ur_result_t urProgramDynamicLinkExp( + /// [in] handle of the context instance. + ur_context_handle_t hContext, + /// [in] number of program handles in `phPrograms`. + uint32_t count, + /// [in][range(0, count)] pointer to array of program handles. 
+ const ur_program_handle_t *phPrograms) { + UR_LOG_L(getContext()->logger, DEBUG, "==== urProgramDynamicLinkExp"); + + return getContext()->urDdiTable.ProgramExp.pfnDynamicLinkExp(hContext, count, + phPrograms); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urMemBufferCreate ur_result_t urMemBufferCreate( @@ -1434,6 +1453,8 @@ ur_result_t urGetProgramExpProcAddrTable( pDdiTable->pfnBuildExp = ur_sanitizer_layer::tsan::urProgramBuildExp; pDdiTable->pfnLinkExp = ur_sanitizer_layer::tsan::urProgramLinkExp; + pDdiTable->pfnDynamicLinkExp = + ur_sanitizer_layer::tsan::urProgramDynamicLinkExp; return UR_RESULT_SUCCESS; } diff --git a/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp b/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp index 5ea65875d4310..917b853246975 100644 --- a/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp +++ b/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp @@ -9550,6 +9550,46 @@ __urdlllocal ur_result_t UR_APICALL urDeviceWaitExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramDynamicLinkExp +__urdlllocal ur_result_t UR_APICALL urProgramDynamicLinkExp( + /// [in] handle of the context instance. + ur_context_handle_t hContext, + /// [in] number of program handles in `phPrograms`. + uint32_t count, + /// [in][range(0, count)] pointer to array of program handles. 
+ const ur_program_handle_t *phPrograms) { + auto pfnDynamicLinkExp = + getContext()->urDdiTable.ProgramExp.pfnDynamicLinkExp; + + if (nullptr == pfnDynamicLinkExp) + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + + ur_program_dynamic_link_exp_params_t params = {&hContext, &count, + &phPrograms}; + uint64_t instance = getContext()->notify_begin( + UR_FUNCTION_PROGRAM_DYNAMIC_LINK_EXP, "urProgramDynamicLinkExp", &params); + + auto &logger = getContext()->logger; + UR_LOG_L(logger, INFO, " ---> urProgramDynamicLinkExp\n"); + + ur_result_t result = pfnDynamicLinkExp(hContext, count, phPrograms); + + getContext()->notify_end(UR_FUNCTION_PROGRAM_DYNAMIC_LINK_EXP, + "urProgramDynamicLinkExp", &params, &result, + instance); + + if (logger.getLevel() <= UR_LOGGER_LEVEL_INFO) { + std::ostringstream args_str; + ur::extras::printFunctionParams( + args_str, UR_FUNCTION_PROGRAM_DYNAMIC_LINK_EXP, &params); + UR_LOG_L(logger, INFO, " <--- urProgramDynamicLinkExp({}) -> {};\n", + args_str.str(), result); + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueTimestampRecordingExp __urdlllocal ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( @@ -9920,6 +9960,8 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuildExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in][optional] pointer to build options null-terminated string. 
const char *pOptions) { auto pfnBuildExp = getContext()->urDdiTable.ProgramExp.pfnBuildExp; @@ -9928,14 +9970,15 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuildExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; ur_program_build_exp_params_t params = {&hProgram, &numDevices, &phDevices, - &pOptions}; + &flags, &pOptions}; uint64_t instance = getContext()->notify_begin(UR_FUNCTION_PROGRAM_BUILD_EXP, "urProgramBuildExp", &params); auto &logger = getContext()->logger; UR_LOG_L(logger, INFO, " ---> urProgramBuildExp\n"); - ur_result_t result = pfnBuildExp(hProgram, numDevices, phDevices, pOptions); + ur_result_t result = + pfnBuildExp(hProgram, numDevices, phDevices, flags, pOptions); getContext()->notify_end(UR_FUNCTION_PROGRAM_BUILD_EXP, "urProgramBuildExp", &params, &result, instance); @@ -9960,6 +10003,8 @@ __urdlllocal ur_result_t UR_APICALL urProgramCompileExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in][optional] pointer to build options null-terminated string. 
const char *pOptions) { auto pfnCompileExp = getContext()->urDdiTable.ProgramExp.pfnCompileExp; @@ -9968,14 +10013,15 @@ __urdlllocal ur_result_t UR_APICALL urProgramCompileExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; ur_program_compile_exp_params_t params = {&hProgram, &numDevices, &phDevices, - &pOptions}; + &flags, &pOptions}; uint64_t instance = getContext()->notify_begin( UR_FUNCTION_PROGRAM_COMPILE_EXP, "urProgramCompileExp", &params); auto &logger = getContext()->logger; UR_LOG_L(logger, INFO, " ---> urProgramCompileExp\n"); - ur_result_t result = pfnCompileExp(hProgram, numDevices, phDevices, pOptions); + ur_result_t result = + pfnCompileExp(hProgram, numDevices, phDevices, flags, pOptions); getContext()->notify_end(UR_FUNCTION_PROGRAM_COMPILE_EXP, "urProgramCompileExp", &params, &result, instance); @@ -10000,6 +10046,8 @@ __urdlllocal ur_result_t UR_APICALL urProgramLinkExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in] number of program handles in `phPrograms`. uint32_t count, /// [in][range(0, count)] pointer to array of program handles. 
@@ -10017,15 +10065,15 @@ __urdlllocal ur_result_t UR_APICALL urProgramLinkExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; ur_program_link_exp_params_t params = {&hContext, &numDevices, &phDevices, - &count, &phPrograms, &pOptions, - &phProgram}; + &flags, &count, &phPrograms, + &pOptions, &phProgram}; uint64_t instance = getContext()->notify_begin(UR_FUNCTION_PROGRAM_LINK_EXP, "urProgramLinkExp", &params); auto &logger = getContext()->logger; UR_LOG_L(logger, INFO, " ---> urProgramLinkExp\n"); - ur_result_t result = pfnLinkExp(hContext, numDevices, phDevices, count, + ur_result_t result = pfnLinkExp(hContext, numDevices, phDevices, flags, count, phPrograms, pOptions, phProgram); getContext()->notify_end(UR_FUNCTION_PROGRAM_LINK_EXP, "urProgramLinkExp", @@ -11393,6 +11441,9 @@ __urdlllocal ur_result_t UR_APICALL urGetProgramExpProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; + dditable.pfnDynamicLinkExp = pDdiTable->pfnDynamicLinkExp; + pDdiTable->pfnDynamicLinkExp = ur_tracing_layer::urProgramDynamicLinkExp; + dditable.pfnBuildExp = pDdiTable->pfnBuildExp; pDdiTable->pfnBuildExp = ur_tracing_layer::urProgramBuildExp; diff --git a/unified-runtime/source/loader/layers/validation/ur_valddi.cpp b/unified-runtime/source/loader/layers/validation/ur_valddi.cpp index 2d05787d3edd7..cda6c4849301d 100644 --- a/unified-runtime/source/loader/layers/validation/ur_valddi.cpp +++ b/unified-runtime/source/loader/layers/validation/ur_valddi.cpp @@ -10297,6 +10297,43 @@ __urdlllocal ur_result_t UR_APICALL urDeviceWaitExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramDynamicLinkExp +__urdlllocal ur_result_t UR_APICALL urProgramDynamicLinkExp( + /// [in] handle of the context instance. + ur_context_handle_t hContext, + /// [in] number of program handles in `phPrograms`. + uint32_t count, + /// [in][range(0, count)] pointer to array of program handles. 
+ const ur_program_handle_t *phPrograms) { + auto pfnDynamicLinkExp = + getContext()->urDdiTable.ProgramExp.pfnDynamicLinkExp; + + if (nullptr == pfnDynamicLinkExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (getContext()->enableParameterValidation) { + if (NULL == phPrograms) + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + + if (NULL == hContext) + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + + if (count == 0) + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(hContext)) { + URLOG_CTX_INVALID_REFERENCE(hContext); + } + + ur_result_t result = pfnDynamicLinkExp(hContext, count, phPrograms); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueTimestampRecordingExp __urdlllocal ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( @@ -10692,6 +10729,8 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuildExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in][optional] pointer to build options null-terminated string. 
const char *pOptions) { auto pfnBuildExp = getContext()->urDdiTable.ProgramExp.pfnBuildExp; @@ -10706,6 +10745,9 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuildExp( if (NULL == hProgram) return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + + if (UR_EXP_PROGRAM_FLAGS_MASK & flags) + return UR_RESULT_ERROR_INVALID_ENUMERATION; } if (getContext()->enableLifetimeValidation && @@ -10713,7 +10755,8 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuildExp( URLOG_CTX_INVALID_REFERENCE(hProgram); } - ur_result_t result = pfnBuildExp(hProgram, numDevices, phDevices, pOptions); + ur_result_t result = + pfnBuildExp(hProgram, numDevices, phDevices, flags, pOptions); return result; } @@ -10727,6 +10770,8 @@ __urdlllocal ur_result_t UR_APICALL urProgramCompileExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in][optional] pointer to build options null-terminated string. 
const char *pOptions) { auto pfnCompileExp = getContext()->urDdiTable.ProgramExp.pfnCompileExp; @@ -10741,6 +10786,9 @@ __urdlllocal ur_result_t UR_APICALL urProgramCompileExp( if (NULL == hProgram) return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + + if (UR_EXP_PROGRAM_FLAGS_MASK & flags) + return UR_RESULT_ERROR_INVALID_ENUMERATION; } if (getContext()->enableLifetimeValidation && @@ -10748,7 +10796,8 @@ __urdlllocal ur_result_t UR_APICALL urProgramCompileExp( URLOG_CTX_INVALID_REFERENCE(hProgram); } - ur_result_t result = pfnCompileExp(hProgram, numDevices, phDevices, pOptions); + ur_result_t result = + pfnCompileExp(hProgram, numDevices, phDevices, flags, pOptions); return result; } @@ -10762,6 +10811,8 @@ __urdlllocal ur_result_t UR_APICALL urProgramLinkExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in] number of program handles in `phPrograms`. uint32_t count, /// [in][range(0, count)] pointer to array of program handles. 
@@ -10792,6 +10843,9 @@ __urdlllocal ur_result_t UR_APICALL urProgramLinkExp( if (NULL == hContext) return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + if (UR_EXP_PROGRAM_FLAGS_MASK & flags) + return UR_RESULT_ERROR_INVALID_ENUMERATION; + if (count == 0) return UR_RESULT_ERROR_INVALID_SIZE; } @@ -10801,7 +10855,7 @@ __urdlllocal ur_result_t UR_APICALL urProgramLinkExp( URLOG_CTX_INVALID_REFERENCE(hContext); } - ur_result_t result = pfnLinkExp(hContext, numDevices, phDevices, count, + ur_result_t result = pfnLinkExp(hContext, numDevices, phDevices, flags, count, phPrograms, pOptions, phProgram); return result; @@ -12197,6 +12251,9 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; + dditable.pfnDynamicLinkExp = pDdiTable->pfnDynamicLinkExp; + pDdiTable->pfnDynamicLinkExp = ur_validation_layer::urProgramDynamicLinkExp; + dditable.pfnBuildExp = pDdiTable->pfnBuildExp; pDdiTable->pfnBuildExp = ur_validation_layer::urProgramBuildExp; diff --git a/unified-runtime/source/loader/loader.def.in b/unified-runtime/source/loader/loader.def.in index 674937d4e9ac3..f2b29da873ece 100644 --- a/unified-runtime/source/loader/loader.def.in +++ b/unified-runtime/source/loader/loader.def.in @@ -358,6 +358,7 @@ EXPORTS urPrintExpImageCopyRegion urPrintExpImageMemType urPrintExpPeerInfo + urPrintExpProgramFlags urPrintExpSamplerAddrModes urPrintExpSamplerCubemapFilterMode urPrintExpSamplerCubemapProperties @@ -463,6 +464,7 @@ EXPORTS urPrintProgramCreateWithBinaryParams urPrintProgramCreateWithIlParams urPrintProgramCreateWithNativeHandleParams + urPrintProgramDynamicLinkExpParams urPrintProgramGetBuildInfoParams urPrintProgramGetFunctionPointerParams urPrintProgramGetGlobalVariablePointerParams @@ -564,6 +566,7 @@ EXPORTS urProgramCreateWithBinary urProgramCreateWithIL urProgramCreateWithNativeHandle + urProgramDynamicLinkExp urProgramGetBuildInfo urProgramGetFunctionPointer urProgramGetGlobalVariablePointer diff --git 
a/unified-runtime/source/loader/loader.map.in b/unified-runtime/source/loader/loader.map.in index dcc2db78188e6..1ace5a0644088 100644 --- a/unified-runtime/source/loader/loader.map.in +++ b/unified-runtime/source/loader/loader.map.in @@ -358,6 +358,7 @@ urPrintExpImageCopyRegion; urPrintExpImageMemType; urPrintExpPeerInfo; + urPrintExpProgramFlags; urPrintExpSamplerAddrModes; urPrintExpSamplerCubemapFilterMode; urPrintExpSamplerCubemapProperties; @@ -463,6 +464,7 @@ urPrintProgramCreateWithBinaryParams; urPrintProgramCreateWithIlParams; urPrintProgramCreateWithNativeHandleParams; + urPrintProgramDynamicLinkExpParams; urPrintProgramGetBuildInfoParams; urPrintProgramGetFunctionPointerParams; urPrintProgramGetGlobalVariablePointerParams; @@ -564,6 +566,7 @@ urProgramCreateWithBinary; urProgramCreateWithIL; urProgramCreateWithNativeHandle; + urProgramDynamicLinkExp; urProgramGetBuildInfo; urProgramGetFunctionPointer; urProgramGetGlobalVariablePointer; diff --git a/unified-runtime/source/loader/ur_ldrddi.cpp b/unified-runtime/source/loader/ur_ldrddi.cpp index 98809dc348cd1..e5fb500a0fcfb 100644 --- a/unified-runtime/source/loader/ur_ldrddi.cpp +++ b/unified-runtime/source/loader/ur_ldrddi.cpp @@ -5431,6 +5431,26 @@ __urdlllocal ur_result_t UR_APICALL urDeviceWaitExp( return pfnWaitExp(hDevice); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramDynamicLinkExp +__urdlllocal ur_result_t UR_APICALL urProgramDynamicLinkExp( + /// [in] handle of the context instance. + ur_context_handle_t hContext, + /// [in] number of program handles in `phPrograms`. + uint32_t count, + /// [in][range(0, count)] pointer to array of program handles. 
+ const ur_program_handle_t *phPrograms) { + + auto *dditable = *reinterpret_cast<ur_dditable_t **>(hContext); + + auto *pfnDynamicLinkExp = dditable->ProgramExp.pfnDynamicLinkExp; + if (nullptr == pfnDynamicLinkExp) + return UR_RESULT_ERROR_UNINITIALIZED; + + // forward to device-platform + return pfnDynamicLinkExp(hContext, count, phPrograms); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueTimestampRecordingExp __urdlllocal ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( @@ -5642,6 +5662,8 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuildExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in][optional] pointer to build options null-terminated string. const char *pOptions) { @@ -5652,7 +5674,7 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuildExp( return UR_RESULT_ERROR_UNINITIALIZED; // forward to device-platform - return pfnBuildExp(hProgram, numDevices, phDevices, pOptions); + return pfnBuildExp(hProgram, numDevices, phDevices, flags, pOptions); } /////////////////////////////////////////////////////////////////////////////// @@ -5664,6 +5686,8 @@ __urdlllocal ur_result_t UR_APICALL urProgramCompileExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in][optional] pointer to build options null-terminated string. 
const char *pOptions) { @@ -5674,7 +5698,7 @@ __urdlllocal ur_result_t UR_APICALL urProgramCompileExp( return UR_RESULT_ERROR_UNINITIALIZED; // forward to device-platform - return pfnCompileExp(hProgram, numDevices, phDevices, pOptions); + return pfnCompileExp(hProgram, numDevices, phDevices, flags, pOptions); } /////////////////////////////////////////////////////////////////////////////// @@ -5686,6 +5710,8 @@ __urdlllocal ur_result_t UR_APICALL urProgramLinkExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in] number of program handles in `phPrograms`. uint32_t count, /// [in][range(0, count)] pointer to array of program handles. @@ -5704,7 +5730,7 @@ __urdlllocal ur_result_t UR_APICALL urProgramLinkExp( return UR_RESULT_ERROR_UNINITIALIZED; // forward to device-platform - return pfnLinkExp(hContext, numDevices, phDevices, count, phPrograms, + return pfnLinkExp(hContext, numDevices, phDevices, flags, count, phPrograms, pOptions, phProgram); } @@ -6898,6 +6924,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( if (ur_loader::getContext()->platforms.size() != 1 || ur_loader::getContext()->forceIntercept) { // return pointers to loader's DDIs + pDdiTable->pfnDynamicLinkExp = ur_loader::urProgramDynamicLinkExp; pDdiTable->pfnBuildExp = ur_loader::urProgramBuildExp; pDdiTable->pfnCompileExp = ur_loader::urProgramCompileExp; pDdiTable->pfnLinkExp = ur_loader::urProgramLinkExp; diff --git a/unified-runtime/source/loader/ur_libapi.cpp b/unified-runtime/source/loader/ur_libapi.cpp index 70ae105c5a94f..210fcb3009805 100644 --- a/unified-runtime/source/loader/ur_libapi.cpp +++ b/unified-runtime/source/loader/ur_libapi.cpp @@ -9988,6 +9988,51 @@ ur_result_t UR_APICALL urDeviceWaitExp( return exceptionToResult(std::current_exception()); } 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Creates dynamic links between exported and imported symbols in one or +/// more programs. +/// +/// @details +/// - The application may call this function from simultaneous threads. +/// - Following a successful call to this entry point the programs in +/// `phPrograms` will have all external symbols resolved and kernels +/// inside these programs would be ready for use. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hContext` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == phPrograms` +/// - ::UR_RESULT_ERROR_INVALID_PROGRAM +/// + If one of the programs in `phPrograms` isn't a valid program +/// object. +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `count == 0` +/// - ::UR_RESULT_ERROR_PROGRAM_LINK_FAILURE +/// + If an error occurred while linking `phPrograms`. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the adapter has no means to support the operation. +ur_result_t UR_APICALL urProgramDynamicLinkExp( + /// [in] handle of the context instance. + ur_context_handle_t hContext, + /// [in] number of program handles in `phPrograms`. + uint32_t count, + /// [in][range(0, count)] pointer to array of program handles. + const ur_program_handle_t *phPrograms) try { + auto pfnDynamicLinkExp = + ur_lib::getContext()->urDdiTable.ProgramExp.pfnDynamicLinkExp; + if (nullptr == pfnDynamicLinkExp) + return UR_RESULT_ERROR_UNINITIALIZED; + + return pfnDynamicLinkExp(hContext, count, phPrograms); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command for recording the device timestamp /// @@ -10348,6 +10393,8 @@ ur_result_t UR_APICALL urMemoryExportExportMemoryHandleExp( /// + `NULL == hProgram` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phDevices` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_PROGRAM_FLAGS_MASK & flags` /// - ::UR_RESULT_ERROR_INVALID_PROGRAM /// + If `hProgram` isn't a valid program object. /// - ::UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE @@ -10359,13 +10406,15 @@ ur_result_t UR_APICALL urProgramBuildExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in][optional] pointer to build options null-terminated string. const char *pOptions) try { auto pfnBuildExp = ur_lib::getContext()->urDdiTable.ProgramExp.pfnBuildExp; if (nullptr == pfnBuildExp) return UR_RESULT_ERROR_UNINITIALIZED; - return pfnBuildExp(hProgram, numDevices, phDevices, pOptions); + return pfnBuildExp(hProgram, numDevices, phDevices, flags, pOptions); } catch (...) { return exceptionToResult(std::current_exception()); } @@ -10392,6 +10441,8 @@ ur_result_t UR_APICALL urProgramBuildExp( /// + `NULL == hProgram` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phDevices` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_PROGRAM_FLAGS_MASK & flags` /// - ::UR_RESULT_ERROR_INVALID_PROGRAM /// + If `hProgram` isn't a valid program object. 
/// - ::UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE @@ -10403,6 +10454,8 @@ ur_result_t UR_APICALL urProgramCompileExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in][optional] pointer to build options null-terminated string. const char *pOptions) try { auto pfnCompileExp = @@ -10410,7 +10463,7 @@ ur_result_t UR_APICALL urProgramCompileExp( if (nullptr == pfnCompileExp) return UR_RESULT_ERROR_UNINITIALIZED; - return pfnCompileExp(hProgram, numDevices, phDevices, pOptions); + return pfnCompileExp(hProgram, numDevices, phDevices, flags, pOptions); } catch (...) { return exceptionToResult(std::current_exception()); } @@ -10445,6 +10498,8 @@ ur_result_t UR_APICALL urProgramCompileExp( /// + `NULL == phDevices` /// + `NULL == phPrograms` /// + `NULL == phProgram` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_PROGRAM_FLAGS_MASK & flags` /// - ::UR_RESULT_ERROR_INVALID_PROGRAM /// + If one of the programs in `phPrograms` isn't a valid program /// object. @@ -10459,6 +10514,8 @@ ur_result_t UR_APICALL urProgramLinkExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in] number of program handles in `phPrograms`. uint32_t count, /// [in][range(0, count)] pointer to array of program handles. @@ -10474,7 +10531,7 @@ ur_result_t UR_APICALL urProgramLinkExp( if (nullptr == pfnLinkExp) return UR_RESULT_ERROR_UNINITIALIZED; - return pfnLinkExp(hContext, numDevices, phDevices, count, phPrograms, + return pfnLinkExp(hContext, numDevices, phDevices, flags, count, phPrograms, pOptions, phProgram); } catch (...) 
{ return exceptionToResult(std::current_exception()); diff --git a/unified-runtime/source/loader/ur_print.cpp b/unified-runtime/source/loader/ur_print.cpp index 7e737bd641c40..04d6efe8a401a 100644 --- a/unified-runtime/source/loader/ur_print.cpp +++ b/unified-runtime/source/loader/ur_print.cpp @@ -1140,6 +1140,14 @@ ur_result_t urPrintExpCommandBufferUpdateKernelLaunchDesc( return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintExpProgramFlags(enum ur_exp_program_flag_t value, + char *buffer, const size_t buff_size, + size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintExpPeerInfo(enum ur_exp_peer_info_t value, char *buffer, const size_t buff_size, size_t *out_size) { std::stringstream ss; @@ -2470,6 +2478,14 @@ urPrintProgramBuildParams(const struct ur_program_build_params_t *params, return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintProgramDynamicLinkExpParams( + const struct ur_program_dynamic_link_exp_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintProgramBuildExpParams(const struct ur_program_build_exp_params_t *params, char *buffer, const size_t buff_size, diff --git a/unified-runtime/source/ur_api.cpp b/unified-runtime/source/ur_api.cpp index 8a86df1a208e0..fafc7e205b5cb 100644 --- a/unified-runtime/source/ur_api.cpp +++ b/unified-runtime/source/ur_api.cpp @@ -8697,6 +8697,45 @@ ur_result_t UR_APICALL urDeviceWaitExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Creates dynamic links between exported and imported symbols in one or +/// more programs. +/// +/// @details +/// - The application may call this function from simultaneous threads. 
+/// - Following a successful call to this entry point the programs in +/// `phPrograms` will have all external symbols resolved and kernels +/// inside these programs would be ready for use. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hContext` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == phPrograms` +/// - ::UR_RESULT_ERROR_INVALID_PROGRAM +/// + If one of the programs in `phPrograms` isn't a valid program +/// object. +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `count == 0` +/// - ::UR_RESULT_ERROR_PROGRAM_LINK_FAILURE +/// + If an error occurred while linking `phPrograms`. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the adapter has no means to support the operation. +ur_result_t UR_APICALL urProgramDynamicLinkExp( + /// [in] handle of the context instance. + ur_context_handle_t hContext, + /// [in] number of program handles in `phPrograms`. + uint32_t count, + /// [in][range(0, count)] pointer to array of program handles. + const ur_program_handle_t *phPrograms) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command for recording the device timestamp /// @@ -9002,6 +9041,8 @@ ur_result_t UR_APICALL urMemoryExportExportMemoryHandleExp( /// + `NULL == hProgram` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phDevices` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_PROGRAM_FLAGS_MASK & flags` /// - ::UR_RESULT_ERROR_INVALID_PROGRAM /// + If `hProgram` isn't a valid program object. 
/// - ::UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE @@ -9013,6 +9054,8 @@ ur_result_t UR_APICALL urProgramBuildExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in][optional] pointer to build options null-terminated string. const char *pOptions) { ur_result_t result = UR_RESULT_SUCCESS; @@ -9041,6 +9084,8 @@ ur_result_t UR_APICALL urProgramBuildExp( /// + `NULL == hProgram` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == phDevices` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_PROGRAM_FLAGS_MASK & flags` /// - ::UR_RESULT_ERROR_INVALID_PROGRAM /// + If `hProgram` isn't a valid program object. /// - ::UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE @@ -9052,6 +9097,8 @@ ur_result_t UR_APICALL urProgramCompileExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in][optional] pointer to build options null-terminated string. const char *pOptions) { ur_result_t result = UR_RESULT_SUCCESS; @@ -9088,6 +9135,8 @@ ur_result_t UR_APICALL urProgramCompileExp( /// + `NULL == phDevices` /// + `NULL == phPrograms` /// + `NULL == phProgram` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_PROGRAM_FLAGS_MASK & flags` /// - ::UR_RESULT_ERROR_INVALID_PROGRAM /// + If one of the programs in `phPrograms` isn't a valid program /// object. @@ -9102,6 +9151,8 @@ ur_result_t UR_APICALL urProgramLinkExp( uint32_t numDevices, /// [in][range(0, numDevices)] pointer to array of device handles ur_device_handle_t *phDevices, + /// [in] program information flags + ur_exp_program_flags_t flags, /// [in] number of program handles in `phPrograms`. uint32_t count, /// [in][range(0, count)] pointer to array of program handles. 
diff --git a/unified-runtime/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp b/unified-runtime/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp index 5f541fc9bc98f..a6631b8a89b4c 100644 --- a/unified-runtime/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp +++ b/unified-runtime/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp @@ -135,20 +135,23 @@ TEST_P(urMultiDeviceProgramCreateWithBinaryTest, MultipleBuildCalls) { auto second_subset = std::vector( devices.begin() + devices.size() / 2, devices.end()); ASSERT_SUCCESS(urProgramBuildExp(binary_program, first_subset.size(), - first_subset.data(), nullptr)); + first_subset.data(), + ur_exp_program_flags_t{}, nullptr)); auto kernelName = uur::KernelsEnvironment::instance->GetEntryPointNames("foo")[0]; uur::raii::Kernel kernel; ASSERT_SUCCESS( urKernelCreate(binary_program, kernelName.data(), kernel.ptr())); ASSERT_SUCCESS(urProgramBuildExp(binary_program, second_subset.size(), - second_subset.data(), nullptr)); + second_subset.data(), + ur_exp_program_flags_t{}, nullptr)); ASSERT_SUCCESS( urKernelCreate(binary_program, kernelName.data(), kernel.ptr())); // Building for the same subset of devices should not fail. 
ASSERT_SUCCESS(urProgramBuildExp(binary_program, first_subset.size(), - first_subset.data(), nullptr)); + first_subset.data(), + ur_exp_program_flags_t{}, nullptr)); } // Test the case we get native binaries from program created with multiple diff --git a/unified-runtime/test/conformance/program/urMultiDeviceProgramCreateWithIL.cpp b/unified-runtime/test/conformance/program/urMultiDeviceProgramCreateWithIL.cpp index 071a3ba901d7c..8630cb355405b 100644 --- a/unified-runtime/test/conformance/program/urMultiDeviceProgramCreateWithIL.cpp +++ b/unified-runtime/test/conformance/program/urMultiDeviceProgramCreateWithIL.cpp @@ -32,8 +32,8 @@ TEST_P(urMultiDeviceProgramTest, urMultiDeviceProgramGetInfo) { auto subset = std::vector( associated_devices.begin(), associated_devices.begin() + associated_devices.size() / 2); - ASSERT_SUCCESS( - urProgramBuildExp(program, subset.size(), subset.data(), nullptr)); + ASSERT_SUCCESS(urProgramBuildExp(program, subset.size(), subset.data(), + ur_exp_program_flags_t{}, nullptr)); std::vector binary_sizes(associated_devices.size()); ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_BINARY_SIZES, @@ -84,8 +84,8 @@ TEST_P(urMultiDeviceProgramTest, urMultiDeviceProgramGetInfoBinaries) { } // Build program for the second device only. 
- ASSERT_SUCCESS( - urProgramBuildExp(program, 1, associated_devices.data() + 1, nullptr)); + ASSERT_SUCCESS(urProgramBuildExp(program, 1, associated_devices.data() + 1, + ur_exp_program_flags_t{}, nullptr)); std::vector binary_sizes(associated_devices.size()); ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_BINARY_SIZES, binary_sizes.size() * sizeof(size_t), @@ -108,6 +108,7 @@ TEST_P(urMultiDeviceProgramTest, urMultiDeviceProgramGetInfoBinaries) { pointers.data() + 1, nullptr, &program_from_binary)); ASSERT_NE(program_from_binary, nullptr); ASSERT_SUCCESS(urProgramBuildExp(program_from_binary, 1, - associated_devices.data() + 1, nullptr)); + associated_devices.data() + 1, + ur_exp_program_flags_t{}, nullptr)); ASSERT_SUCCESS(urProgramRelease(program_from_binary)); } diff --git a/unified-runtime/tools/urinfo/urinfo.hpp b/unified-runtime/tools/urinfo/urinfo.hpp index f9d4fbeca2b67..a058b8e69a07c 100644 --- a/unified-runtime/tools/urinfo/urinfo.hpp +++ b/unified-runtime/tools/urinfo/urinfo.hpp @@ -467,6 +467,8 @@ inline void printDeviceInfos(ur_device_handle_t hDevice, std::cout << prefix; printDeviceInfo(hDevice, UR_DEVICE_INFO_DEVICE_WAIT_SUPPORT_EXP); std::cout << prefix; + printDeviceInfo(hDevice, UR_DEVICE_INFO_DYNAMIC_LINK_SUPPORT_EXP); + std::cout << prefix; printDeviceInfo(hDevice, UR_DEVICE_INFO_USM_CONTEXT_MEMCPY_SUPPORT_EXP); std::cout << prefix;