From ca13763d6850f0073f0e59089796f82bf765e91a Mon Sep 17 00:00:00 2001 From: "Raasz, Pawel" Date: Mon, 8 Dec 2025 14:27:39 +0000 Subject: [PATCH 1/3] Use custom implementation of string <-> wstring conversion Signed-off-by: Raasz, Pawel --- src/common/util/src/wstring_convert_util.cpp | 101 +++++++++++++++---- 1 file changed, 79 insertions(+), 22 deletions(-) diff --git a/src/common/util/src/wstring_convert_util.cpp b/src/common/util/src/wstring_convert_util.cpp index a8411092767e2b..d807fb94afef14 100644 --- a/src/common/util/src/wstring_convert_util.cpp +++ b/src/common/util/src/wstring_convert_util.cpp @@ -3,34 +3,59 @@ // #include "openvino/util/wstring_convert_util.hpp" +#ifdef _WIN32 +# include +#endif +namespace ov::util { #ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT -# include -# include +constexpr auto value_mask = 0x3FU; +constexpr auto codepoint_2nd_shift = 6U; +constexpr auto codepoint_3rd_shift = 12U; +constexpr auto codepoint_4th_shift = 18U; -# ifdef _WIN32 -# include -# endif - -# if defined(__clang__) || defined(__GNUC__) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wdeprecated-declarations" -# endif - -std::string ov::util::wstring_to_string(const std::wstring& wstr) { +std::string wstring_to_string(const std::wstring& wstr) { # ifdef _WIN32 int size_needed = WideCharToMultiByte(CP_UTF8, 0, &wstr[0], (int)wstr.size(), NULL, 0, NULL, NULL); std::string strTo(size_needed, 0); WideCharToMultiByte(CP_UTF8, 0, &wstr[0], (int)wstr.size(), &strTo[0], size_needed, NULL, NULL); return strTo; # else - std::wstring_convert> wstring_decoder; - return wstring_decoder.to_bytes(wstr); + std::string result; + result.reserve(wstr.size() * (sizeof(wchar_t) >= 4 ? 4 : 3)); // Worst case for UTF-8 + + for (const auto& wc : wstr) { + uint32_t codepoint = static_cast(wc); + + if (codepoint <= 0x7FU) { + // 1-byte sequence (ASCII) + result.push_back(static_cast(codepoint)); + } else if (codepoint <= 0x7FFU) { + // 2-byte sequence + result.push_back(static_cast(0xC0U | ((codepoint >> codepoint_2nd_shift) & 0x1FU))); + result.push_back(static_cast(0x80U | (codepoint & value_mask))); + } else if (codepoint <= 0xFFFFU) { + // 3-byte sequence + result.push_back(static_cast(0xE0U | ((codepoint >> codepoint_3rd_shift) & 0x0FU))); + result.push_back(static_cast(0x80U | ((codepoint >> codepoint_2nd_shift) & value_mask))); + result.push_back(static_cast(0x80U | (codepoint & value_mask))); + } else if (codepoint <= 0x10FFFFU) { + // 4-byte sequence + result.push_back(static_cast(0xF0U | ((codepoint >> codepoint_4th_shift) & 0x07U))); + result.push_back(static_cast(0x80U | ((codepoint >> codepoint_3rd_shift) & value_mask))); + result.push_back(static_cast(0x80U | ((codepoint >> codepoint_2nd_shift) & value_mask))); + result.push_back(static_cast(0x80U | (codepoint & value_mask))); + } else { + throw std::runtime_error("Invalid Unicode codepoint"); + } + } + result.shrink_to_fit(); + return result; # endif } -std::wstring ov::util::string_to_wstring(const std::string& string) { +std::wstring string_to_wstring(const std::string& string) { const char* str = string.c_str(); # ifdef _WIN32 int strSize = static_cast(std::strlen(str)); @@ -39,14 +64,46 @@ std::wstring ov::util::string_to_wstring(const std::string& string) { MultiByteToWideChar(CP_UTF8, 0, str, strSize, &wstrTo[0], size_needed); return wstrTo; # else - std::wstring_convert> wstring_encoder; - std::wstring result = wstring_encoder.from_bytes(str); + + const auto check_utf8_seq_size = [](const char* first, const char* last, const std::ptrdiff_t seq_size) { + if (seq_size > std::distance(first, last)) { + throw std::runtime_error("Invalid UTF-8 sequence"); + } + }; + + std::wstring result; + result.reserve(string.size()); + for (const auto last = str + string.size(); str < last;) { + auto codepoint = static_cast(*str++); + if (codepoint <= 0x7FU) { + // 1-byte sequence, nothing to do + } else if ((codepoint & 0xE0U) == 0xC0U) { + // 2-byte sequence + check_utf8_seq_size(str, last, 1); + codepoint = (codepoint & 0x1FU) << codepoint_2nd_shift; + codepoint |= (static_cast(*str++) & value_mask); + } else if ((codepoint & 0xF0U) == 0xE0U) { + // 3-byte sequence + check_utf8_seq_size(str, last, 2); + codepoint = (codepoint & 0x0FU) << codepoint_3rd_shift; + codepoint |= (static_cast(*str++) & value_mask) << codepoint_2nd_shift; + codepoint |= (static_cast(*str++) & value_mask); + } else if ((codepoint & 0xF8U) == 0xF0U) { + // 4-byte sequence + check_utf8_seq_size(str, last, 3); + codepoint = (codepoint & 0x07U) << codepoint_4th_shift; + codepoint |= (static_cast(*str++) & value_mask) << codepoint_3rd_shift; + codepoint |= (static_cast(*str++) & value_mask) << codepoint_2nd_shift; + codepoint |= (static_cast(*str++) & value_mask); + } else { + throw std::runtime_error("Invalid UTF-8 byte"); + } + + result.push_back(static_cast(codepoint)); + } + return result; # endif } - -# if defined(__clang__) || defined(__GNUC__) -# pragma GCC diagnostic pop -# endif - #endif // OPENVINO_ENABLE_UNICODE_PATH_SUPPORT +} // namespace ov::util From 056b1f18c91bf0f36965cd830ce11464d83becb1 Mon Sep 17 00:00:00 2001 From: "Raasz, Pawel" Date: Mon, 8 Dec 2025 14:38:43 +0000 Subject: [PATCH 2/3] Add missing include Signed-off-by: Raasz, Pawel --- src/common/util/src/wstring_convert_util.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/common/util/src/wstring_convert_util.cpp b/src/common/util/src/wstring_convert_util.cpp index d807fb94afef14..060ff2ccdc8bae 100644 --- a/src/common/util/src/wstring_convert_util.cpp +++ b/src/common/util/src/wstring_convert_util.cpp @@ -3,6 +3,8 @@ // #include "openvino/util/wstring_convert_util.hpp" + +#include #ifdef _WIN32 # include #endif From 9e6a3501dca025ac1739cc29bc69c52da6f6b299 Mon Sep 17 00:00:00 2001 From: "Raasz, Pawel" Date: Mon, 8 Dec 2025 14:49:50 +0000 Subject: [PATCH 3/3] Update include in wstring convert util Signed-off-by: Raasz, Pawel --- src/common/util/src/wstring_convert_util.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/util/src/wstring_convert_util.cpp b/src/common/util/src/wstring_convert_util.cpp index 060ff2ccdc8bae..f3e8fbb5d5a537 100644 --- a/src/common/util/src/wstring_convert_util.cpp +++ b/src/common/util/src/wstring_convert_util.cpp @@ -4,6 +4,7 @@ #include "openvino/util/wstring_convert_util.hpp" +#include #include #ifdef _WIN32 # include