diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc
index 73fbe9a0fa76f..7027861f0c4dc 100644
--- a/onnxruntime/core/providers/openvino/backend_utils.cc
+++ b/onnxruntime/core/providers/openvino/backend_utils.cc
@@ -150,6 +150,11 @@ CreateOVModel(std::string&& model,
     LOGS_DEFAULT(INFO) << log_tag << "Reshaping the ov tensor to specified shape";
     ov_model->reshape(session_context.reshape);
   }
+
+  if (!session_context.layout.empty()) {
+    LOGS_DEFAULT(INFO) << log_tag << "Setting the ov tensor layout to specified layout";
+    ov_model = Set_Layout(ov_model, session_context.layout);
+  }
   // Check for Constant Folding
   if ((session_context.device_type != "NPU") && !session_context.is_wholly_supported_graph) {
     ov::pass::ConstantFolding pass_const_obj;
@@ -199,6 +204,41 @@ GetOutputTensor(Ort::KernelContext& context,
   return context.GetOutput(index, output_shape);
 }
 
+// Applies user-specified tensor layouts to the model via PrePostProcessor.
+// Each entry in `layout` is matched against the model's inputs first, then
+// its outputs; unmatched tensor names only produce a warning so that a
+// partially-wrong layout string does not abort compilation.
+std::shared_ptr<ov::Model> Set_Layout(std::shared_ptr<ov::Model> ov_model, const layout_t& layout) {
+  ov::preprocess::PrePostProcessor preproc(ov_model);
+
+  const auto& inputs = ov_model->inputs();
+  const auto& outputs = ov_model->outputs();
+
+  // Returns the index of the port whose primary name or name-set contains `name`.
+  auto find_tensor_index = [](const std::vector<ov::Output<ov::Node>>& tensors, const std::string& name) -> std::optional<size_t> {
+    for (size_t i = 0; i < tensors.size(); ++i) {
+      const auto& tensor = tensors[i];
+      if (tensor.get_any_name() == name || tensor.get_tensor().get_names().count(name) > 0) {
+        return i;
+      }
+    }
+    return std::nullopt;
+  };
+
+  for (const auto& [tensor_name, layout_value] : layout) {
+    bool tensor_found = false;
+
+    if (auto input_idx = find_tensor_index(inputs, tensor_name)) {
+      preproc.input(*input_idx).tensor().set_layout(layout_value);
+      tensor_found = true;
+    } else if (auto output_idx = find_tensor_index(outputs, tensor_name)) {
+      preproc.output(*output_idx).tensor().set_layout(layout_value);
+      tensor_found = true;
+    }
+
+    if (!tensor_found) {
+      LOGS_DEFAULT(WARNING) << "Tensor '" << tensor_name << "' not found in model inputs or outputs";
+    }
+  }
+
+  return preproc.build();
+}
+
 int GetFirstAvailableDevice(SessionContext& session_context) {
   int i = 0;
   // Get the first available VAD-M device and set the device to busy
diff --git a/onnxruntime/core/providers/openvino/backend_utils.h b/onnxruntime/core/providers/openvino/backend_utils.h
index 15145df651fa2..27f791c7a5bd1 100644
--- a/onnxruntime/core/providers/openvino/backend_utils.h
+++ b/onnxruntime/core/providers/openvino/backend_utils.h
@@ -79,6 +79,8 @@ int GetFirstAvailableDevice(SessionContext& session_context);
 
 void FillOutputsWithConstantData(std::shared_ptr<ov::Node> node, Ort::UnownedValue& out_tensor);
 
+std::shared_ptr<ov::Model> Set_Layout(std::shared_ptr<ov::Model> ov_model, const layout_t& layout);
+
 template <typename T>
 void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr<ov::Node> node);
 
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
index 6efd866d47c3c..0f939437109a6 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -98,6 +98,7 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
                            !subgraph_context_.has_dynamic_input_shape &&
                            !session_context_.so_context_enable &&
                            session_context_.reshape.empty() &&
+                           session_context_.layout.empty() &&
                            !enable_causallm &&
                            !eligible_for_cpu_fallback &&
                            auto_unified_compile);
diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h
index 6a2b375d733f9..07b09899ac214 100644
--- a/onnxruntime/core/providers/openvino/contexts.h
+++ b/onnxruntime/core/providers/openvino/contexts.h
@@ -70,6 +70,7 @@ class SharedContext : public WeakSingleton<SharedContext> {
 
 using config_t = std::map<std::string, ov::AnyMap>;
 using reshape_t = std::map<std::string, ov::PartialShape>;
+using layout_t = std::map<std::string, ov::Layout>;
 
 struct ProviderInfo {
   std::string device_type{""};             // [device_type]: Overrides the accelerator hardware type and
@@ -88,6 +89,7 @@ struct ProviderInfo {
                                            // (GPU) feature. If blob files are already present,
                                            // it will be directly loaded.
   reshape_t reshape{};                     // Used for reshaping the ov input tensor shape at runtime.
+  layout_t layout{};                       // Used for specifying the ov input/output tensor layout at runtime.
   std::string model_priority{"DEFAULT"};   // High-level OpenVINO model priority hint
                                            // Defines what model should be provided with more performant
                                            // bounded resource first
@@ -110,7 +112,7 @@ struct ProviderInfo {
   const ConfigOptions* config_options{NULL};
   const std::unordered_set<std::string> valid_provider_keys = {"device_type", "device_id", "device_luid", "cache_dir", "precision",
                                                                "load_config", "context", "num_of_threads", "model_priority", "num_streams", "enable_opencl_throttling", "enable_qdq_optimizer",
-                                                               "enable_causallm", "disable_dynamic_shapes", "reshape_input"};
+                                                               "enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout"};
 };
 
 // Holds context applicable to the entire EP instance.
diff --git a/onnxruntime/core/providers/openvino/openvino_parser_utils.cc b/onnxruntime/core/providers/openvino/openvino_parser_utils.cc
index 21fc7f935da23..a290fea73e0e8 100644
--- a/onnxruntime/core/providers/openvino/openvino_parser_utils.cc
+++ b/onnxruntime/core/providers/openvino/openvino_parser_utils.cc
@@ -236,5 +236,79 @@ ov::Dimension OpenVINOParserUtils::ParseDimensionRange(const std::string& range_
   return ov::Dimension(range_start, range_end);
 }
 
+layout_t OpenVINOParserUtils::ParseLayout(const std::string& layout_definition) {
+  layout_t parsed_layout_map;
+
+  // Return empty map for empty input
+  if (layout_definition.empty()) {
+    ORT_THROW("Empty layout definition provided in layout parameter");
+  }
+
+  // Regular expression for parsing layout definitions
+  const std::regex layout_pattern(R"(([^\[\],]+)\s*\[(.*?)\])");  // e.g. "input_1[NC],data[CHW]"
+
+  // Find all tensor layout definitions using regex
+  auto layout_begin = std::sregex_iterator(
+      layout_definition.begin(),
+      layout_definition.end(),
+      layout_pattern);
+  auto layout_end = std::sregex_iterator();
+
+  // If no matches found, throw error
+  if (layout_begin == layout_end) {
+    ORT_THROW("Invalid layout definition format: " + layout_definition);
+  }
+
+  // Process each tensor definition
+  for (std::sregex_iterator i = std::move(layout_begin); i != layout_end; ++i) {
+    std::smatch layout_match = *i;
+
+    // Extract tensor name and trim whitespace
+    std::string tensor_name = layout_match[1].str();  // Group 1: tensor name e.g. "input_1"
+    tensor_name = TrimWhitespace(tensor_name);
+
+    if (tensor_name.empty()) {
+      ORT_THROW("Empty tensor name provided in layout parameter");
+    }
+
+    // Extract dimensions string
+    std::string dimensions_str = layout_match[2].str();  // Group 2: dimensions string [e.g. "NC", "CHW"]
+
+    if (!Check_Valid_Layout(dimensions_str, tensor_name)) {
+      ORT_THROW("Invalid dimensions string provided in layout parameter");
+    }
+
+    // Store parsed shape in result map
+    parsed_layout_map[tensor_name] = ov::Layout(dimensions_str);
+  }
+
+  return parsed_layout_map;
+}
+
+bool OpenVINOParserUtils::Check_Valid_Layout(const std::string& layout_str, const std::string& tensor_name) {
+  // Check if the layout string is empty
+  if (layout_str.empty()) {
+    return false;
+  }
+
+  std::unordered_set<char> seen_alphabets;
+  for (char c : layout_str) {
+    if (std::isalpha(c)) {
+      char upper_c = static_cast<char>(std::toupper(c));  // Convert to uppercase for case-insensitive comparison
+      if (seen_alphabets.find(upper_c) != seen_alphabets.end()) {
+        ORT_THROW("Repeated Dim '" + std::string(1, c) +
+                  "' found in layout dimensions for tensor '" + tensor_name + "'");
+      }
+      seen_alphabets.insert(upper_c);
+    } else if (c != '?') {
+      // Only '?' is allowed as non-alphabetic character
+      ORT_THROW("Invalid character '" + std::string(1, c) +
+                "' found in layout dimensions for tensor '" + tensor_name + "'");
+    }
+  }
+
+  return true;
+}
+
 }  // namespace openvino_ep
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/openvino/openvino_parser_utils.h b/onnxruntime/core/providers/openvino/openvino_parser_utils.h
index e6aa0e0a46a3b..a0936d627df40 100644
--- a/onnxruntime/core/providers/openvino/openvino_parser_utils.h
+++ b/onnxruntime/core/providers/openvino/openvino_parser_utils.h
@@ -18,8 +18,10 @@ class OpenVINOParserUtils {
                                       std::string& device_type,
                                       const std::string& option_name);
   static reshape_t ParseInputShape(const std::string& reshape_input_definition);
+  static layout_t ParseLayout(const std::string& layout_definition);
   static std::string TrimWhitespace(const std::string& str);
   static ov::Dimension ParseDimensionRange(const std::string& range_str, const std::string& tensor_name);
+  static bool Check_Valid_Layout(const std::string& layout_str, const std::string& tensor_name);
 };
 
 }  // namespace openvino_ep
diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
index 9dba8623031d0..c7fbae324a225 100644
--- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
+++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
@@ -230,6 +230,10 @@ static void ParseProviderInfo(const ProviderOptions& provider_options,
     pi.reshape = OpenVINOParserUtils::ParseInputShape(provider_options.at("reshape_input"));
   }
 
+  if (provider_options.contains("layout")) {
+    pi.layout = OpenVINOParserUtils::ParseLayout(provider_options.at("layout"));
+  }
+
   if (provider_options.contains("load_config")) {
     auto parse_config = [&](const std::string& config_str) -> std::map<std::string, ov::AnyMap> {
       // If the config string is empty, return an empty map and skip processing
diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc
index 843875a881f0a..e992ce3304d7c 100644
--- a/onnxruntime/test/perftest/command_args_parser.cc
+++ b/onnxruntime/test/perftest/command_args_parser.cc
@@ -81,7 +81,9 @@ namespace perftest {
       "\t [OpenVINO only] [num_of_threads]: Overrides the accelerator hardware type and precision with these values at runtime.\n"
       "\t [OpenVINO only] [cache_dir]: Explicitly specify the path to dump and load the blobs(Model caching) or cl_cache (Kernel Caching) files feature. If blob files are already present, it will be directly loaded.\n"
       "\t [OpenVINO only] [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU device(Reduces the CPU Utilization while using GPU) \n"
-      "\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU num_of_threads|5 enable_opencl_throttling|true cache_dir|\"\"\"\n"
+      "\t [OpenVINO only] [reshape_input]: Sets model input shapes with support for bounded dynamic dimensions using 'min..max' syntax (e.g., [1..10,3,224,224]) \n"
+      "\t [OpenVINO only] [layout]: Specifies the layout for inputs/outputs to interpret tensor dimensions correctly. \n"
+      "\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU num_of_threads|5 enable_opencl_throttling|true reshape_input|[1,3,60,60..100] layout|[NCHW] cache_dir|\"\"\"\n"
       "\n"
       "\t [QNN only] [backend_type]: QNN backend type. E.g., 'cpu', 'htp'. Mutually exclusive with 'backend_path'.\n"
       "\t [QNN only] [backend_path]: QNN backend path. E.g., '/folderpath/libQnnHtp.so', '/winfolderpath/QnnHtp.dll'. Mutually exclusive with 'backend_type'.\n"
diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc
index 7a210ca8482a4..da6e66bae3eb0 100644
--- a/onnxruntime/test/perftest/ort_test_session.cc
+++ b/onnxruntime/test/perftest/ort_test_session.cc
@@ -828,12 +828,14 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
       ov_options[key] = value;
     } else if (key == "reshape_input") {
       ov_options[key] = value;
+    } else if (key == "layout") {
+      ov_options[key] = value;
     } else {
       ORT_THROW(
           "[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO."
           " ['device_type', 'device_id', 'num_of_threads', 'load_config', 'cache_dir', 'num_streams', "
          "'enable_opencl_throttling', 'disable_dynamic_shapes', 'enable_qdq_optimizer',"
-          " 'enable_causallm', 'model_priority'] \n");
+          " 'enable_causallm', 'reshape_input', 'layout', 'model_priority'] \n");
     }
   }
   session_options.AppendExecutionProvider_OpenVINO_V2(ov_options);