Skip to content

[OVEP] Support for providing layout to input/output to OpenVINO #767

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: ovep-develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions onnxruntime/core/providers/openvino/backend_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,11 @@ CreateOVModel(std::string&& model,
LOGS_DEFAULT(INFO) << log_tag << "Reshaping the ov tensor to specified shape";
ov_model->reshape(session_context.reshape);
}

if (!session_context.layout.empty()) {
LOGS_DEFAULT(INFO) << log_tag << "Setting the ov tensor layout to specified layout";
ov_model = Set_Layout(ov_model, session_context.layout);
}
// Check for Constant Folding
if ((session_context.device_type != "NPU") && !session_context.is_wholly_supported_graph) {
ov::pass::ConstantFolding pass_const_obj;
Expand Down Expand Up @@ -199,6 +204,41 @@ GetOutputTensor(Ort::KernelContext& context,
return context.GetOutput(index, output_shape);
}

// Applies user-specified tensor layouts (e.g. "NCHW") to the model's inputs/outputs
// via OpenVINO's PrePostProcessor and returns the rebuilt model.
// Tensors named in `layout` but absent from the model are logged and skipped.
std::shared_ptr<OVNetwork> Set_Layout(std::shared_ptr<OVNetwork> ov_model, const layout_t& layout) {
  ov::preprocess::PrePostProcessor preproc(ov_model);

  const auto& inputs = ov_model->inputs();
  const auto& outputs = ov_model->outputs();

  // Returns the index of the tensor whose name set contains `name`, if any.
  // NOTE: we deliberately query get_names() rather than get_any_name() —
  // get_any_name() throws for tensors that have no names, while the name-set
  // lookup is safe and also covers every name get_any_name() could return.
  auto find_tensor_index = [](const std::vector<ov::Output<ov::Node>>& tensors,
                              const std::string& name) -> std::optional<size_t> {
    for (size_t i = 0; i < tensors.size(); ++i) {
      if (tensors[i].get_tensor().get_names().count(name) > 0) {
        return i;
      }
    }
    return std::nullopt;
  };

  for (const auto& [tensor_name, layout_value] : layout) {
    // Prefer inputs; fall back to outputs; warn (but don't fail) on a miss so a
    // stale layout entry does not abort session creation.
    if (auto input_idx = find_tensor_index(inputs, tensor_name)) {
      preproc.input(*input_idx).tensor().set_layout(layout_value);
    } else if (auto output_idx = find_tensor_index(outputs, tensor_name)) {
      preproc.output(*output_idx).tensor().set_layout(layout_value);
    } else {
      LOGS_DEFAULT(WARNING) << "Tensor '" << tensor_name << "' not found in model inputs or outputs";
    }
  }

  // build() materializes the layout changes into a new (rewritten) model.
  return preproc.build();
}

int GetFirstAvailableDevice(SessionContext& session_context) {
int i = 0;
// Get the first available VAD-M device and set the device to busy
Expand Down
2 changes: 2 additions & 0 deletions onnxruntime/core/providers/openvino/backend_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ int GetFirstAvailableDevice(SessionContext& session_context);

void FillOutputsWithConstantData(std::shared_ptr<ov::Node> node, Ort::UnownedValue& out_tensor);

std::shared_ptr<OVNetwork> Set_Layout(std::shared_ptr<OVNetwork> ov_model, const layout_t& layout);

template <typename T>
void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr<ov::Node> node);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
!subgraph_context_.has_dynamic_input_shape &&
!session_context_.so_context_enable &&
session_context_.reshape.empty() &&
session_context_.layout.empty() &&
!enable_causallm &&
!eligible_for_cpu_fallback &&
auto_unified_compile);
Expand Down
4 changes: 3 additions & 1 deletion onnxruntime/core/providers/openvino/contexts.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ class SharedContext : public WeakSingleton<SharedContext> {

using config_t = std::map<std::string, ov::AnyMap>;
using reshape_t = std::map<std::string, ov::PartialShape>;
using layout_t = std::map<std::string, ov::Layout>;

struct ProviderInfo {
std::string device_type{""}; // [device_type]: Overrides the accelerator hardware type and
Expand All @@ -88,6 +89,7 @@ struct ProviderInfo {
// (GPU) feature. If blob files are already present,
// it will be directly loaded.
reshape_t reshape{}; // Used for reshaping the ov input tensor shape at runtime.
layout_t layout{}; // Used for specifying the ov input/output tensor layout at runtime.
std::string model_priority{"DEFAULT"}; // High-level OpenVINO model priority hint
// Defines what model should be provided with more performant
// bounded resource first
Expand All @@ -110,7 +112,7 @@ struct ProviderInfo {
const ConfigOptions* config_options{NULL};
const std::unordered_set<std::string> valid_provider_keys = {"device_type", "device_id", "device_luid", "cache_dir", "precision",
"load_config", "context", "num_of_threads", "model_priority", "num_streams", "enable_opencl_throttling", "enable_qdq_optimizer",
"enable_causallm", "disable_dynamic_shapes", "reshape_input"};
"enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout"};
};

// Holds context applicable to the entire EP instance.
Expand Down
74 changes: 74 additions & 0 deletions onnxruntime/core/providers/openvino/openvino_parser_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -236,5 +236,79 @@
return ov::Dimension(range_start, range_end);
}

// Parses a layout definition string of the form "name1[NC],name2[CHW]" into a
// map from tensor name to ov::Layout.
// Throws (ORT_THROW) on an empty definition, a definition with no well-formed
// entries, an empty tensor name, or an invalid dimension string.
layout_t OpenVINOParserUtils::ParseLayout(const std::string& layout_definition) {
  layout_t parsed_layout_map;

  // An empty layout string is a configuration error, not "no layouts requested".
  if (layout_definition.empty()) {
    ORT_THROW("Empty layout definition provided in layout parameter");
  }

  // Matches one "tensor_name[DIMS]" entry; entries are comma-separated.
  // Group 1: tensor name (no brackets/commas). Group 2: dimension string.
  const std::regex layout_pattern(R"(([^\[\],]+)\s*\[(.*?)\])");  // e.g. "input_1[NC],data[CHW]"

  auto layout_begin = std::sregex_iterator(
      layout_definition.begin(),
      layout_definition.end(),
      layout_pattern);
  const auto layout_end = std::sregex_iterator();

  // No well-formed entries at all -> the whole definition is malformed.
  if (layout_begin == layout_end) {
    ORT_THROW("Invalid layout definition format: " + layout_definition);
  }

  // Process each "name[DIMS]" match.
  for (auto i = layout_begin; i != layout_end; ++i) {
    const std::smatch& layout_match = *i;  // reference: avoid copying the match results

    // Group 1: tensor name, e.g. "input_1"; surrounding whitespace is insignificant.
    const std::string tensor_name = TrimWhitespace(layout_match[1].str());
    if (tensor_name.empty()) {
      ORT_THROW("Empty tensor name provided in layout parameter");
    }

    // Group 2: dimension letters, e.g. "NC", "CHW".
    const std::string dimensions_str = layout_match[2].str();

    // Check_Valid_Layout throws with a detailed message for bad characters or
    // repeated dims; it returns false only for an empty dimension string.
    if (!Check_Valid_Layout(dimensions_str, tensor_name)) {
      ORT_THROW("Invalid dimensions string provided in layout parameter");
    }

    parsed_layout_map[tensor_name] = ov::Layout(dimensions_str);
  }

  return parsed_layout_map;
}

bool OpenVINOParserUtils::Check_Valid_Layout(const std::string& layout_str, const std::string& tensor_name) {
// Check if the layout string is empty
if (layout_str.empty()) {
return false;
}

std::unordered_set<char> seen_alphabets;

Check notice on line 294 in onnxruntime/core/providers/openvino/openvino_parser_utils.cc

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/core/providers/openvino/openvino_parser_utils.cc#L294

Add #include <unordered_set> for unordered_set<> [build/include_what_you_use] [4]
Raw output
onnxruntime/core/providers/openvino/openvino_parser_utils.cc:294:  Add #include <unordered_set> for unordered_set<>  [build/include_what_you_use] [4]
for (char c : layout_str) {
if (std::isalpha(c)) {
char upper_c = static_cast<char>(std::toupper(c)); // Convert to uppercase for case-insensitive comparison
if (seen_alphabets.find(upper_c) != seen_alphabets.end()) {
ORT_THROW("Repeated Dim '" + std::string(1, c) +
"' found in layout dimensions for tensor '" + tensor_name + "'");
}
seen_alphabets.insert(upper_c);
} else if (c != '?') {
// Only '?' is allowed as non-alphabetic character
ORT_THROW("Invalid character '" + std::string(1, c) +

Check notice on line 305 in onnxruntime/core/providers/openvino/openvino_parser_utils.cc

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/core/providers/openvino/openvino_parser_utils.cc#L305

Add #include <string> for string [build/include_what_you_use] [4]
Raw output
onnxruntime/core/providers/openvino/openvino_parser_utils.cc:305:  Add #include <string> for string  [build/include_what_you_use] [4]
"' found in layout dimensions for tensor '" + tensor_name + "'");
}
}

return true;
}

} // namespace openvino_ep
} // namespace onnxruntime
2 changes: 2 additions & 0 deletions onnxruntime/core/providers/openvino/openvino_parser_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ class OpenVINOParserUtils {
std::string& device_type,
const std::string& option_name);
static reshape_t ParseInputShape(const std::string& reshape_input_definition);
static layout_t ParseLayout(const std::string& layout_definition);
static std::string TrimWhitespace(const std::string& str);
static ov::Dimension ParseDimensionRange(const std::string& range_str, const std::string& tensor_name);
static bool Check_Valid_Layout(const std::string& layout_str, const std::string& tensor_name);
};

} // namespace openvino_ep
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,10 @@ static void ParseProviderInfo(const ProviderOptions& provider_options,
pi.reshape = OpenVINOParserUtils::ParseInputShape(provider_options.at("reshape_input"));
}

if (provider_options.contains("layout")) {
pi.layout = OpenVINOParserUtils::ParseLayout(provider_options.at("layout"));
}

if (provider_options.contains("load_config")) {
auto parse_config = [&](const std::string& config_str) -> std::map<std::string, ov::AnyMap> {
// If the config string is empty, return an empty map and skip processing
Expand Down
4 changes: 3 additions & 1 deletion onnxruntime/test/perftest/command_args_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,9 @@ namespace perftest {
"\t [OpenVINO only] [num_of_threads]: Overrides the accelerator hardware type and precision with these values at runtime.\n"
"\t [OpenVINO only] [cache_dir]: Explicitly specify the path to dump and load the blobs(Model caching) or cl_cache (Kernel Caching) files feature. If blob files are already present, it will be directly loaded.\n"
"\t [OpenVINO only] [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU device(Reduces the CPU Utilization while using GPU) \n"
"\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU num_of_threads|5 enable_opencl_throttling|true cache_dir|\"<path>\"\"\n"
"\t [OpenVINO only] [reshape_input]: Sets model input shapes with support for bounded dynamic dimensions using 'min..max' syntax (e.g., [1..10,3,224,224]) \n"
"\t [OpenVINO only] [layout]: Specifies the layout for inputs/outputs to interpret tensor dimensions correctly. \n"
"\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU num_of_threads|5 enable_opencl_throttling|true reshape_input|<input_name>[1,3,60,60..100] layout|<input_name>[NCHW] cache_dir|\"<path>\"\"\n"
"\n"
"\t [QNN only] [backend_type]: QNN backend type. E.g., 'cpu', 'htp'. Mutually exclusive with 'backend_path'.\n"
"\t [QNN only] [backend_path]: QNN backend path. E.g., '/folderpath/libQnnHtp.so', '/winfolderpath/QnnHtp.dll'. Mutually exclusive with 'backend_type'.\n"
Expand Down
4 changes: 3 additions & 1 deletion onnxruntime/test/perftest/ort_test_session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -828,12 +828,14 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
ov_options[key] = value;
} else if (key == "reshape_input") {
ov_options[key] = value;
} else if (key == "layout") {
ov_options[key] = value;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we then also update the THROW message?

ORT_THROW(
            "[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO."
            " ['device_type', 'device_id', 'num_of_threads', 'load_config', 'cache_dir', 'num_streams', "
            "'enable_opencl_throttling', 'disable_dynamic_shapes', 'enable_qdq_optimizer',"
            " 'enable_causallm', 'model_priority', 'layout'] \n");

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

} else {
ORT_THROW(
"[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO."
" ['device_type', 'device_id', 'num_of_threads', 'load_config', 'cache_dir', 'num_streams', "
"'enable_opencl_throttling', 'disable_dynamic_shapes', 'enable_qdq_optimizer',"
" 'enable_causallm', 'model_priority'] \n");
" 'enable_causallm', 'reshape_input', 'layout', 'model_priority'] \n");
}
}
session_options.AppendExecutionProvider_OpenVINO_V2(ov_options);
Expand Down
Loading