From b4b3a44e24a84aaf84b6b6f725c6daef84daafb8 Mon Sep 17 00:00:00 2001 From: n1harika Date: Tue, 24 Mar 2026 20:45:51 -0700 Subject: [PATCH 1/4] Adding support for OV 2026.1 and Float8 precisions --- .../openvino/ov_versions/capability.cc | 14 +++++------ .../openvino/ov_versions/data_ops.cc | 24 ++++++++++++++----- .../providers/openvino/ov_versions/data_ops.h | 4 +++- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.cc b/onnxruntime/core/providers/openvino/ov_versions/capability.cc index 77fa8eed8501c..e932595f48565 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/capability.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/capability.cc @@ -41,16 +41,16 @@ GetCapability::GetCapability(const EPCtxHandler& ep_ctx_handler, npu_qdq_optimizer_enabled = true; // see data_ops.cc ~615 where we check for int16 types for gpu, this may change to a better approach later } -#if OPENVINO_VERSION_MAJOR == 2025 && OPENVINO_VERSION_MINOR == 1 - data_ops_ = std::make_unique(graph_viewer_, V_2025_1, device_type_, npu_qdq_optimizer_enabled); -#elif OPENVINO_VERSION_MAJOR == 2025 && OPENVINO_VERSION_MINOR == 2 - data_ops_ = std::make_unique(graph_viewer_, V_2025_2, device_type_, npu_qdq_optimizer_enabled); -#elif OPENVINO_VERSION_MAJOR == 2025 && OPENVINO_VERSION_MINOR == 3 - data_ops_ = std::make_unique(graph_viewer_, V_2025_3, device_type_, npu_qdq_optimizer_enabled); +#if OPENVINO_VERSION_MAJOR == 2026 && OPENVINO_VERSION_MINOR == 1 + data_ops_ = std::make_unique(graph_viewer_, V_2026_1, device_type_, npu_qdq_optimizer_enabled); +#elif OPENVINO_VERSION_MAJOR == 2026 && OPENVINO_VERSION_MINOR == 0 + data_ops_ = std::make_unique(graph_viewer_, V_2026_0, device_type_, npu_qdq_optimizer_enabled); #elif OPENVINO_VERSION_MAJOR == 2025 && OPENVINO_VERSION_MINOR == 4 data_ops_ = std::make_unique(graph_viewer_, V_2025_4, device_type_, npu_qdq_optimizer_enabled); +#elif OPENVINO_VERSION_MAJOR == 
2025 && OPENVINO_VERSION_MINOR == 3 + data_ops_ = std::make_unique(graph_viewer_, V_2025_3, device_type_, npu_qdq_optimizer_enabled); #else - data_ops_ = std::make_unique(graph_viewer_, V_2025_4, device_type_, npu_qdq_optimizer_enabled); + data_ops_ = std::make_unique(graph_viewer_, V_2026_1, device_type_, npu_qdq_optimizer_enabled); #endif } diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc index 4d07193c2e196..b78111bf355d4 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc @@ -268,6 +268,10 @@ void DataOps::populate_types_supported() { std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4)); supported_types_initializer_.insert( std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4)); + supported_types_npu_.insert( + std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN)); + supported_types_npu_.insert( + std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E5M2FN)); supported_types_npu_.insert( std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL)); @@ -288,9 +292,9 @@ void DataOps::populate_types_supported() { supported_types_npu_.insert( std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16)); supported_types_npu_.insert( - std::make_pair(V_2024_3, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN)); + std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN)); supported_types_npu_.insert( - std::make_pair(V_2024_3, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FNUZ)); + std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E5M2FN)); 
supported_types_npu_.insert( std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4)); supported_types_npu_.insert( @@ -320,6 +324,10 @@ void DataOps::populate_types_supported() { std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4)); supported_types_cpu_.insert( std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4)); + supported_types_npu_.insert( + std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN)); + supported_types_npu_.insert( + std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E5M2FN)); supported_types_gpu_.insert( std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT)); @@ -339,6 +347,10 @@ void DataOps::populate_types_supported() { std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4)); supported_types_gpu_.insert( std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4)); + supported_types_npu_.insert( + std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN)); + supported_types_npu_.insert( + std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E5M2FN)); } void DataOps::populate_op_mode_supported() { @@ -396,7 +408,7 @@ void DataOps::populate_op_mode_supported() { // populate unsupportedmode_t { - UnsupportedOpMode obj = {{V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5, V_2024_6, V_2025_0, V_2025_1, V_2025_2, V_2025_3, V_2025_4}, + UnsupportedOpMode obj = {{V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5, V_2024_6, V_2025_0, V_2025_1, V_2025_2, V_2025_3, V_2025_4, V_2026_0, V_2026_1}, [this](const Node* node, const InitializedTensorSet&) { // If the Input of ReduceMax op is UINT8, it is rejected (Due to output mismatch) for (size_t i = 0; i < node->InputDefs().size(); i++) { @@ 
-413,7 +425,7 @@ void DataOps::populate_op_mode_supported() { { UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5, V_2024_6, V_2025_0, V_2025_1, - V_2025_2, V_2025_3, V_2025_4}, + V_2025_2, V_2025_3, V_2025_4, V_2026_0, V_2026_1}, [this](const Node* node, const InitializedTensorSet&) { const auto& input_args = node->InputDefs(); const auto& input_arg = (input_args.size() > 1) ? input_args[1] : input_args[0]; @@ -433,7 +445,7 @@ void DataOps::populate_op_mode_supported() { { UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5, V_2024_6, V_2025_0, V_2025_1, - V_2025_2, V_2025_3, V_2025_4}, + V_2025_2, V_2025_3, V_2025_4, V_2026_0, V_2026_1}, [this](const Node* node, const InitializedTensorSet&) { // If the operator is unsqueeze // If axes is an input, then we cannot produce a static graph. @@ -449,7 +461,7 @@ void DataOps::populate_op_mode_supported() { } { UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5, - V_2024_6, V_2025_0, V_2025_1, V_2025_2, V_2025_3, V_2025_4}, + V_2024_6, V_2025_0, V_2025_1, V_2025_2, V_2025_3, V_2025_4, V_2026_0, V_2026_1}, [this](const Node* node, const InitializedTensorSet&) { // check for attributes auto& upsample_attr = node->GetAttributes(); diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h index cf6290ee07921..af4bdc6efffff 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h @@ -38,7 +38,9 @@ enum versionNum { V_2025_1, V_2025_2, V_2025_3, - V_2025_4 + V_2025_4, + V_2026_0, + V_2026_1 }; using VersionNum = enum versionNum; From 4c9a95446640632d3a53f09d6d352d4dff64fa1b Mon Sep 17 00:00:00 2001 From: n1harika Date: Tue, 24 Mar 2026 20:57:16 -0700 Subject: [PATCH 2/4] lint fix --- 
onnxruntime/core/providers/openvino/ov_versions/data_ops.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc index b78111bf355d4..a7ef224cb625f 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc @@ -324,7 +324,7 @@ void DataOps::populate_types_supported() { std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4)); supported_types_cpu_.insert( std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4)); - supported_types_npu_.insert( + supported_types_npu_.insert( std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN)); supported_types_npu_.insert( std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E5M2FN)); @@ -347,7 +347,7 @@ void DataOps::populate_types_supported() { std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4)); supported_types_gpu_.insert( std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4)); - supported_types_npu_.insert( + supported_types_npu_.insert( std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN)); supported_types_npu_.insert( std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E5M2FN)); From 38e7aad3753d3371d7a4f6f333d3d045ca2cb8b5 Mon Sep 17 00:00:00 2001 From: n1harika Date: Tue, 24 Mar 2026 22:06:04 -0700 Subject: [PATCH 3/4] fix CI issues --- .../providers/openvino/ov_versions/data_ops.cc | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc index 
a7ef224cb625f..f01a388cb9d8f 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc @@ -268,10 +268,8 @@ void DataOps::populate_types_supported() { std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4)); supported_types_initializer_.insert( std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4)); - supported_types_npu_.insert( + supported_types_initializer_.insert( std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN)); - supported_types_npu_.insert( - std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E5M2FN)); supported_types_npu_.insert( std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL)); @@ -293,8 +291,6 @@ void DataOps::populate_types_supported() { std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16)); supported_types_npu_.insert( std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN)); - supported_types_npu_.insert( - std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E5M2FN)); supported_types_npu_.insert( std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4)); supported_types_npu_.insert( @@ -324,10 +320,8 @@ void DataOps::populate_types_supported() { std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4)); supported_types_cpu_.insert( std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4)); - supported_types_npu_.insert( + supported_types_cpu_.insert( std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN)); - supported_types_npu_.insert( - std::make_pair(V_2026_1, 
ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E5M2FN)); supported_types_gpu_.insert( std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT)); @@ -347,10 +341,8 @@ void DataOps::populate_types_supported() { std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4)); supported_types_gpu_.insert( std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4)); - supported_types_npu_.insert( + supported_types_gpu_.insert( std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN)); - supported_types_npu_.insert( - std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E5M2FN)); } void DataOps::populate_op_mode_supported() { From 31eb5b7847b055ce2c298cfd5b149fd7b3776de2 Mon Sep 17 00:00:00 2001 From: n1harika Date: Wed, 25 Mar 2026 21:45:59 -0700 Subject: [PATCH 4/4] Adding support for E5M2 type --- .../core/providers/openvino/ov_versions/data_ops.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc index f01a388cb9d8f..28aedc0faae61 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc @@ -270,6 +270,8 @@ void DataOps::populate_types_supported() { std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4)); supported_types_initializer_.insert( std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN)); + supported_types_initializer_.insert( + std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E5M2)); supported_types_npu_.insert( std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL)); @@ -291,6 +293,8 @@ void
DataOps::populate_types_supported() { std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16)); supported_types_npu_.insert( std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN)); + supported_types_npu_.insert( + std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E5M2)); supported_types_npu_.insert( std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4)); supported_types_npu_.insert( @@ -322,6 +326,8 @@ void DataOps::populate_types_supported() { std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4)); supported_types_cpu_.insert( std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN)); + supported_types_cpu_.insert( + std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E5M2)); supported_types_gpu_.insert( std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT)); @@ -343,6 +349,8 @@ void DataOps::populate_types_supported() { std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4)); supported_types_gpu_.insert( std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN)); + supported_types_gpu_.insert( + std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E5M2)); } void DataOps::populate_op_mode_supported() {