Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions onnxruntime/core/providers/openvino/ov_versions/capability.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,16 @@
npu_qdq_optimizer_enabled = true; // see data_ops.cc ~615 where we check for int16 types for gpu, this may change to a better approach later
}

#if OPENVINO_VERSION_MAJOR == 2025 && OPENVINO_VERSION_MINOR == 1
data_ops_ = std::make_unique<DataOps>(graph_viewer_, V_2025_1, device_type_, npu_qdq_optimizer_enabled);
#elif OPENVINO_VERSION_MAJOR == 2025 && OPENVINO_VERSION_MINOR == 2
data_ops_ = std::make_unique<DataOps>(graph_viewer_, V_2025_2, device_type_, npu_qdq_optimizer_enabled);
#elif OPENVINO_VERSION_MAJOR == 2025 && OPENVINO_VERSION_MINOR == 3
data_ops_ = std::make_unique<DataOps>(graph_viewer_, V_2025_3, device_type_, npu_qdq_optimizer_enabled);
#if OPENVINO_VERSION_MAJOR == 2026 && OPENVINO_VERSION_MINOR == 1
data_ops_ = std::make_unique<DataOps>(graph_viewer_, V_2026_1, device_type_, npu_qdq_optimizer_enabled);
#elif OPENVINO_VERSION_MAJOR == 2026 && OPENVINO_VERSION_MINOR == 0
data_ops_ = std::make_unique<DataOps>(graph_viewer_, V_2026_0, device_type_, npu_qdq_optimizer_enabled);
#elif OPENVINO_VERSION_MAJOR == 2025 && OPENVINO_VERSION_MINOR == 4
data_ops_ = std::make_unique<DataOps>(graph_viewer_, V_2025_4, device_type_, npu_qdq_optimizer_enabled);
#elif OPENVINO_VERSION_MAJOR == 2025 && OPENVINO_VERSION_MINOR == 3
data_ops_ = std::make_unique<DataOps>(graph_viewer_, V_2025_3, device_type_, npu_qdq_optimizer_enabled);
#else
data_ops_ = std::make_unique<DataOps>(graph_viewer_, V_2025_4, device_type_, npu_qdq_optimizer_enabled);
data_ops_ = std::make_unique<DataOps>(graph_viewer_, V_2026_1, device_type_, npu_qdq_optimizer_enabled);

Check notice on line 53 in onnxruntime/core/providers/openvino/ov_versions/capability.cc

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/core/providers/openvino/ov_versions/capability.cc#L53

Add #include <memory> for make_unique<> [build/include_what_you_use] [4]
Raw output
onnxruntime/core/providers/openvino/ov_versions/capability.cc:53:  Add #include <memory> for make_unique<>  [build/include_what_you_use] [4]
#endif
}

Expand Down
24 changes: 18 additions & 6 deletions onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,10 @@ void DataOps::populate_types_supported() {
std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4));
supported_types_initializer_.insert(
std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4));
supported_types_initializer_.insert(

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@n1harika — please add support for E4M3 as well as E5M2.
ONNX FE support

Copy link
Author

@n1harika n1harika Mar 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, added.
Just FYI, I haven't tested the E5M2 type, as the model I generated doesn't use it. There are also two other ONNX FP8 precisions — E4M3FNUZ and E5M2FNUZ; I will add these when the ONNX FE supports them.

std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN));
supported_types_initializer_.insert(
std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E5M2));

supported_types_npu_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
Expand All @@ -288,9 +292,9 @@ void DataOps::populate_types_supported() {
supported_types_npu_.insert(
std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
supported_types_npu_.insert(
std::make_pair(V_2024_3, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN));
std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN));
supported_types_npu_.insert(
std::make_pair(V_2024_3, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FNUZ));
std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E5M2));
supported_types_npu_.insert(
std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4));
supported_types_npu_.insert(
Expand Down Expand Up @@ -320,6 +324,10 @@ void DataOps::populate_types_supported() {
std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4));
supported_types_cpu_.insert(
std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4));
supported_types_cpu_.insert(
std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN));
supported_types_cpu_.insert(
std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E5M2));

supported_types_gpu_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
Expand All @@ -339,6 +347,10 @@ void DataOps::populate_types_supported() {
std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4));
supported_types_gpu_.insert(
std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4));
supported_types_gpu_.insert(
std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN));
supported_types_gpu_.insert(
std::make_pair(V_2026_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E5M2));
}

void DataOps::populate_op_mode_supported() {
Expand Down Expand Up @@ -396,7 +408,7 @@ void DataOps::populate_op_mode_supported() {

// populate unsupportedmode_t
{
UnsupportedOpMode obj = {{V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5, V_2024_6, V_2025_0, V_2025_1, V_2025_2, V_2025_3, V_2025_4},
UnsupportedOpMode obj = {{V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5, V_2024_6, V_2025_0, V_2025_1, V_2025_2, V_2025_3, V_2025_4, V_2026_0, V_2026_1},
[this](const Node* node, const InitializedTensorSet&) {
// If the Input of ReduceMax op is UINT8, it is rejected (Due to output mismatch)
for (size_t i = 0; i < node->InputDefs().size(); i++) {
Expand All @@ -413,7 +425,7 @@ void DataOps::populate_op_mode_supported() {
{
UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2,
V_2024_3, V_2024_4, V_2024_5, V_2024_6, V_2025_0, V_2025_1,
V_2025_2, V_2025_3, V_2025_4},
V_2025_2, V_2025_3, V_2025_4, V_2026_0, V_2026_1},
[this](const Node* node, const InitializedTensorSet&) {
const auto& input_args = node->InputDefs();
const auto& input_arg = (input_args.size() > 1) ? input_args[1] : input_args[0];
Expand All @@ -433,7 +445,7 @@ void DataOps::populate_op_mode_supported() {
{
UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2,
V_2024_3, V_2024_4, V_2024_5, V_2024_6, V_2025_0, V_2025_1,
V_2025_2, V_2025_3, V_2025_4},
V_2025_2, V_2025_3, V_2025_4, V_2026_0, V_2026_1},
[this](const Node* node, const InitializedTensorSet&) {
// If the operator is unsqueeze
// If axes is an input, then we cannot produce a static graph.
Expand All @@ -449,7 +461,7 @@ void DataOps::populate_op_mode_supported() {
}
{
UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5,
V_2024_6, V_2025_0, V_2025_1, V_2025_2, V_2025_3, V_2025_4},
V_2024_6, V_2025_0, V_2025_1, V_2025_2, V_2025_3, V_2025_4, V_2026_0, V_2026_1},
[this](const Node* node, const InitializedTensorSet&) {
// check for attributes
auto& upsample_attr = node->GetAttributes();
Expand Down
4 changes: 3 additions & 1 deletion onnxruntime/core/providers/openvino/ov_versions/data_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@ enum versionNum {
V_2025_1,
V_2025_2,
V_2025_3,
V_2025_4
V_2025_4,
V_2026_0,
V_2026_1
};

using VersionNum = enum versionNum;
Expand Down
Loading