From 61b71d526452733b91c8a22efe25397faa6c6b96 Mon Sep 17 00:00:00 2001 From: haowhsu Date: Wed, 2 Jul 2025 00:26:53 +0800 Subject: [PATCH] lpai e2e & minimum inference runtime support --- backends/qualcomm/CMakeLists.txt | 34 +- backends/qualcomm/_passes/build_quant_io.py | 17 +- .../qualcomm/aot/wrappers/TensorWrapper.cpp | 10 + backends/qualcomm/fastrpc/CMakeLists.txt | 78 +++++ .../qualcomm/fastrpc/qnn_executor_runner.cpp | 148 +++++++++ backends/qualcomm/fastrpc/qnn_executorch.idl | 27 ++ .../qualcomm/fastrpc/qnn_executorch_impl.cpp | 314 ++++++++++++++++++ backends/qualcomm/runtime/Logging.cpp | 10 + .../qualcomm/runtime/QnnExecuTorchBackend.cpp | 2 + backends/qualcomm/runtime/Utils.cpp | 2 + .../qualcomm/runtime/backends/CMakeLists.txt | 35 +- .../runtime/backends/QnnBackendCache.cpp | 3 +- .../runtime/backends/QnnBackendFactory.cpp | 35 +- .../runtime/backends/QnnBackendFactory.h | 2 + .../backends/QnnBackendUnifiedRegistry.cpp | 14 +- .../backends/QnnBackendUnifiedRegistry.h | 2 +- .../runtime/backends/QnnCustomProtocol.cpp | 9 +- .../runtime/backends/QnnCustomProtocol.h | 1 + .../runtime/backends/QnnFunctionInterface.h | 2 + .../runtime/backends/QnnGraphCommon.h | 6 +- .../runtime/backends/QnnImplementation.cpp | 7 + .../backends/QnnSysFunctionInterface.h | 2 +- .../runtime/backends/lpai/LpaiBackend.cpp | 62 ++++ .../runtime/backends/lpai/LpaiBackend.h | 40 +++ .../backends/lpai/LpaiBackendCustomConfig.cpp | 66 ++++ .../backends/lpai/LpaiBackendCustomConfig.h | 45 +++ .../runtime/backends/lpai/LpaiContext.cpp | 56 ++++ .../runtime/backends/lpai/LpaiContext.h | 38 +++ .../backends/lpai/LpaiContextCustomConfig.h | 44 +++ .../runtime/backends/lpai/LpaiDevice.h | 31 ++ .../runtime/backends/lpai/LpaiGraph.h | 81 +++++ .../backends/lpai/LpaiGraphCustomConfig.h | 94 ++++++ .../lpai/aarch64/LpaiContextCustomConfig.cpp | 32 ++ .../backends/lpai/aarch64/LpaiDevice.cpp | 25 ++ .../backends/lpai/aarch64/LpaiGraph.cpp | 30 ++ .../lpai/aarch64/LpaiGraphCustomConfig.cpp | 95 ++++++ .../lpai/x86_64/LpaiContextCustomConfig.cpp | 22 ++ .../backends/lpai/x86_64/LpaiDevice.cpp | 21 ++ .../backends/lpai/x86_64/LpaiGraph.cpp | 21 ++ .../lpai/x86_64/LpaiGraphCustomConfig.cpp | 32 ++ backends/qualcomm/scripts/build.sh | 56 +++- backends/qualcomm/scripts/lpai_utils.sh | 90 +++++ backends/qualcomm/scripts/sqnr_verifier.py | 38 +++ .../serialization/qc_compiler_spec.fbs | 63 +++- backends/qualcomm/serialization/qc_schema.py | 62 +++- backends/qualcomm/tests/test_qnn_delegate.py | 97 +++++- backends/qualcomm/tests/utils.py | 28 +- backends/qualcomm/utils/utils.py | 78 ++++- .../executor_runner/qnn_executor_runner.cpp | 3 +- examples/qualcomm/utils.py | 23 +- extension/data_loader/file_data_loader.cpp | 57 ++-- third-party/CMakeLists.txt | 5 +- 52 files changed, 2110 insertions(+), 85 deletions(-) create mode 100644 backends/qualcomm/fastrpc/CMakeLists.txt create mode 100644 backends/qualcomm/fastrpc/qnn_executor_runner.cpp create mode 100644 backends/qualcomm/fastrpc/qnn_executorch.idl create mode 100644 backends/qualcomm/fastrpc/qnn_executorch_impl.cpp create mode 100644 backends/qualcomm/runtime/backends/lpai/LpaiBackend.cpp create mode 100644 backends/qualcomm/runtime/backends/lpai/LpaiBackend.h create mode 100644 backends/qualcomm/runtime/backends/lpai/LpaiBackendCustomConfig.cpp create mode 100644 backends/qualcomm/runtime/backends/lpai/LpaiBackendCustomConfig.h create mode 100644 backends/qualcomm/runtime/backends/lpai/LpaiContext.cpp create mode 100644 
backends/qualcomm/runtime/backends/lpai/LpaiContext.h create mode 100644 backends/qualcomm/runtime/backends/lpai/LpaiContextCustomConfig.h create mode 100644 backends/qualcomm/runtime/backends/lpai/LpaiDevice.h create mode 100644 backends/qualcomm/runtime/backends/lpai/LpaiGraph.h create mode 100644 backends/qualcomm/runtime/backends/lpai/LpaiGraphCustomConfig.h create mode 100644 backends/qualcomm/runtime/backends/lpai/aarch64/LpaiContextCustomConfig.cpp create mode 100644 backends/qualcomm/runtime/backends/lpai/aarch64/LpaiDevice.cpp create mode 100644 backends/qualcomm/runtime/backends/lpai/aarch64/LpaiGraph.cpp create mode 100644 backends/qualcomm/runtime/backends/lpai/aarch64/LpaiGraphCustomConfig.cpp create mode 100644 backends/qualcomm/runtime/backends/lpai/x86_64/LpaiContextCustomConfig.cpp create mode 100644 backends/qualcomm/runtime/backends/lpai/x86_64/LpaiDevice.cpp create mode 100644 backends/qualcomm/runtime/backends/lpai/x86_64/LpaiGraph.cpp create mode 100644 backends/qualcomm/runtime/backends/lpai/x86_64/LpaiGraphCustomConfig.cpp create mode 100755 backends/qualcomm/scripts/lpai_utils.sh create mode 100644 backends/qualcomm/scripts/sqnr_verifier.py diff --git a/backends/qualcomm/CMakeLists.txt b/backends/qualcomm/CMakeLists.txt index 8ce1ce1bdbf..b9a23e243c6 100644 --- a/backends/qualcomm/CMakeLists.txt +++ b/backends/qualcomm/CMakeLists.txt @@ -239,11 +239,28 @@ target_link_libraries( shared_buffer qnn_dlc_manager ) -target_link_libraries( - qnn_executorch_backend - PRIVATE qnn_executorch_header qnn_schema qnn_manager executorch_core - extension_tensor qnn_backend_options -) +if(${CMAKE_SYSTEM_PROCESSOR} MATCHES Hexagon) + link_directories( + $ENV{HEXAGON_TOOLS_ROOT}/Tools/target/hexagon/lib/$ENV{HEXAGON_ARCH}/G0/pic + ) + target_link_libraries( + qnn_executorch_backend + PRIVATE qnn_executorch_header + qnn_schema + qnn_manager + executorch_core + extension_tensor + qnn_backend_options + c + c++ + ) +else() + target_link_libraries( + qnn_executorch_backend + PRIVATE qnn_executorch_header qnn_schema qnn_manager executorch_core + extension_tensor qnn_backend_options + ) +endif() set_target_properties( qnn_executorch_backend PROPERTIES LINK_FLAGS "-Wl,-rpath='$ORIGIN'" ) @@ -278,6 +295,13 @@ install( RUNTIME DESTINATION ${CMAKE_INSTALL_LIBDIR}/executorch/backends/qualcomm ) +if(DEFINED ENV{HEXAGON_SDK_ROOT}) + add_subdirectory( + ${QNN_EXECUTORCH_ROOT_DIR}/fastrpc + ${CMAKE_CURRENT_BINARY_DIR}/qnn_executorch/fastrpc + ) +endif() + # QNN pybind if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64") add_subdirectory( diff --git a/backends/qualcomm/_passes/build_quant_io.py b/backends/qualcomm/_passes/build_quant_io.py index d43842e84a5..3bd90f8f4fc 100644 --- a/backends/qualcomm/_passes/build_quant_io.py +++ b/backends/qualcomm/_passes/build_quant_io.py @@ -5,9 +5,8 @@ # LICENSE file in the root directory of this source tree. 
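+# How the call_delegate override below derives its specs, as a minimal
+# sketch (dtypes are only examples): each delegate output keeps the shape of
+# the original module's output tensor but takes the dtype tagged by the
+# quantizer in node.meta[QCOM_QUANTIZED_IO], e.g.
+#   val = node.meta["val"]                     # fake tensor, e.g. float32
+#   io_dtype = node.meta[QCOM_QUANTIZED_IO]    # e.g. torch.uint16
+#   tensors.append(val.to(io_dtype))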
import torch from executorch.backends.qualcomm.utils.constants import QCOM_QUANTIZED_IO -from executorch.exir.delegate import executorch_call_delegate -from executorch.exir.pass_base import ExportPass, ProxyValue +from executorch.exir.pass_base import ExportPass from executorch.exir.tensor import TensorSpec from torch.utils import _pytree as pytree @@ -39,11 +38,17 @@ def call_getitem(self, value, key: int, meta): return super().call_getitem(value, key, meta) def call_delegate(self, lowered_module, args, kwargs, meta): - args_data, _ = pytree.tree_map_only( - ProxyValue, lambda x: x.data, (args, kwargs) - ) + output_node = [ + node + for node in lowered_module.original_module.graph.nodes + if node.target == "output" + ][0] + tensors = [ + node.meta["val"].to(node.meta[QCOM_QUANTIZED_IO]) + for node in output_node.args[0] + ] meta["spec"] = pytree.tree_map( self._make_spec, - executorch_call_delegate(lowered_module, *args_data), + tuple(tensors), ) return super().call_delegate(lowered_module, args, kwargs, meta) diff --git a/backends/qualcomm/aot/wrappers/TensorWrapper.cpp b/backends/qualcomm/aot/wrappers/TensorWrapper.cpp index 17d76aac412..68dfa775469 100644 --- a/backends/qualcomm/aot/wrappers/TensorWrapper.cpp +++ b/backends/qualcomm/aot/wrappers/TensorWrapper.cpp @@ -118,7 +118,17 @@ TensorWrapper::TensorWrapper( Error TensorWrapper::FillDataBuffer(const void* data, bool copy_data) { if (data != nullptr) { QNN_TENSOR_VER_PTR(tensor_)->memType = QNN_TENSORMEMTYPE_RAW; +#ifdef __hexagon__ + // alignment is required + auto align_size = [](size_t alignment, size_t sz) { + return (sz + (alignment - 1)) & ~(alignment - 1); + }; + const size_t alignment = 64; + QNN_TENSOR_VER_PTR(tensor_)->clientBuf.dataSize = + align_size(alignment, bytes_); +#else QNN_TENSOR_VER_PTR(tensor_)->clientBuf.dataSize = bytes_; +#endif if (copy_data) { owned_data_ = std::make_unique(bytes_); const char* src_data = static_cast(data); diff --git a/backends/qualcomm/fastrpc/CMakeLists.txt b/backends/qualcomm/fastrpc/CMakeLists.txt new file mode 100644 index 00000000000..d49a8003f3f --- /dev/null +++ b/backends/qualcomm/fastrpc/CMakeLists.txt @@ -0,0 +1,78 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# Copyright 2025 Arm Limited and/or its affiliates. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
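+
+# The qaic compiler below turns qnn_executorch.idl into three artifacts:
+#   qnn_executorch.h       - interface header shared by both sides
+#   qnn_executorch_stub.c  - CPU-side marshalling, linked into the runner
+#   qnn_executorch_skel.c  - DSP-side dispatch, linked into the skel library
+# Equivalent manual invocation (mirroring the custom command below, with
+# HEXAGON_SDK_ROOT set and an output directory of your choice):
+#   $HEXAGON_SDK_ROOT/ipc/fastrpc/qaic/bin/qaic \
+#     -I $HEXAGON_SDK_ROOT/incs -I $HEXAGON_SDK_ROOT/incs/stddef \
+#     -o <out_dir> backends/qualcomm/fastrpc/qnn_executorch.idl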
+ +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +set(_qnn_fastrpc__dir ${CMAKE_BINARY_DIR}/backends/qualcomm/fastrpc) +set(_qnn_fastrpc__srcs ${CMAKE_CURRENT_LIST_DIR}/qnn_executorch.idl) +set(_qnn_fastrpc__outputs + ${_qnn_fastrpc__dir}/qnn_executorch.h + ${_qnn_fastrpc__dir}/qnn_executorch_stub.c + ${_qnn_fastrpc__dir}/qnn_executorch_skel.c +) + +if(DEFINED ENV{HEXAGON_SDK_ROOT}) + add_custom_command( + OUTPUT ${_qnn_fastrpc__outputs} + COMMAND mkdir -p ${_qnn_fastrpc__dir} + COMMAND + $ENV{HEXAGON_SDK_ROOT}/ipc/fastrpc/qaic/bin/qaic -I + $ENV{HEXAGON_SDK_ROOT}/incs -I $ENV{HEXAGON_SDK_ROOT}/incs/stddef -o + ${_qnn_fastrpc__dir} ${_qnn_fastrpc__srcs} + WORKING_DIRECTORY ${EXECUTORCH_SOURCE_DIR} + DEPENDS qnn_executorch_backend + COMMENT "Codegen for fastrpc files" + ) + add_custom_target( + fastrpc_codegen + DEPENDS ${_qnn_fastrpc__outputs} + COMMENT "Codegen for fastrpc files" + ) + +endif() + +if(${CMAKE_SYSTEM_PROCESSOR} MATCHES Hexagon) + add_library( + qnn_executorch_skel SHARED + ${_qnn_fastrpc__dir}/qnn_executorch.h + ${_qnn_fastrpc__dir}/qnn_executorch_skel.c qnn_executorch_impl.cpp + ) + target_include_directories(qnn_executorch_skel PRIVATE ${_qnn_fastrpc__dir}) + target_link_libraries( + qnn_executorch_skel PRIVATE extension_data_loader qnn_executorch_backend + c++ c + ) + add_dependencies(qnn_executorch_skel fastrpc_codegen) +endif() + +if(${CMAKE_SYSTEM_PROCESSOR} MATCHES aarch64) + include_directories( + $ENV{HEXAGON_SDK_ROOT}/incs $ENV{HEXAGON_SDK_ROOT}/incs/stddef + ${_qnn_fastrpc__dir} + ) + link_directories( + $ENV{HEXAGON_SDK_ROOT}/ipc/fastrpc/remote/ship/android_aarch64 + ) + add_library( + qnn_executorch_stub SHARED ${_qnn_fastrpc__dir}/qnn_executorch.h + ${_qnn_fastrpc__dir}/qnn_executorch_stub.c + ) + # TODO: support cdsp if necessary + target_link_libraries(qnn_executorch_stub PRIVATE adsprpc) + add_dependencies(qnn_executorch_stub fastrpc_codegen) + + # build minimum example app + add_executable(qnn_executor_runner qnn_executor_runner.cpp) + target_link_libraries( + qnn_executor_runner PRIVATE executorch_core gflags qnn_executorch_stub + adsprpc + ) + # TODO: support cdsp if necessary + target_link_libraries(qnn_executor_runner PRIVATE adsprpc) +endif() diff --git a/backends/qualcomm/fastrpc/qnn_executor_runner.cpp b/backends/qualcomm/fastrpc/qnn_executor_runner.cpp new file mode 100644 index 00000000000..7e5b53e34da --- /dev/null +++ b/backends/qualcomm/fastrpc/qnn_executor_runner.cpp @@ -0,0 +1,148 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
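+ *
+ * Host-side driver for the DSP-resident ExecuTorch runtime: it opens a
+ * FastRPC session, loads the .pte on the DSP, queries input/output sizes,
+ * then streams inputs from input_list.txt and writes outputs as raw files.
+ * Typical invocation (using the flags declared below):
+ *   qnn_executor_runner --model_path model.pte \
+ *     --input_list_path input_list.txt --output_folder_path outputs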
+ */ + +#include +#include +#include +#include + +#include +#include + +#include "qnn_executorch.h" + +DEFINE_string( + model_path, + "model.pte", + "Model serialized in flatbuffer format."); +DEFINE_string( + output_folder_path, + ".", + "Executorch inference data output path."); +DEFINE_string(input_list_path, "input_list.txt", "Model input list path."); + +int main(int argc, char** argv) { + gflags::ParseCommandLineFlags(&argc, &argv, true); + if (argc != 1) { + std::string msg = "extra commandline args:"; + for (int i = 1 /* skip argv[0] (program name) */; i < argc; i++) { + msg += std::string(" ") + argv[i]; + } + ET_LOG(Error, "%s", msg.c_str()); + return 1; + } + + // fastrpc related + // adsp + const int adsp_domain_id = 0; + // signed PD + const int enable_unsigned_pd = 0; + // domain uri + std::string domain_uri(qnn_executorch_URI); + domain_uri += "&_dom=adsp"; + // init session + struct remote_rpc_control_unsigned_module data; + data.domain = adsp_domain_id; + data.enable = enable_unsigned_pd; + int err = AEE_SUCCESS; + ET_CHECK_MSG( + AEE_SUCCESS == + (err = remote_session_control( + DSPRPC_CONTROL_UNSIGNED_MODULE, (void*)&data, sizeof(data))), + "remote_session_control failed: 0x%x", + err); + // start session + remote_handle64 handle = -1; + ET_CHECK_MSG( + AEE_SUCCESS == (err = qnn_executorch_open(domain_uri.data(), &handle)), + "qnn_executorch_open failed: 0x%x", + err); + // load model + const char* model_path = FLAGS_model_path.c_str(); + qnn_executorch_load(handle, model_path); + + // prepare io + std::vector> input_data, output_data; + std::vector input_tensor, output_tensor; + for (int i = 0;; ++i) { + int nbytes = 0; + qnn_executorch_get_input_size(handle, model_path, i, &nbytes); + if (nbytes == -1) { + break; + } + input_data.emplace_back(std::vector(nbytes)); + input_tensor.emplace_back( + tensor({input_data.back().data(), (int)input_data.back().size()})); + } + for (int i = 0;; ++i) { + int nbytes = 0; + qnn_executorch_get_output_size(handle, model_path, i, &nbytes); + if (nbytes == -1) { + break; + } + output_data.emplace_back(std::vector(nbytes)); + output_tensor.emplace_back( + tensor({output_data.back().data(), (int)output_data.back().size()})); + } + + // prepare input data + std::ifstream input_list(FLAGS_input_list_path); + // TODO: should check IO info via fastrpc first + if (input_list.is_open()) { + auto split = [](std::string s, std::string delimiter) { + size_t pos_start = 0, pos_end, delim_len = delimiter.length(); + std::string token; + std::vector res; + + while ((pos_end = s.find(delimiter, pos_start)) != std::string::npos) { + token = s.substr(pos_start, pos_end - pos_start); + pos_start = pos_end + delim_len; + res.push_back(token); + } + res.push_back(s.substr(pos_start)); + return res; + }; + + std::string file_path; + int inference_index = 0; + while (std::getline(input_list, file_path)) { + auto input_files = split(file_path, " "); + if (input_files.size() == 0) { + break; + } + size_t num_inputs = input_files.size(); + for (int i = 0; i < num_inputs; ++i) { + std::ifstream fin(input_files[i], std::ios::binary); + fin.seekg(0, fin.end); + size_t file_size = fin.tellg(); + fin.seekg(0, fin.beg); + fin.read((char*)input_data[i].data(), file_size); + fin.close(); + } + qnn_executorch_set_input( + handle, model_path, input_tensor.data(), input_tensor.size()); + qnn_executorch_execute(handle, model_path); + qnn_executorch_get_output( + handle, model_path, output_tensor.data(), output_tensor.size()); + for (size_t i = 0; i < output_tensor.size(); 
i++) { + auto output_file_name = FLAGS_output_folder_path + "/output_" + + std::to_string(inference_index) + "_" + std::to_string(i) + ".raw"; + std::ofstream fout(output_file_name.c_str(), std::ios::binary); + fout.write( + (const char*)output_tensor[i].data, output_tensor[i].dataLen); + fout.close(); + } + } + } + + // unload model + qnn_executorch_unload(handle, model_path); + // tear down + qnn_executorch_close(handle); + return 0; +} diff --git a/backends/qualcomm/fastrpc/qnn_executorch.idl b/backends/qualcomm/fastrpc/qnn_executorch.idl new file mode 100644 index 00000000000..fb1d5cc342f --- /dev/null +++ b/backends/qualcomm/fastrpc/qnn_executorch.idl @@ -0,0 +1,27 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include "AEEStdDef.idl" +#include "remote.idl" + +/// Enabling stub-skel mismatch check feature in the auto-gen files. +/// Please refer to the IDL documentation for more details on the feature. +/// It is fully supported only on Kailua and later targets. +const string IDL_VERSION = "0.0.0"; + +typedef sequence tensor; + +interface qnn_executorch : remote_handle64 { + long load(in string pte_path); + long get_input_size(in string pte_path, in long index, rout long nbytes); + long set_input(in string pte_path, in sequence tensors); + long execute(in string pte_path); + long get_output_size(in string pte_path, in long index, rout long nbytes); + long get_output(in string pte_path, rout sequence tensors); + long unload(in string pte_path); +}; diff --git a/backends/qualcomm/fastrpc/qnn_executorch_impl.cpp b/backends/qualcomm/fastrpc/qnn_executorch_impl.cpp new file mode 100644 index 00000000000..5fafb3794a0 --- /dev/null +++ b/backends/qualcomm/fastrpc/qnn_executorch_impl.cpp @@ -0,0 +1,314 @@ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include "System/QnnSystemInterface.h" +#include "qnn_executorch.h" + +#include "HAP_farf.h" + +using executorch::aten::Tensor; +using executorch::aten::TensorImpl; +using executorch::extension::FileDataLoader; +using executorch::runtime::Error; +using executorch::runtime::HierarchicalAllocator; +using executorch::runtime::MemoryAllocator; +using executorch::runtime::MemoryManager; +using executorch::runtime::Method; +using executorch::runtime::MethodMeta; +using executorch::runtime::Program; +using executorch::runtime::Result; +using executorch::runtime::Span; + +class SimpleWrapper { + public: + SimpleWrapper(const char* pte_path) { + auto loader = FileDataLoader::from(pte_path, 256); + if (!loader.ok()) { + FARF( + RUNTIME_ERROR, + "FileDataLoader::from() failed: 0x%x", + (int)loader.error()); + return; + } + loader_ = std::make_unique(std::move(loader.get())); + + auto program = Program::load(loader_.get()); + if (!program.ok()) { + FARF(RUNTIME_ERROR, "failed to parse model file %s", pte_path); + return; + } + program_ = std::make_unique(std::move(program.get())); + + auto method_name = program_->get_method_name(0); + if (!method_name.ok()) { + FARF(RUNTIME_ERROR, "program has no methods"); + return; + } + FARF(RUNTIME_HIGH, "using method %s", *method_name); + + auto method_meta = program_->method_meta(*method_name); + if (!method_meta.ok()) { + FARF( + RUNTIME_ERROR, + "failed to get method_meta for %s: 0x%x", + *method_name, + (unsigned int)method_meta.error()); + return; + } + method_meta_ = 
        std::make_unique<MethodMeta>(std::move(method_meta.get()));
+
+    method_allocator_ = std::make_unique<MemoryAllocator>(
+        sizeof(method_allocator_pool_), method_allocator_pool_);
+
+    for (size_t id = 0; id < method_meta_->num_memory_planned_buffers(); ++id) {
+      size_t buffer_size = static_cast<size_t>(
+          method_meta_->memory_planned_buffer_size(id).get());
+      planned_buffers_.push_back(std::make_unique<uint8_t[]>(buffer_size));
+      planned_spans_.push_back({planned_buffers_.back().get(), buffer_size});
+    }
+    planned_memory_ = std::make_unique<HierarchicalAllocator>(
+        Span<Span<uint8_t>>{planned_spans_.data(), planned_spans_.size()});
+
+    memory_manager_ = std::make_unique<MemoryManager>(
+        method_allocator_.get(), planned_memory_.get());
+
+    auto method = program_->load_method(*method_name, memory_manager_.get());
+    if (!method.ok()) {
+      FARF(
+          RUNTIME_ERROR,
+          "loading of method %s failed with status 0x%x",
+          *method_name,
+          (int)method.error());
+    }
+    method_ = std::make_unique<Method>(std::move(method.get()));
+
+    input_tensors_.resize(method_->inputs_size());
+    // reserve up front: each Tensor keeps a raw pointer into this vector,
+    // and a reallocation during emplace_back would leave earlier pointers
+    // dangling
+    input_tensor_impls_.reserve(method_->inputs_size());
+    for (int i = 0; i < input_tensors_.size(); ++i) {
+      auto tensor_meta = method_meta_->input_tensor_meta(i);
+      input_tensors_[i].resize(padded_size(tensor_meta->nbytes()));
+      input_tensor_impls_.emplace_back(TensorImpl(
+          tensor_meta->scalar_type(),
+          tensor_meta->sizes().size(),
+          const_cast<TensorImpl::SizesType*>(tensor_meta->sizes().data()),
+          align_ptr(input_tensors_[i].data()),
+          const_cast<TensorImpl::DimOrderType*>(
+              tensor_meta->dim_order().data())));
+      Error ret = method_->set_input(Tensor(&input_tensor_impls_.back()), i);
+      if (ret != Error::Ok) {
+        FARF(RUNTIME_ERROR, "failed to set input tensor: %d", (int)ret);
+        return;
+      }
+    }
+    output_tensors_.resize(method_->outputs_size());
+    for (int i = 0; i < output_tensors_.size(); ++i) {
+      auto tensor_meta = method_meta_->output_tensor_meta(i);
+      output_tensors_[i].resize(padded_size(tensor_meta->nbytes()));
+      Error ret = method_->set_output_data_ptr(
+          align_ptr(output_tensors_[i].data()), tensor_meta->nbytes(), i);
+      if (ret != Error::Ok) {
+        FARF(RUNTIME_ERROR, "failed to set output tensor: %d", (int)ret);
+        return;
+      }
+    }
+  }
+
+  size_t padded_size(size_t sz) {
+    // reserve headroom so align_ptr() can shift the base by up to alignment_
+    size_t new_sz = alignment_ + sz;
+    return new_sz;
+  }
+
+  void* align_ptr(void* ptr) {
+    void* addr = reinterpret_cast<void*>(
+        ((size_t)ptr + (alignment_ - 1)) & ~(alignment_ - 1));
+    return addr;
+  }
+
+  int get_input_size(const int index) {
+    if (index < input_tensors_.size()) {
+      auto tensor_meta = method_meta_->input_tensor_meta(index);
+      return tensor_meta.ok() ? tensor_meta->nbytes() : -1;
+    }
+    return -1;
+  }
+
+  void set_input(int index, const tensor& t) {
+    if (padded_size(t.dataLen) > input_tensors_[index].size()) {
+      FARF(
+          RUNTIME_ERROR,
+          "input tensor %d size mismatched: %d vs %d",
+          index,
+          input_tensors_[index].size(),
+          t.dataLen);
+      return;
+    }
+    std::memcpy(align_ptr(input_tensors_[index].data()), t.data, t.dataLen);
+  }
+
+  int get_output_size(const int index) {
+    if (index < output_tensors_.size()) {
+      auto tensor_meta = method_meta_->output_tensor_meta(index);
+      return tensor_meta.ok() ?
tensor_meta->nbytes() : -1; + } + return -1; + } + + void get_output(int index, tensor& t) { + if (padded_size(t.dataLen) > output_tensors_[index].size()) { + FARF( + RUNTIME_ERROR, + "output tensor %d size mismatched: %d vs %d", + index, + output_tensors_[index].size(), + t.dataLen); + return; + } + std::memcpy(t.data, align_ptr(output_tensors_[index].data()), t.dataLen); + } + + void execute() { + Error status = method_->execute(); + if (status != Error::Ok) { + FARF( + RUNTIME_ERROR, + "Execution of method failed with status 0x%x", + (int)status); + } + } + + private: + uint8_t method_allocator_pool_[4 * 1024U]; + const size_t alignment_ = 64; + std::unique_ptr loader_; + std::unique_ptr planned_memory_; + std::unique_ptr method_meta_; + std::unique_ptr method_allocator_; + std::unique_ptr memory_manager_; + std::unique_ptr method_; + std::unique_ptr program_; + std::vector> planned_buffers_; + std::vector> planned_spans_; + std::vector> input_tensors_; + std::vector> output_tensors_; + std::vector input_tensor_impls_; + std::vector output_tensor_impls_; +}; + +std::unordered_map> + g_cached_request; + +AEEResult qnn_executorch_open(const char* uri, remote_handle64* h) { + FARF(RUNTIME_HIGH, __func__); + executorch::runtime::runtime_init(); + return 0; +} + +AEEResult qnn_executorch_close(remote_handle64 h) { + FARF(RUNTIME_HIGH, __func__); + g_cached_request.clear(); + return 0; +} + +AEEResult qnn_executorch_load(remote_handle64 _h, const char* pte_path) { + FARF(RUNTIME_HIGH, __func__); + std::string key(pte_path); + if (!g_cached_request.count(key)) { + g_cached_request[key] = std::make_unique(pte_path); + } + return 0; +} + +AEEResult qnn_executorch_get_input_size( + remote_handle64 _h, + const char* pte_path, + const int index, + int* nbytes) { + FARF(RUNTIME_HIGH, __func__); + std::string key(pte_path); + *nbytes = -1; + if (g_cached_request.count(key)) { + *nbytes = g_cached_request[key]->get_input_size(index); + } + return 0; +} + +AEEResult qnn_executorch_set_input( + remote_handle64 _h, + const char* pte_path, + const tensor* tensors, + int tensorsLen) { + FARF(RUNTIME_HIGH, __func__); + std::string key(pte_path); + if (g_cached_request.count(key)) { + auto& wrapper = g_cached_request[key]; + for (int i = 0; i < tensorsLen; ++i) { + wrapper->set_input(i, tensors[i]); + } + } + return 0; +} + +AEEResult qnn_executorch_execute(remote_handle64 _h, const char* pte_path) { + FARF(RUNTIME_HIGH, __func__); + std::string key(pte_path); + if (g_cached_request.count(key)) { + auto before_exec = std::chrono::high_resolution_clock::now(); + g_cached_request[key]->execute(); + auto after_exec = std::chrono::high_resolution_clock::now(); + double interval_infs = + std::chrono::duration_cast( + after_exec - before_exec) + .count() / + 1000.0; + FARF(RUNTIME_HIGH, "inferences took %f ms", interval_infs); + } + return 0; +} + +AEEResult qnn_executorch_get_output_size( + remote_handle64 _h, + const char* pte_path, + const int index, + int* nbytes) { + FARF(RUNTIME_HIGH, __func__); + std::string key(pte_path); + *nbytes = -1; + if (g_cached_request.count(key)) { + *nbytes = g_cached_request[key]->get_output_size(index); + } + return 0; +} + +AEEResult qnn_executorch_get_output( + remote_handle64 _h, + const char* pte_path, + tensor* tensors, + int tensorsLen) { + FARF(RUNTIME_HIGH, __func__); + std::string key(pte_path); + if (g_cached_request.count(key)) { + auto& wrapper = g_cached_request[key]; + for (int i = 0; i < tensorsLen; ++i) { + wrapper->get_output(i, tensors[i]); + } + } + return 0; 
+} + +AEEResult qnn_executorch_unload(remote_handle64 _h, const char* pte_path) { + FARF(RUNTIME_HIGH, __func__); + std::string key(pte_path); + if (g_cached_request.count(key)) { + g_cached_request.erase(key); + } + return 0; +} diff --git a/backends/qualcomm/runtime/Logging.cpp b/backends/qualcomm/runtime/Logging.cpp index acd39c52e08..44824bf31e8 100644 --- a/backends/qualcomm/runtime/Logging.cpp +++ b/backends/qualcomm/runtime/Logging.cpp @@ -11,6 +11,9 @@ #ifdef __ANDROID__ #include #endif +#ifdef __hexagon__ +#include "HAP_farf.h" +#endif namespace executorch { namespace backends { namespace qnn { @@ -58,10 +61,17 @@ void Log(QnnExecuTorchLogLevel log_level, const char* format, ...) { } __android_log_vprint(android_severity, "[Qnn ExecuTorch]", format, args); #endif +#ifndef __hexagon__ fprintf(stderr, "[%s] [Qnn ExecuTorch]: ", serverity_name); vfprintf(stderr, format, args); va_end(args); fputc('\n', stderr); +#else + char buf[128] = {0}; + vsprintf(buf, format, args); + va_end(args); + FARF(RUNTIME_HIGH, "[%s] [Qnn ExecuTorch]: %s\n", serverity_name, buf); +#endif } } // namespace qnn } // namespace backends diff --git a/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp b/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp index 41c2370e4cb..f0f9ac26d3c 100644 --- a/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp +++ b/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp @@ -113,7 +113,9 @@ Result QnnExecuTorchBackend::init( } add_cached_delegate(signature, qnn_manager); // This backend does not need its processed data after Init. +#ifndef __hexagon__ processed->Free(); +#endif return qnn_manager; } diff --git a/backends/qualcomm/runtime/Utils.cpp b/backends/qualcomm/runtime/Utils.cpp index f11e25c4ec2..b70040fc792 100644 --- a/backends/qualcomm/runtime/Utils.cpp +++ b/backends/qualcomm/runtime/Utils.cpp @@ -13,6 +13,7 @@ namespace backends { namespace qnn { void CreateDirectory(const std::string& path) { +#ifndef __hexagon__ // Create any recursive directory if (path.empty()) { QNN_EXECUTORCH_LOG_ERROR("Create folder shouldn't be empty"); @@ -29,6 +30,7 @@ void CreateDirectory(const std::string& path) { std::string err_msg = "Failed to create " + subdir + " folder\n"; QNN_EXECUTORCH_LOG_ERROR(err_msg.c_str()); } +#endif } } // namespace qnn diff --git a/backends/qualcomm/runtime/backends/CMakeLists.txt b/backends/qualcomm/runtime/backends/CMakeLists.txt index d0f025bfbaa..19298e3eb2f 100644 --- a/backends/qualcomm/runtime/backends/CMakeLists.txt +++ b/backends/qualcomm/runtime/backends/CMakeLists.txt @@ -43,13 +43,16 @@ target_sources( ${CMAKE_CURRENT_LIST_DIR}/QnnProfiler.cpp ) -set(HOST_ARCHITECTURE_GPU - ${CMAKE_CURRENT_LIST_DIR}/gpu/${CMAKE_SYSTEM_PROCESSOR} -) -set(HOST_ARCHITECTURE_HTP - ${CMAKE_CURRENT_LIST_DIR}/htp/${CMAKE_SYSTEM_PROCESSOR} -) -set(HOST_ARCHITECTURE_IR ${CMAKE_CURRENT_LIST_DIR}/ir/${CMAKE_SYSTEM_PROCESSOR}) +# quick workaround for hexagon target +set(target_platform aarch64) +if(${CMAKE_SYSTEM_PROCESSOR} MATCHES x86_64) + set(target_platform x86_64) +endif() + +set(HOST_ARCHITECTURE_GPU ${CMAKE_CURRENT_LIST_DIR}/gpu/${target_platform}) +set(HOST_ARCHITECTURE_HTP ${CMAKE_CURRENT_LIST_DIR}/htp/${target_platform}) +set(HOST_ARCHITECTURE_IR ${CMAKE_CURRENT_LIST_DIR}/ir/${target_platform}) +set(HOST_ARCHITECTURE_LPAI ${CMAKE_CURRENT_LIST_DIR}/lpai/${target_platform}) # qnn_device target_sources( @@ -57,6 +60,7 @@ target_sources( PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnDeviceCommon.h ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuDevice.h 
${CMAKE_CURRENT_LIST_DIR}/htp/HtpDevice.h + ${CMAKE_CURRENT_LIST_DIR}/lpai/LpaiDevice.h PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnDeviceCommon.cpp ${CMAKE_CURRENT_LIST_DIR}/htp/HtpDevice.cpp ${CMAKE_CURRENT_LIST_DIR}/htp/HtpDevicePlatformInfoConfig.h @@ -65,6 +69,7 @@ target_sources( # platform infomation and SocModel to Qnn ${HOST_ARCHITECTURE_HTP}/HtpDevicePlatformInfoConfig.cpp ${HOST_ARCHITECTURE_HTP}/HtpDeviceCustomConfig.cpp + ${HOST_ARCHITECTURE_LPAI}/LpaiDevice.cpp ) # qnn_context @@ -74,13 +79,17 @@ target_sources( ${CMAKE_CURRENT_LIST_DIR}/htp/HtpContext.h ${CMAKE_CURRENT_LIST_DIR}/ir/IrContext.h ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuContext.h + ${CMAKE_CURRENT_LIST_DIR}/lpai/LpaiContext.h PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnContextCommon.cpp - ${CMAKE_CURRENT_LIST_DIR}/htp/HtpContext.cpp - ${CMAKE_CURRENT_LIST_DIR}/htp/HtpContextCustomConfig.h ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuContext.cpp ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuContextCustomConfig.h + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpContext.cpp + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpContextCustomConfig.h + ${CMAKE_CURRENT_LIST_DIR}/lpai/LpaiContext.cpp + ${CMAKE_CURRENT_LIST_DIR}/lpai/LpaiContextCustomConfig.h ${HOST_ARCHITECTURE_GPU}/GpuContextCustomConfig.cpp ${HOST_ARCHITECTURE_HTP}/HtpContextCustomConfig.cpp + ${HOST_ARCHITECTURE_LPAI}/LpaiContextCustomConfig.cpp ${HOST_ARCHITECTURE_IR}/IrContext.cpp ) @@ -99,6 +108,7 @@ target_sources( PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnGraphCommon.h ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuGraph.h ${CMAKE_CURRENT_LIST_DIR}/htp/HtpGraph.h + ${CMAKE_CURRENT_LIST_DIR}/lpai/LpaiGraph.h PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnGraphCommon.cpp ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuGraph.cpp ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuGraphCustomConfig.h @@ -107,6 +117,9 @@ target_sources( ${CMAKE_CURRENT_LIST_DIR}/htp/HtpGraphCustomConfig.h ${CMAKE_CURRENT_LIST_DIR}/htp/HtpGraphCustomConfig.cpp ${HOST_ARCHITECTURE_HTP}/HtpGraphCustomConfig.cpp + ${CMAKE_CURRENT_LIST_DIR}/lpai/LpaiGraphCustomConfig.h + ${HOST_ARCHITECTURE_LPAI}/LpaiGraph.cpp + ${HOST_ARCHITECTURE_LPAI}/LpaiGraphCustomConfig.cpp ) # qnn_op_package_manager @@ -123,10 +136,14 @@ target_sources( ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuBackend.h ${CMAKE_CURRENT_LIST_DIR}/htp/HtpBackend.h ${CMAKE_CURRENT_LIST_DIR}/ir/IrBackend.h + ${CMAKE_CURRENT_LIST_DIR}/lpai/LpaiBackend.h PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCommon.cpp ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuBackend.cpp ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuBackendCustomConfig.h ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuBackendCustomConfig.cpp + ${CMAKE_CURRENT_LIST_DIR}/lpai/LpaiBackend.cpp + ${CMAKE_CURRENT_LIST_DIR}/lpai/LpaiBackendCustomConfig.h + ${CMAKE_CURRENT_LIST_DIR}/lpai/LpaiBackendCustomConfig.cpp ) # qnn_mem_manager diff --git a/backends/qualcomm/runtime/backends/QnnBackendCache.cpp b/backends/qualcomm/runtime/backends/QnnBackendCache.cpp index 3dd1738d33b..011ac68c33d 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendCache.cpp +++ b/backends/qualcomm/runtime/backends/QnnBackendCache.cpp @@ -22,11 +22,10 @@ Error QnnBackendCache::GetQnnGraphInfoFromBinary( std::uint32_t num_graphs; QnnSystemContext_GraphInfo_t* graphs = nullptr; const QnnSystemContext_BinaryInfo_t* binaryinfo{nullptr}; - Qnn_ContextBinarySize_t binaryinfo_size = 0; Qnn_ErrorHandle_t error = QNN_SUCCESS; error = qnn_sys_interface.qnn_system_context_get_binary_info( - sys_context_handle_, buffer, nbytes, &binaryinfo, &binaryinfo_size); + sys_context_handle_, buffer, nbytes, &binaryinfo); if (error != QNN_SUCCESS) { QNN_EXECUTORCH_LOG_WARN( diff --git 
a/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp b/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp index 9c559d83fcc..9b5e22c1d64 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp +++ b/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp @@ -119,7 +119,40 @@ std::unique_ptr QnnBackendFactory::Create( options->profile_level(), gpu_options); } break; - case QnnExecuTorchBackendType::kDspBackend: + case QnnExecuTorchBackendType::kLpaiBackend: { + auto lpai_options = options->backend_options()->lpai_options(); + if (options->log_level() >= QnnExecuTorchLogLevel::kLogLevelInfo) { + QNN_EXECUTORCH_LOG_INFO("fps in lpai_options: %d", lpai_options->fps()); + QNN_EXECUTORCH_LOG_INFO( + "ftrt_ratio in lpai_options: %d", lpai_options->ftrt_ratio()); + QNN_EXECUTORCH_LOG_INFO( + "client_perf_type in lpai_options: %s", + EnumNameQnnExecuTorchLpaiClientPerf( + lpai_options->client_perf_type())); + QNN_EXECUTORCH_LOG_INFO( + "affinity in lpai_options: %s", + QnnExecuTorchLpaiCoreAffinity(lpai_options->affinity())); + QNN_EXECUTORCH_LOG_INFO( + "core_selection in lpai_options: %d", + lpai_options->core_selection()); + } + backend_params->qnn_backend_cache_ptr_ = + std::make_unique(qnn_context_blob); + + backend_params->qnn_context_ptr_ = std::make_unique( + implementation_ptr, + qnn_backend_ptr, + qnn_device_ptr, + backend_params->qnn_backend_cache_ptr_.get(), + qnn_dlc_manager); + + backend_params->qnn_graph_ptr_ = std::make_unique( + implementation_ptr, + qnn_backend_ptr, + backend_params->qnn_context_ptr_.get(), + options->profile_level(), + lpai_options); + } break; case QnnExecuTorchBackendType::kUndefinedBackend: default: return nullptr; diff --git a/backends/qualcomm/runtime/backends/QnnBackendFactory.h b/backends/qualcomm/runtime/backends/QnnBackendFactory.h index c125d5ffca4..753d8cf3007 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendFactory.h +++ b/backends/qualcomm/runtime/backends/QnnBackendFactory.h @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include namespace executorch { diff --git a/backends/qualcomm/runtime/backends/QnnBackendUnifiedRegistry.cpp b/backends/qualcomm/runtime/backends/QnnBackendUnifiedRegistry.cpp index 8b1dcdf7a9d..35e58d22d78 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendUnifiedRegistry.cpp +++ b/backends/qualcomm/runtime/backends/QnnBackendUnifiedRegistry.cpp @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include @@ -56,7 +58,10 @@ Error QnnBackendUnifiedRegistry::GetOrCreateBackendBundle( current_lib_path = gpu_library_name_; break; } - case QnnExecuTorchBackendType::kDspBackend: + case QnnExecuTorchBackendType::kLpaiBackend: { + current_lib_path = lpai_library_name_; + break; + } case QnnExecuTorchBackendType::kUndefinedBackend: default: QNN_EXECUTORCH_LOG_ERROR( @@ -118,7 +123,12 @@ Error QnnBackendUnifiedRegistry::GetOrCreateBackendBundle( device = std::make_unique(implementation.get(), logger.get()); break; } - case QnnExecuTorchBackendType::kDspBackend: + case QnnExecuTorchBackendType::kLpaiBackend: { + backend = std::make_unique( + implementation.get(), logger.get(), options->soc_info()); + device = std::make_unique(implementation.get(), logger.get()); + break; + } case QnnExecuTorchBackendType::kUndefinedBackend: default: return Error::NotFound; diff --git a/backends/qualcomm/runtime/backends/QnnBackendUnifiedRegistry.h b/backends/qualcomm/runtime/backends/QnnBackendUnifiedRegistry.h index b2549a3356c..b401688ac59 100644 --- 
a/backends/qualcomm/runtime/backends/QnnBackendUnifiedRegistry.h +++ b/backends/qualcomm/runtime/backends/QnnBackendUnifiedRegistry.h @@ -73,7 +73,7 @@ class QnnBackendUnifiedRegistry { static constexpr const char* htp_library_name_ = "libQnnHtp.so"; static constexpr const char* gpu_library_name_ = "libQnnGpu.so"; - static constexpr const char* dsp_library_name_ = "libQnnDsp.so"; + static constexpr const char* lpai_library_name_ = "libQnnLpai.so"; std::unique_ptr GetImplementationConfig( const QnnExecuTorchOptions* options) { diff --git a/backends/qualcomm/runtime/backends/QnnCustomProtocol.cpp b/backends/qualcomm/runtime/backends/QnnCustomProtocol.cpp index b01d7ab6d80..bb0a77d033c 100644 --- a/backends/qualcomm/runtime/backends/QnnCustomProtocol.cpp +++ b/backends/qualcomm/runtime/backends/QnnCustomProtocol.cpp @@ -20,8 +20,7 @@ void QnnContextCustomProtocol::BuildContextCustomBuffer() { uint8_t magic_number_proto_size = sizeof(magic_number_); uint8_t binary_proto_size = sizeof(binary_size_); uint8_t signature_proto_size = sizeof(signature_); - uint64_t buffer_size = magic_number_proto_size + signature_proto_size + - binary_proto_size + binary_size_; + uint64_t buffer_size = alignment_ + binary_size_; qnn_custom_buffer_.resize(buffer_size, 0); size_t pos = 0; @@ -62,6 +61,8 @@ QnnContextCustomProtocol::DeserializeContextCustomBuffer(void* processed_data) { uint8_t magic_number_proto_size = sizeof(magic_number_); uint8_t binary_proto_size = sizeof(binary_size_); uint8_t signature_proto_size = sizeof(signature_); + uint32_t padding_size = alignment_ - magic_number_proto_size - + binary_proto_size - signature_proto_size; uint32_t magic_number; std::memcpy(&magic_number, ptr, magic_number_proto_size); @@ -80,13 +81,13 @@ QnnContextCustomProtocol::DeserializeContextCustomBuffer(void* processed_data) { uint64_t binary_size; std::memcpy(&binary_size, ptr, binary_proto_size); - ptr += binary_proto_size; + ptr += binary_proto_size + padding_size; return {status, signature_, binary_size, static_cast(ptr)}; } uint64_t QnnContextCustomProtocol::GetContextBinaryOffset() { - return sizeof(magic_number_) + sizeof(signature_) + sizeof(binary_size_); + return alignment_; } } // namespace qnn diff --git a/backends/qualcomm/runtime/backends/QnnCustomProtocol.h b/backends/qualcomm/runtime/backends/QnnCustomProtocol.h index 3cc6a6e25dc..4007c7ccabd 100644 --- a/backends/qualcomm/runtime/backends/QnnCustomProtocol.h +++ b/backends/qualcomm/runtime/backends/QnnCustomProtocol.h @@ -83,6 +83,7 @@ class QnnContextCustomProtocol : public QnnCustomProtocol { static constexpr uint32_t magic_number_ = 0x5678ABCD; int64_t signature_{0}; uint64_t binary_size_{0}; + uint32_t alignment_{256}; }; } // namespace qnn diff --git a/backends/qualcomm/runtime/backends/QnnFunctionInterface.h b/backends/qualcomm/runtime/backends/QnnFunctionInterface.h index 0e1e4727aa3..2a49505a672 100644 --- a/backends/qualcomm/runtime/backends/QnnFunctionInterface.h +++ b/backends/qualcomm/runtime/backends/QnnFunctionInterface.h @@ -65,6 +65,8 @@ class QnnInterface { DEFINE_SHIM_FUNCTION_INTERFACE(graph_finalize, graphFinalize); DEFINE_SHIM_FUNCTION_INTERFACE(graph_execute, graphExecute); DEFINE_SHIM_FUNCTION_INTERFACE(graph_retrieve, graphRetrieve); + DEFINE_SHIM_FUNCTION_INTERFACE(graph_set_config, graphSetConfig); + DEFINE_SHIM_FUNCTION_INTERFACE(graph_get_property, graphGetProperty); // --------- QnnLog --------- DEFINE_SHIM_FUNCTION_INTERFACE(log_create, logCreate); DEFINE_SHIM_FUNCTION_INTERFACE(log_free, logFree); diff --git 
a/backends/qualcomm/runtime/backends/QnnGraphCommon.h b/backends/qualcomm/runtime/backends/QnnGraphCommon.h index fbb5ab80140..ea83bfcab1b 100644 --- a/backends/qualcomm/runtime/backends/QnnGraphCommon.h +++ b/backends/qualcomm/runtime/backends/QnnGraphCommon.h @@ -34,7 +34,7 @@ class QnnGraph { virtual ~QnnGraph(){}; - executorch::runtime::Error Configure(const std::string& graph_name); + virtual executorch::runtime::Error Configure(const std::string& graph_name); Qnn_ErrorHandle_t GraphExecute( const std::string& graph_name, @@ -81,10 +81,10 @@ class QnnGraph { std::vector& config) { return executorch::runtime::Error::Ok; }; - - private: std::unordered_map handle_; QnnImplementation* implementation_; + + private: QnnBackend* backend_; QnnContext* context_; QnnExecuTorchProfileLevel profile_level_; diff --git a/backends/qualcomm/runtime/backends/QnnImplementation.cpp b/backends/qualcomm/runtime/backends/QnnImplementation.cpp index 246800791e6..7900b5cc5e3 100644 --- a/backends/qualcomm/runtime/backends/QnnImplementation.cpp +++ b/backends/qualcomm/runtime/backends/QnnImplementation.cpp @@ -25,6 +25,7 @@ struct DlCloser { Error QnnImplementation::InitBackend( void* const lib_handle, const QnnSaver_Config_t** saver_config) { +#ifndef __hexagon__ Qnn_ErrorHandle_t error = QNN_SUCCESS; // saver_config must be set before backend initialization auto saver_initialize = @@ -39,6 +40,7 @@ Error QnnImplementation::InitBackend( return Error::Internal; } } +#endif return Error::Ok; } @@ -50,6 +52,10 @@ const QnnInterface_t* QnnImplementation::StartBackend( const std::string& lib_path, const QnnSaver_Config_t** saver_config) { Qnn_ErrorHandle_t error = QNN_SUCCESS; +#ifdef __hexagon__ + std::unique_ptr lib_handle( + dlopen(lib_path.c_str(), RTLD_NOW | RTLD_GLOBAL)); +#else // If the library is already loaded, return the handle. std::unique_ptr lib_handle( dlopen(lib_path.c_str(), RTLD_NOW | RTLD_NOLOAD)); @@ -57,6 +63,7 @@ const QnnInterface_t* QnnImplementation::StartBackend( lib_handle = std::unique_ptr( dlopen(lib_path.c_str(), RTLD_NOW | RTLD_GLOBAL)); } +#endif if (lib_handle == nullptr) { QNN_EXECUTORCH_LOG_ERROR( "Cannot Open QNN library %s, with error: %s", diff --git a/backends/qualcomm/runtime/backends/QnnSysFunctionInterface.h b/backends/qualcomm/runtime/backends/QnnSysFunctionInterface.h index b77c7c2903e..28c3ed733f4 100644 --- a/backends/qualcomm/runtime/backends/QnnSysFunctionInterface.h +++ b/backends/qualcomm/runtime/backends/QnnSysFunctionInterface.h @@ -40,7 +40,7 @@ class QnnSystemInterface { systemContextCreate); DEFINE_SHIM_FUNCTION_SYS_INTERFACE( system_context_get_binary_info, - systemContextGetBinaryInfo); + systemContextGetMetaData); DEFINE_SHIM_FUNCTION_SYS_INTERFACE(system_context_free, systemContextFree); private: diff --git a/backends/qualcomm/runtime/backends/lpai/LpaiBackend.cpp b/backends/qualcomm/runtime/backends/lpai/LpaiBackend.cpp new file mode 100644 index 00000000000..c5d7294492b --- /dev/null +++ b/backends/qualcomm/runtime/backends/lpai/LpaiBackend.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
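+ *
+ * QNN consumes backend configs as a null-terminated array of
+ * QnnBackend_Config_t pointers. MakeConfig() below builds that array from
+ * the LPAI custom configs; with a single custom config the layout is:
+ *   config[0] -> { option = QNN_BACKEND_CONFIG_OPTION_CUSTOM, customConfig }
+ *   config[1] == nullptr  (terminator)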
+ */
+
+#include <executorch/backends/qualcomm/runtime/backends/lpai/LpaiBackend.h>
+
+#include "LPAI/QnnLpaiCommon.h"
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+using executorch::runtime::Error;
+
+LpaiBackend::LpaiBackend(
+    QnnImplementation* implementation,
+    QnnLogger* logger,
+    const SocInfo* soc_info)
+    : QnnBackend(implementation, logger) {
+  lpai_backend_custom_config_ =
+      std::make_unique<LpaiBackendCustomConfig>(soc_info);
+}
+
+Qnn_Version_t LpaiBackend::GetExpectedBackendVersion() const {
+  Qnn_Version_t backend_version;
+  backend_version.major = QNN_LPAI_API_VERSION_MAJOR;
+  backend_version.minor = QNN_LPAI_API_VERSION_MINOR;
+  backend_version.patch = QNN_LPAI_API_VERSION_PATCH;
+  return backend_version;
+}
+
+bool LpaiBackend::IsProfileEventTypeParentOfNodeTime(
+    QnnProfile_EventType_t event_type) {
+  return (event_type == QNN_PROFILE_EVENTTYPE_EXECUTE);
+}
+
+Error LpaiBackend::MakeConfig(
+    std::vector<const QnnBackend_Config_t*>& config) {
+  const std::vector<QnnBackend_CustomConfig_t>& backend_custom_config =
+      lpai_backend_custom_config_->CreateBackendCustomConfig();
+
+  uint32_t num_custom_configs = backend_custom_config.size();
+  backend_config_.resize(num_custom_configs);
+  // +1 for the null terminator
+  config.reserve(num_custom_configs + 1);
+
+  for (std::size_t i = 0; i < num_custom_configs; ++i) {
+    backend_config_[i].option = QNN_BACKEND_CONFIG_OPTION_CUSTOM;
+    backend_config_[i].customConfig = backend_custom_config[i];
+    config.push_back(&backend_config_[i]);
+  }
+
+  config.push_back(nullptr);
+  return Error::Ok;
+}
+
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/lpai/LpaiBackend.h b/backends/qualcomm/runtime/backends/lpai/LpaiBackend.h
new file mode 100644
index 00000000000..1b5b0ffa779
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/lpai/LpaiBackend.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <executorch/backends/qualcomm/runtime/backends/lpai/LpaiBackendCustomConfig.h>
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+class LpaiBackend : public QnnBackend {
+ public:
+  LpaiBackend(
+      QnnImplementation* implementation,
+      QnnLogger* logger,
+      const SocInfo* soc_info);
+
+  Qnn_Version_t GetExpectedBackendVersion() const override;
+
+  bool IsProfileEventTypeParentOfNodeTime(
+      QnnProfile_EventType_t event_type) override;
+
+ protected:
+  executorch::runtime::Error MakeConfig(
+      std::vector<const QnnBackend_Config_t*>& config) override;
+
+ private:
+  std::vector<QnnBackend_Config_t> backend_config_;
+  std::unique_ptr<LpaiBackendCustomConfig> lpai_backend_custom_config_;
+};
+
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/lpai/LpaiBackendCustomConfig.cpp b/backends/qualcomm/runtime/backends/lpai/LpaiBackendCustomConfig.cpp
new file mode 100644
index 00000000000..30647f61211
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/lpai/LpaiBackendCustomConfig.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
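+ *
+ * CreateBackendCustomConfig() below maps the LpaiHardwareVersion carried in
+ * the serialized SocInfo onto the matching QNN_LPAI_BACKEND_HW_VERSION_*
+ * constant and pins the target to QNN_LPAI_BACKEND_TARGET_ADSP; versions
+ * missing from the map keep the UNKNOWN default set by AllocHwInfo().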
+ */ + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +LpaiBackendCustomConfig::LpaiBackendCustomConfig(const SocInfo* soc_info) + : soc_info_(soc_info) {} + +QnnLpaiBackend_CustomConfig_t* +LpaiBackendCustomConfig::AllocBackendCustomConfig() { + lpai_backend_config_.emplace_back( + std::make_unique()); + lpai_backend_config_.back()->option = QNN_LPAI_BACKEND_CUSTOM_CFG_UNDEFINED; + return lpai_backend_config_.back().get(); +} + +QnnLpaiBackend_CustomConfigHwInfo_t* LpaiBackendCustomConfig::AllocHwInfo() { + lpai_hw_info_.emplace_back( + std::make_unique()); + lpai_hw_info_.back()->hwVersion = QNN_LPAI_BACKEND_HW_VERSION_UNKNOWN; + lpai_hw_info_.back()->lpaiTarget = QNN_LPAI_BACKEND_TARGET_UNKNOWN; + return lpai_hw_info_.back().get(); +} + +std::vector +LpaiBackendCustomConfig::CreateBackendCustomConfig() { + std::vector ret; + QnnLpaiBackend_CustomConfig_t* p_custom_config = nullptr; + + std::unordered_map + lpai_hw_ver = { + {LpaiHardwareVersion::V1, QNN_LPAI_BACKEND_HW_VERSION_V1}, + {LpaiHardwareVersion::V2, QNN_LPAI_BACKEND_HW_VERSION_V2}, + {LpaiHardwareVersion::V3, QNN_LPAI_BACKEND_HW_VERSION_V3}, + {LpaiHardwareVersion::V4, QNN_LPAI_BACKEND_HW_VERSION_V4}, + {LpaiHardwareVersion::V5, QNN_LPAI_BACKEND_HW_VERSION_V5}, + {LpaiHardwareVersion::V5_1, QNN_LPAI_BACKEND_HW_VERSION_V5_1}, + {LpaiHardwareVersion::V6, QNN_LPAI_BACKEND_HW_VERSION_V6}, + {LpaiHardwareVersion::V7, QNN_LPAI_BACKEND_HW_VERSION_V7}, + }; + + p_custom_config = AllocBackendCustomConfig(); + auto p_hw_info = AllocHwInfo(); + p_custom_config->option = QNN_LPAI_BACKEND_CUSTOM_CFG_HW_INFO; + auto lpai_info = soc_info_->lpai_info(); + if (lpai_info && lpai_hw_ver.count(lpai_info->lpai_hardware_version())) { + p_hw_info->hwVersion = lpai_hw_ver[lpai_info->lpai_hardware_version()]; + } + p_hw_info->lpaiTarget = QNN_LPAI_BACKEND_TARGET_ADSP; + p_custom_config->config = p_hw_info; + ret.push_back(static_cast(p_custom_config)); + return ret; +} + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/lpai/LpaiBackendCustomConfig.h b/backends/qualcomm/runtime/backends/lpai/LpaiBackendCustomConfig.h new file mode 100644 index 00000000000..3abbbaa8508 --- /dev/null +++ b/backends/qualcomm/runtime/backends/lpai/LpaiBackendCustomConfig.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
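+ *
+ * Ownership note: QNN only borrows the custom config structs it receives,
+ * so the Alloc*() helpers stash each struct in a member vector of
+ * unique_ptr and hand out raw pointers that stay valid for the lifetime of
+ * this object.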
+ */
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include
+#include
+#include
+
+#include "LPAI/QnnLpaiBackend.h"
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+using namespace qnn_delegate;
+
+class LpaiBackendCustomConfig {
+ public:
+  explicit LpaiBackendCustomConfig(const SocInfo* soc_info);
+
+  std::vector<QnnBackend_CustomConfig_t> CreateBackendCustomConfig();
+
+ private:
+  QnnLpaiBackend_CustomConfig_t* AllocBackendCustomConfig();
+  std::vector<std::unique_ptr<QnnLpaiBackend_CustomConfig_t>>
+      lpai_backend_config_;
+  const SocInfo* soc_info_;
+
+  std::vector<std::unique_ptr<QnnLpaiBackend_CustomConfigHwInfo_t>>
+      lpai_hw_info_;
+  QnnLpaiBackend_CustomConfigHwInfo_t* AllocHwInfo();
+};
+
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/lpai/LpaiContext.cpp b/backends/qualcomm/runtime/backends/lpai/LpaiContext.cpp
new file mode 100644
index 00000000000..d5203898f6b
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/lpai/LpaiContext.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/qualcomm/runtime/backends/lpai/LpaiContext.h>
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+using executorch::runtime::Error;
+
+LpaiContext::LpaiContext(
+    QnnImplementation* implementation,
+    QnnBackend* backend,
+    QnnDevice* device,
+    QnnBackendCache* cache,
+    QnnDlcManager* qnn_dlc_manager)
+    : QnnContext(implementation, backend, device, cache, qnn_dlc_manager) {
+  lpai_context_custom_config_ = std::make_unique<LpaiContextCustomConfig>();
+}
+
+Error LpaiContext::MakeConfig(
+    std::vector<const QnnContext_Config_t*>& config) {
+  const std::vector<QnnContext_CustomConfig_t>& context_custom_config =
+      lpai_context_custom_config_->CreateContextCustomConfig();
+
+  uint32_t num_custom_configs = context_custom_config.size();
+  // reserve one extra slot so the push_back in the __hexagon__ branch cannot
+  // reallocate and invalidate the element pointers already stored in config
+  context_config_.reserve(num_custom_configs + 1);
+  context_config_.resize(num_custom_configs);
+  // +1 for the null terminator
+  config.reserve(num_custom_configs + 1);
+
+  for (std::size_t i = 0; i < num_custom_configs; ++i) {
+    context_config_[i].option = QNN_CONTEXT_CONFIG_OPTION_CUSTOM;
+    context_config_[i].customConfig = context_custom_config[i];
+    config.push_back(&context_config_[i]);
+  }
+
+#ifdef __hexagon__
+  QnnContext_Config_t adsp_context_config;
+  adsp_context_config.option = QNN_CONTEXT_CONFIG_PERSISTENT_BINARY;
+  adsp_context_config.isPersistentBinary = 1;
+  context_config_.push_back(adsp_context_config);
+  config.push_back(&context_config_.back());
+#endif
+
+  config.push_back(nullptr);
+  return Error::Ok;
+}
+
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/lpai/LpaiContext.h b/backends/qualcomm/runtime/backends/lpai/LpaiContext.h
new file mode 100644
index 00000000000..b05dac469bf
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/lpai/LpaiContext.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
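+ *
+ * On the on-device (__hexagon__) build, MakeConfig() additionally requests
+ * QNN_CONTEXT_CONFIG_PERSISTENT_BINARY so the context keeps reading the
+ * delegate blob in place; this pairs with QnnExecuTorchBackend::init()
+ * skipping processed->Free() under __hexagon__.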
+ */ + +#pragma once + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +class QnnDlcManager; +class LpaiContext : public QnnContext { + public: + LpaiContext( + QnnImplementation* implementation, + QnnBackend* backend, + QnnDevice* device, + QnnBackendCache* cache, + QnnDlcManager* qnn_dlc_manager); + + protected: + executorch::runtime::Error MakeConfig( + std::vector& config) override; + + private: + std::vector context_config_; + std::unique_ptr lpai_context_custom_config_; +}; + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/lpai/LpaiContextCustomConfig.h b/backends/qualcomm/runtime/backends/lpai/LpaiContextCustomConfig.h new file mode 100644 index 00000000000..d415eeb51df --- /dev/null +++ b/backends/qualcomm/runtime/backends/lpai/LpaiContextCustomConfig.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +#include +#include + +#include "LPAI/QnnLpaiContext.h" + +namespace executorch { +namespace backends { +namespace qnn { + +using namespace qnn_delegate; + +class LpaiContextCustomConfig { + public: + explicit LpaiContextCustomConfig() {} + + std::vector CreateContextCustomConfig(); + + private: + QnnLpaiContext_CustomConfig_t* AllocContextCustomConfig() { + lpai_context_config_.emplace_back( + std::make_unique()); + lpai_context_config_.back()->option = QNN_LPAI_CONTEXT_SET_CFG_UNDEFINED; + return lpai_context_config_.back().get(); + } + std::vector> + lpai_context_config_; +}; + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/lpai/LpaiDevice.h b/backends/qualcomm/runtime/backends/lpai/LpaiDevice.h new file mode 100644 index 00000000000..14edcb2d66a --- /dev/null +++ b/backends/qualcomm/runtime/backends/lpai/LpaiDevice.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ +#pragma once + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +using executorch::runtime::Error; + +class LpaiDevice : public QnnDevice { + public: + LpaiDevice(QnnImplementation* implementation, QnnLogger* logger) + : QnnDevice(implementation, logger){}; + + executorch::runtime::Error Configure() override; + + private: + std::vector device_config_; +}; + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/lpai/LpaiGraph.h b/backends/qualcomm/runtime/backends/lpai/LpaiGraph.h new file mode 100644 index 00000000000..cf1cce90295 --- /dev/null +++ b/backends/qualcomm/runtime/backends/lpai/LpaiGraph.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
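+ *
+ * LPAI graph configs can only be applied once the graph object exists, so
+ * Configure() below runs in three steps:
+ *   1. QnnGraph::Configure(graph_name)   // create or retrieve the graph
+ *   2. qnn_graph_set_config(...)         // push LPAI custom configs
+ *   3. AfterConfigure(graph_name)        // platform hook, e.g. re-finalize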
+ */
+
+#pragma once
+
+#include <executorch/backends/qualcomm/runtime/backends/lpai/LpaiGraphCustomConfig.h>
+
+namespace executorch {
+namespace backends {
+namespace qnn {
+
+class LpaiGraph : public QnnGraph {
+ public:
+  LpaiGraph(
+      QnnImplementation* implementation,
+      QnnBackend* backend,
+      QnnContext* context,
+      const QnnExecuTorchProfileLevel& profile_level,
+      const QnnExecuTorchLpaiBackendOptions* lpai_options)
+      : QnnGraph(implementation, backend, context, profile_level) {
+    lpai_graph_custom_config_ =
+        std::make_unique<LpaiGraphCustomConfig>(lpai_options, this);
+  };
+
+  executorch::runtime::Error Configure(const std::string& graph_name) override {
+    Error configure_status = QnnGraph::Configure(graph_name);
+    if (configure_status != Error::Ok) {
+      return configure_status;
+    }
+    const std::vector<QnnGraph_CustomConfig_t>& graph_custom_config =
+        lpai_graph_custom_config_->CreateGraphCustomConfig(graph_name);
+
+    std::vector<const QnnGraph_Config_t*> config;
+    uint32_t num_custom_configs = graph_custom_config.size();
+    graph_config_.resize(num_custom_configs);
+    // +1 for the null terminator
+    config.reserve(num_custom_configs + 1);
+
+    for (std::size_t i = 0; i < num_custom_configs; ++i) {
+      graph_config_[i].option = QNN_GRAPH_CONFIG_OPTION_CUSTOM;
+      graph_config_[i].customConfig = graph_custom_config[i];
+      config.push_back(&graph_config_[i]);
+    }
+    config.push_back(nullptr);
+
+    // LPAI-specific: these configs can only be set after graph creation
+    const QnnInterface& qnn_interface = implementation_->GetQnnInterface();
+    Qnn_ErrorHandle_t error =
+        qnn_interface.qnn_graph_set_config(handle_[graph_name], config.data());
+    if (error != QNN_SUCCESS) {
+      QNN_EXECUTORCH_LOG_ERROR(
+          "qnn_graph_set_config failed. Error %d", QNN_GET_ERROR_CODE(error));
+      return Error::Internal;
+    }
+
+    // platform specific behavior
+    return AfterConfigure(graph_name);
+  }
+
+  friend LpaiGraphCustomConfig;
+
+ protected:
+  executorch::runtime::Error MakeConfig(
+      std::vector<const QnnGraph_Config_t*>& config) override {
+    // graph configs are applied in Configure() above, after graph creation
+    return executorch::runtime::Error::Ok;
+  }
+
+ private:
+  executorch::runtime::Error AfterConfigure(const std::string& graph_name);
+  std::vector<QnnGraph_Config_t> graph_config_;
+  std::unique_ptr<LpaiGraphCustomConfig> lpai_graph_custom_config_;
+};
+
+} // namespace qnn
+} // namespace backends
+} // namespace executorch
diff --git a/backends/qualcomm/runtime/backends/lpai/LpaiGraphCustomConfig.h b/backends/qualcomm/runtime/backends/lpai/LpaiGraphCustomConfig.h
new file mode 100644
index 00000000000..67c1bca1279
--- /dev/null
+++ b/backends/qualcomm/runtime/backends/lpai/LpaiGraphCustomConfig.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
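+ *
+ * Each Alloc*() helper below appends an UNDEFINED/zero-initialized struct
+ * to a member vector of unique_ptr and returns a stable raw pointer, so
+ * every config survives until QNN has consumed it; the perf fields (fps,
+ * ftrtRatio, clientType) and the core affinity are filled in from the
+ * QnnExecuTorchLpaiBackendOptions captured at construction.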
+ */ + +#pragma once + +#include +#include + +#include +#include +#include + +#include "LPAI/QnnLpaiGraph.h" +#include "LPAI/QnnLpaiGraphPrepare.h" + +namespace executorch { +namespace backends { +namespace qnn { + +using namespace qnn_delegate; + +class LpaiGraph; +class LpaiGraphCustomConfig { + public: + explicit LpaiGraphCustomConfig( + const QnnExecuTorchLpaiBackendOptions* lpai_options, + LpaiGraph* graph) + : graph_(graph), lpai_options_(lpai_options){}; + + std::vector CreateGraphCustomConfig( + const std::string& graph_name); + + private: + [[maybe_unused]] LpaiGraph* graph_; + [[maybe_unused]] const QnnExecuTorchLpaiBackendOptions* lpai_options_; + + std::vector> lpai_mem_; + QnnLpaiGraph_Mem_t* AllocMem() { + lpai_mem_.emplace_back(std::make_unique()); + lpai_mem_.back()->memType = QNN_LPAI_MEM_TYPE_UNDEFINED; + lpai_mem_.back()->size = 0; + lpai_mem_.back()->addr = nullptr; + return lpai_mem_.back().get(); + } + + std::vector> lpai_graph_config_; + QnnLpaiGraph_CustomConfig_t* AllocGraphCustomConfig() { + lpai_graph_config_.emplace_back( + std::make_unique()); + lpai_graph_config_.back()->option = QNN_LPAI_GRAPH_SET_CFG_UNDEFINED; + return lpai_graph_config_.back().get(); + } + + std::vector> lpai_perf_cfg_; + QnnLpaiGraph_PerfCfg_t* AllocPerfCfg() { + lpai_perf_cfg_.emplace_back(std::make_unique()); + lpai_perf_cfg_.back()->fps = 0; + lpai_perf_cfg_.back()->ftrtRatio = 0; + lpai_perf_cfg_.back()->clientType = + QNN_LPAI_GRAPH_CLIENT_PERF_TYPE_UNDEFINED; + return lpai_perf_cfg_.back().get(); + } + + std::vector> lpai_core_affinity_; + QnnLpaiGraph_CoreAffinity_t* AllocCoreAffinity() { + lpai_core_affinity_.emplace_back( + std::make_unique()); + lpai_core_affinity_.back()->affinity = + QNN_LPAI_GRAPH_CORE_AFFINITY_UNDEFINED; + lpai_core_affinity_.back()->coreSelection = 0; + return lpai_core_affinity_.back().get(); + } + + std::vector> + lpai_prepare_; + QnnLpaiGraph_CustomConfigPrepare_t* AllocPrepare() { + lpai_prepare_.emplace_back( + std::make_unique()); + lpai_prepare_.back()->enablePerLayer = 0; + lpai_prepare_.back()->enableCoreSelection = nullptr; + return lpai_prepare_.back().get(); + } + + std::vector scratch_buf_, persistent_buf_; +}; + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/lpai/aarch64/LpaiContextCustomConfig.cpp b/backends/qualcomm/runtime/backends/lpai/aarch64/LpaiContextCustomConfig.cpp new file mode 100644 index 00000000000..3f578ff0636 --- /dev/null +++ b/backends/qualcomm/runtime/backends/lpai/aarch64/LpaiContextCustomConfig.cpp @@ -0,0 +1,32 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +std::vector +LpaiContextCustomConfig::CreateContextCustomConfig() { + std::vector ret; +#ifndef __hexagon__ + QnnLpaiContext_CustomConfig_t* p_custom_config = nullptr; + + // TODO: support graph based execution + p_custom_config = AllocContextCustomConfig(); + p_custom_config->option = QNN_LPAI_CONTEXT_SET_CFG_ENABLE_ISLAND; + p_custom_config->config = nullptr; + ret.push_back(static_cast(p_custom_config)); +#endif + return ret; +} + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/lpai/aarch64/LpaiDevice.cpp b/backends/qualcomm/runtime/backends/lpai/aarch64/LpaiDevice.cpp new file mode 100644 index 00000000000..4bcba99f5e3 --- /dev/null +++ b/backends/qualcomm/runtime/backends/lpai/aarch64/LpaiDevice.cpp @@ -0,0 +1,25 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +Error LpaiDevice::Configure() { +#ifndef __hexagon__ + return QnnDevice::Configure(); +#else + return Error::Ok; +#endif +} + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/lpai/aarch64/LpaiGraph.cpp b/backends/qualcomm/runtime/backends/lpai/aarch64/LpaiGraph.cpp new file mode 100644 index 00000000000..8ed8df998e3 --- /dev/null +++ b/backends/qualcomm/runtime/backends/lpai/aarch64/LpaiGraph.cpp @@ -0,0 +1,30 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +Error LpaiGraph::AfterConfigure(const std::string& graph_name) { + // LPAI does not support online prepare and require graph to be finalized + // again + Qnn_ErrorHandle_t error = GraphFinalize(graph_name); + if (error != QNN_SUCCESS) { + QNN_EXECUTORCH_LOG_ERROR( + "Failed to finalize Qnn Graph with error: %d", + QNN_GET_ERROR_CODE(error)); + return Error::Internal; + } + return Error::Ok; +} + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/lpai/aarch64/LpaiGraphCustomConfig.cpp b/backends/qualcomm/runtime/backends/lpai/aarch64/LpaiGraphCustomConfig.cpp new file mode 100644 index 00000000000..1d0f231b99e --- /dev/null +++ b/backends/qualcomm/runtime/backends/lpai/aarch64/LpaiGraphCustomConfig.cpp @@ -0,0 +1,95 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +std::vector +LpaiGraphCustomConfig::CreateGraphCustomConfig(const std::string& graph_name) { + std::vector configs; + QnnLpaiGraph_CustomConfig_t* p_custom_config = nullptr; + +#ifdef __hexagon__ + uint32_t scratch_size = 0; + uint32_t persistent_size = 0; + QnnLpaiGraph_CustomProperty_t custom_props[2]; + custom_props[0].option = QNN_LPAI_GRAPH_GET_PROP_SCRATCH_MEM_SIZE; + custom_props[0].property = &scratch_size; + custom_props[1].option = QNN_LPAI_GRAPH_GET_PROP_PERSISTENT_MEM_SIZE; + custom_props[1].property = &persistent_size; + + QnnGraph_Property_t graph_props[2]; + graph_props[0].option = QNN_GRAPH_PROPERTY_OPTION_CUSTOM; + graph_props[0].customProperty = &custom_props[0]; + graph_props[1].option = QNN_GRAPH_PROPERTY_OPTION_CUSTOM; + graph_props[1].customProperty = &custom_props[1]; + QnnGraph_Property_t* graph_prop_ptrs[3] = {0}; + graph_prop_ptrs[0] = &graph_props[0]; + graph_prop_ptrs[1] = &graph_props[1]; + + const QnnInterface& qnn_interface = + graph_->implementation_->GetQnnInterface(); + Qnn_ErrorHandle_t error = qnn_interface.qnn_graph_get_property( + graph_->handle_[graph_name], graph_prop_ptrs); + + if (error != QNN_SUCCESS) { + QNN_EXECUTORCH_LOG_ERROR( + "failed to get graph property: %d", QNN_GET_ERROR_CODE(error)); + return {}; + } + + scratch_buf_.resize(scratch_size); + p_custom_config = AllocGraphCustomConfig(); + p_custom_config->option = QNN_LPAI_GRAPH_SET_CFG_SCRATCH_MEM; + auto p_scratch_config = AllocMem(); + p_scratch_config->memType = QNN_LPAI_MEM_TYPE_DDR; + p_scratch_config->size = scratch_size; + p_scratch_config->addr = scratch_buf_.data(); + p_custom_config->config = p_scratch_config; + configs.push_back(p_custom_config); + + persistent_buf_.resize(persistent_size); + p_custom_config = AllocGraphCustomConfig(); + p_custom_config->option = QNN_LPAI_GRAPH_SET_CFG_PERSISTENT_MEM_DEFAULT; + auto p_persistent_config = AllocMem(); + p_persistent_config->memType = QNN_LPAI_MEM_TYPE_DDR; + p_persistent_config->size = persistent_size; + p_persistent_config->addr = persistent_buf_.data(); + p_custom_config->config = p_persistent_config; + configs.push_back(p_custom_config); + // TODO: figure out how to add perf control (internal enum required) + // e.g. 
QNN_LPAI_GRAPH_SET_ENPU_CLOCK +#endif + // perf config + p_custom_config = AllocGraphCustomConfig(); + auto p_perf_cfg = AllocPerfCfg(); + p_custom_config->option = QNN_LPAI_GRAPH_SET_CFG_PERF_CFG; + p_perf_cfg->fps = lpai_options_->fps(); + p_perf_cfg->ftrtRatio = lpai_options_->ftrt_ratio(); + p_perf_cfg->clientType = static_cast( + lpai_options_->client_perf_type()); + p_custom_config->config = p_perf_cfg; + configs.push_back(p_custom_config); + // core affinity + p_custom_config = AllocGraphCustomConfig(); + auto p_core_affinity = AllocCoreAffinity(); + p_custom_config->option = QNN_LPAI_GRAPH_SET_CFG_CORE_AFFINITY; + p_core_affinity->affinity = + static_cast(lpai_options_->affinity()); + p_core_affinity->coreSelection = lpai_options_->core_selection(); + p_custom_config->config = p_core_affinity; + configs.push_back(p_custom_config); + return configs; +} + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/lpai/x86_64/LpaiContextCustomConfig.cpp b/backends/qualcomm/runtime/backends/lpai/x86_64/LpaiContextCustomConfig.cpp new file mode 100644 index 00000000000..7f6ece6e1df --- /dev/null +++ b/backends/qualcomm/runtime/backends/lpai/x86_64/LpaiContextCustomConfig.cpp @@ -0,0 +1,22 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +std::vector +LpaiContextCustomConfig::CreateContextCustomConfig() { + return {}; +} + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/lpai/x86_64/LpaiDevice.cpp b/backends/qualcomm/runtime/backends/lpai/x86_64/LpaiDevice.cpp new file mode 100644 index 00000000000..739159e95b2 --- /dev/null +++ b/backends/qualcomm/runtime/backends/lpai/x86_64/LpaiDevice.cpp @@ -0,0 +1,21 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +Error LpaiDevice::Configure() { + return Error::Ok; +} + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/lpai/x86_64/LpaiGraph.cpp b/backends/qualcomm/runtime/backends/lpai/x86_64/LpaiGraph.cpp new file mode 100644 index 00000000000..391bbcc83fc --- /dev/null +++ b/backends/qualcomm/runtime/backends/lpai/x86_64/LpaiGraph.cpp @@ -0,0 +1,21 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
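On the Hexagon side, the code above first asks the graph for its scratch and persistent memory requirements, then backs each requirement with a host-owned DDR buffer whose lifetime must span every execution of the graph (hence the scratch_buf_/persistent_buf_ members). A condensed sketch of that query-then-provide flow with stand-in types:

#include <cstdint>
#include <vector>

struct MemConfig { // stand-in for QnnLpaiGraph_Mem_t
  uint32_t size = 0;
  void* addr = nullptr;
};

// Sizes come from a graph-property query; the caller-owned vectors keep the
// memory alive for as long as the graph may run.
void ProvideMemory(
    uint32_t scratch_size,
    uint32_t persistent_size,
    std::vector<uint8_t>& scratch_buf,
    std::vector<uint8_t>& persistent_buf,
    MemConfig& scratch_cfg,
    MemConfig& persistent_cfg) {
  scratch_buf.resize(scratch_size);
  scratch_cfg = MemConfig{scratch_size, scratch_buf.data()};

  persistent_buf.resize(persistent_size);
  persistent_cfg = MemConfig{persistent_size, persistent_buf.data()};
}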
+ */ + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +Error LpaiGraph::AfterConfigure(const std::string& graph_name) { + return Error::Ok; +} + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/lpai/x86_64/LpaiGraphCustomConfig.cpp b/backends/qualcomm/runtime/backends/lpai/x86_64/LpaiGraphCustomConfig.cpp new file mode 100644 index 00000000000..5720fb19997 --- /dev/null +++ b/backends/qualcomm/runtime/backends/lpai/x86_64/LpaiGraphCustomConfig.cpp @@ -0,0 +1,32 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +std::vector +LpaiGraphCustomConfig::CreateGraphCustomConfig(const std::string& graph_name) { + std::vector configs; + QnnLpaiGraph_CustomConfig_t* p_custom_config = nullptr; + + p_custom_config = AllocGraphCustomConfig(); + auto p_core_prepare = AllocPrepare(); + static char core_selection = lpai_options_->core_selection() + '0'; + p_custom_config->option = QNN_LPAI_GRAPH_SET_CFG_PREPARE; + p_core_prepare->enableCoreSelection = &core_selection; + p_custom_config->config = p_core_prepare; + configs.push_back(static_cast(p_custom_config)); + return configs; +} + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/scripts/build.sh b/backends/qualcomm/scripts/build.sh index b8f366d2f7c..e8fd342bc61 100755 --- a/backends/qualcomm/scripts/build.sh +++ b/backends/qualcomm/scripts/build.sh @@ -44,6 +44,8 @@ BUILD_ANDROID="true" CMAKE_ANDROID="build-android" BUILD_OE_LINUX="false" CMAKE_OE_LINUX="build-oe-linux" +BUILD_HEXAGON="false" +CMAKE_HEXAGON="build-hexagon" CLEAN="true" BUILD_TYPE="RelWithDebInfo" BUILD_JOB_NUMBER="16" @@ -56,7 +58,7 @@ if [ -z BUCK2 ]; then BUCK2="buck2" fi -long_options=skip_x86_64,skip_linux_android,skip_linux_embedded,enable_linux_embedded,no_clean,release,job_number: +long_options=skip_x86_64,skip_linux_android,skip_linux_embedded,skip_hexagon,enable_linux_embedded,enable_hexagon,no_clean,release,job_number: parsed_args=$(getopt -a --options '' --longoptions $long_options --name "$0" -- "$@") eval set -- "$parsed_args" @@ -68,6 +70,8 @@ while true ; do --skip_linux_android) BUILD_ANDROID="false"; shift;; --skip_linux_embedded) BUILD_OE_LINUX="false"; shift;; --enable_linux_embedded) BUILD_ANDROID="false"; BUILD_OE_LINUX="true"; shift;; + --skip_hexagon) BUILD_HEXAGON="false"; shift;; + --enable_hexagon) BUILD_HEXAGON="true"; shift;; --no_clean) CLEAN="false"; shift;; --release) BUILD_TYPE="Release"; shift;; --job_number) BUILD_JOB_NUMBER="$2"; shift 2;; @@ -239,6 +243,56 @@ if [ "$BUILD_OE_LINUX" = true ]; then cmake --build $LLAMA_EXAMPLE_ROOT -j$BUILD_JOB_NUMBER fi +if [ "$BUILD_HEXAGON" = true ]; then + if [[ -z ${ANDROID_NDK_ROOT} ]]; then + echo "Please export ANDROID_NDK_ROOT=/path/to/android_ndkXX" + exit -1 + fi + if [[ -z ${HEXAGON_SDK_ROOT} ]]; then + echo "Please export HEXAGON_SDK_ROOT=/path/to/hexagon-sdk-x.x.x" + exit -1 + fi + if [[ -z ${HEXAGON_TOOLS_ROOT} ]]; then + echo "Please export HEXAGON_TOOLS_ROOT=/path/to/hexagon-sdk-x.x.x/tools/HEXAGON_Tools/x.x.x" + exit -1 + fi + if [[ -z ${HEXAGON_ARCH} ]]; then + echo "Please export HEXAGON_ARCH=xx. e.g. 
SM8750=v79" + exit -1 + fi + + BUILD_ROOT=$PRJ_ROOT/$CMAKE_HEXAGON + if [ "$CLEAN" = true ]; then + rm -rf $BUILD_ROOT && mkdir $BUILD_ROOT + else + # force rebuild flatccrt for the correct platform + cd $BUILD_ROOT/third-party/flatcc && make clean + fi + cd $BUILD_ROOT + cmake .. \ + -DCMAKE_INSTALL_PREFIX=$BUILD_ROOT \ + -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ + -DEXECUTORCH_BUILD_QNN=ON \ + -DEXECUTORCH_BUILD_PTHREADPOOL=OFF \ + -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=OFF \ + -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ + -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ + -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \ + -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ + -DEXECUTORCH_ENABLE_LOGGING=ON \ + -DFLATCC_ALLOW_WERROR=OFF \ + -DQNN_SDK_ROOT=$QNN_SDK_ROOT \ + -DHEXAGON_SDK_ROOT=$HEXAGON_SDK_ROOT \ + -DHEXAGON_TOOLS_ROOT=$HEXAGON_TOOLS_ROOT \ + -DHEXAGON_ARCH=$HEXAGON_ARCH \ + -DCMAKE_TOOLCHAIN_FILE=$HEXAGON_SDK_ROOT/build/cmake/hexagon_toolchain.cmake \ + -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \ + -B$BUILD_ROOT + + cmake --build $BUILD_ROOT -j$BUILD_JOB_NUMBER --target install +fi + if [ "$BUILD_X86_64" = true ]; then BUILD_ROOT=$PRJ_ROOT/$CMAKE_X86_64 if [ "$CLEAN" = true ]; then diff --git a/backends/qualcomm/scripts/lpai_utils.sh b/backends/qualcomm/scripts/lpai_utils.sh new file mode 100755 index 00000000000..13c8f8c2430 --- /dev/null +++ b/backends/qualcomm/scripts/lpai_utils.sh @@ -0,0 +1,90 @@ +#!/bin/bash +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +if [[ -z $HEXAGON_SDK_ROOT || -z $QNN_SDK_ROOT ]]; then + echo "please export HEXAGON_SDK_ROOT and QNN_SDK_ROOT" + exit -1 +fi + +usage() { echo "usage: $0 [--serial abc] [--workspace /data/tmp/local/xxx] [--direct] [--lpai v6] [--hexagon v81] [--artifact /path/to/artifacts]" 1>&2; exit 1; } + +short=H:,s:,w:,l:,x:,a:,d,h, +long=host:,serial:,workspace:,lpai:,hexagon:,artifact:,direct,help +args=$(getopt -a -o $short -l $long -n $0 -- $@) +eval set -- $args + +host="" +serial="" +workspace="" +mode="hlos" +lpai="" +hexagon="" +artifact="" +while true; do + case $1 in + -H | --host) host=$2; shift 2;; + -s | --serial) serial=$2; shift 2;; + -w | --workspace) workspace=$2; shift 2;; + -l | --lpai) lpai=$2; shift 2;; + -x | --hexagon) hexagon=$2; shift 2;; + -a | --artifact) artifact=$2; shift 2;; + -d | --direct) mode="direct"; shift;; + -h | --help) usage;; + --) shift; break;; + *) echo "unknown keyword: $1"; usage;; + esac +done + +if [[ -z $lpai ]]; then + echo "please specify lpai version" + usage +elif [[ $mode == "direct" && -z $workspace ]]; then + echo "please provide device serial and workspace while using direct mode" + usage +fi + +signed_folder=$QNN_SDK_ROOT/lib/lpai-$lpai/signed +signer=$HEXAGON_SDK_ROOT/tools/elfsigner/elfsigner.py +mkdir -p $signed_folder + +if [[ $mode == "hlos" ]]; then + yes 2>/dev/null | python $signer -i $QNN_SDK_ROOT/lib/lpai-$lpai/unsigned/libQnnLpaiSkel.so -o $signed_folder +else + if [[ -z $hexagon ]]; then + echo "please specify hexagon arch" + fi + adb_args="" + if [[ ! -z $host ]]; then + adb_args="$adb_args -H $host" + fi + if [[ ! 
-z $serial ]]; then + adb_args="$adb_args -s $serial" + fi + adb $adb_args shell mkdir -p $workspace + yes 2>/dev/null | python $signer -i $QNN_SDK_ROOT/lib/lpai-$lpai/unsigned/libQnnLpai.so -o $signed_folder + yes 2>/dev/null | python $signer -i $QNN_SDK_ROOT/lib/hexagon-$hexagon/unsigned/libQnnSystem.so -o $signed_folder + yes 2>/dev/null | python $signer -i $HEXAGON_TOOLS_ROOT/Tools/target/hexagon/lib/$hexagon/G0/pic/libc++.so.1.0 -o $signed_folder + yes 2>/dev/null | python $signer -i build-hexagon/backends/qualcomm/qnn_executorch/fastrpc/libqnn_executorch_skel.so -o $signed_folder + yes 2>/dev/null | python $signer -i build-hexagon/backends/qualcomm/libqnn_executorch_backend.so -o $signed_folder + if [[ ! -z $artifact ]]; then + adb $adb_args push $(find $QNN_SDK_ROOT/lib/lpai-$lpai/signed/ -name 'lib*' ! -name '*LpaiSkel*') $workspace + adb $adb_args push build-android/backends/qualcomm/qnn_executorch/fastrpc/libqnn_executorch_stub.so $workspace + adb $adb_args push build-android/backends/qualcomm/qnn_executorch/fastrpc/qnn_executor_runner $workspace + adb $adb_args shell "cd $workspace && rm -f libc++.so.1 && ln -s libc++.so.1.0 libc++.so.1" + pte=$(find $artifact -type f -name "*.pte") + input_list=$(find $artifact -type f -name "input*.txt") + input_data=$(find $artifact -type f -name "input*.raw") + output_folder=output_direct + adb $adb_args shell "rm -rf $workspace/$output_folder && mkdir -p $workspace/$output_folder" + adb $adb_args push $pte $input_list $input_data $workspace + adb $adb_args shell "cd $workspace && \ + export LD_LIBRARY_PATH=. && export ADSP_LIBRARY_PATH=. \ + && echo 0x0C > qnn_executor_runner.farf && logcat -c \ + && ./qnn_executor_runner --model_path $(basename $pte) --output_folder_path $output_folder" + adb $adb_args pull $workspace/$output_folder $artifact + fi +fi diff --git a/backends/qualcomm/scripts/sqnr_verifier.py b/backends/qualcomm/scripts/sqnr_verifier.py new file mode 100644 index 00000000000..77ecfd97097 --- /dev/null +++ b/backends/qualcomm/scripts/sqnr_verifier.py @@ -0,0 +1,38 @@ +import argparse + +import numpy as np +import torch +from torchao.quantization.utils import compute_error + +parser = argparse.ArgumentParser() +parser.add_argument( + "-g", + "--golden", + nargs="+", + type=str, +) +parser.add_argument( + "-o", + "--output", + nargs="+", + type=str, +) +parser.add_argument( + "-d", + "--dtype", + type=str, +) +parser.add_argument( + "-e", + "--encoding", + type=str, +) +args = parser.parse_args() +golden = [np.fromfile(f, dtype=np.float32) for f in args.golden] +output = [np.fromfile(f, dtype=eval(f"np.{args.dtype}")) for f in args.output] + +with open(args.encoding, "r") as f: + for i, g in enumerate(golden): + enc = [float(x) for x in f.readline().split()] + o = torch.from_numpy(output[i]).to(torch.float).sub(enc[1]).mul(enc[0]) + print(f"SQNR_{i}: {compute_error(torch.from_numpy(g), o)}") diff --git a/backends/qualcomm/serialization/qc_compiler_spec.fbs b/backends/qualcomm/serialization/qc_compiler_spec.fbs index c75c5cb3662..56e5af0c7b6 100644 --- a/backends/qualcomm/serialization/qc_compiler_spec.fbs +++ b/backends/qualcomm/serialization/qc_compiler_spec.fbs @@ -29,6 +29,24 @@ table HtpInfo { vtcm_size_in_mb:uint; } +/// Defines the LPAI hardware architecture available for LPAI backend. 
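For reference, sqnr_verifier.py dequantizes each raw output with the scale/zero-point pair recorded in encoding.txt and scores it against the float golden using torchao's compute_error, which reports the signal-to-quantization-noise ratio in decibels (higher is better):

\hat{x} = s\,(q - z), \qquad
\mathrm{SQNR}(x, \hat{x}) = 20 \log_{10} \frac{\lVert x \rVert_2}{\lVert x - \hat{x} \rVert_2}\ \text{dB}

where q is the raw quantized output, s the scale, z the zero point, and x the golden float tensor.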
+enum LpaiHardwareVersion: int {
+  NONE = 0,
+  V1,
+  V2,
+  V3,
+  V4,
+  V5,
+  V5_1 = 0x10005,
+  V6 = 6,
+  V7,
+}
+
+table LpaiInfo {
+  /// Represents the LPAI hardware version.
+  lpai_hardware_version:LpaiHardwareVersion;
+}
+
 /// You could refer to Qualcomm AI Engine Direct SDK
 /// to get SoC Model in supported snapdragon devices
 enum QcomChipset: int {
@@ -60,6 +78,9 @@ table SocInfo {

   /// Identifies the htp information of the specified SoC.
   htp_info:HtpInfo;
+
+  /// Identifies the lpai information of the specified SoC.
+  lpai_info:LpaiInfo;
 }

 /// Defines performance modes available for GPU backend.
@@ -133,7 +154,7 @@ enum QnnExecuTorchBackendType: int {
   kUndefinedBackend = 0,
   kGpuBackend,
   kHtpBackend,
-  kDspBackend,
+  kLpaiBackend,
 }

 /// Defines pd sessions available for HTP backend.
@@ -181,7 +202,43 @@ table QnnExecuTorchHtpBackendOptions {
   use_weight_sharing:bool;
 }

-/// Logging level of the delegate and QNN backend.
+/// Real-time: Indicates that the model is intended for real-time use cases, where a specific performance threshold must be met.
+/// Non-real-time: Refers to models without strict performance requirements.
+enum QnnExecuTorchLpaiClientPerf: int {
+  /// kUndefined is reserved for compatibility with older flatbuffers
+  kUndefined = 0,
+  kRealTime,
+  kNonRealTime,
+}
+
+/// Offloaded ops shall be executed on the core with the affinity specified.
+enum QnnExecuTorchLpaiCoreAffinity: int {
+  /// kUndefined is reserved for compatibility with older flatbuffers
+  kUndefined = 0,
+  kSoft,
+  kHard,
+}
+
+/// Specifies the backend options for the LPAI backend.
table QnnExecuTorchLpaiBackendOptions {
+  /// Specifies how frequently inference must be completed.
+  fps:int;
+
+  /// Determines the hardware configuration to meet the latency requirement for inference.
+  /// To ensure inference completes within this reduced time window, the eNPU must be boosted.
+  ftrt_ratio:int;
+
+  /// Refers to models with or without strict performance requirements.
+  client_perf_type:QnnExecuTorchLpaiClientPerf;
+
+  /// Offloaded ops shall be executed on the core with the affinity specified.
+  affinity:QnnExecuTorchLpaiCoreAffinity;
+
+  /// Selects which core to use for execution.
+  core_selection:int;
+}
+
+/// Logging level of the delegate and QNN backend.
enum QnnExecuTorchLogLevel: int { kLogOff = 0, kLogLevelError, @@ -251,6 +308,8 @@ table QnnExecuTorchBackendOptions { htp_options:QnnExecuTorchHtpBackendOptions; gpu_options:QnnExecuTorchGpuBackendOptions; + + lpai_options:QnnExecuTorchLpaiBackendOptions; } table QnnExecuTorchOptions { diff --git a/backends/qualcomm/serialization/qc_schema.py b/backends/qualcomm/serialization/qc_schema.py index 7967e80d18b..24c45c5f3e3 100644 --- a/backends/qualcomm/serialization/qc_schema.py +++ b/backends/qualcomm/serialization/qc_schema.py @@ -9,7 +9,7 @@ """ from dataclasses import dataclass, field -from enum import IntEnum, unique +from enum import auto, IntEnum, unique from typing import List, Optional @@ -36,6 +36,25 @@ class HtpInfo: vtcm_size_in_mb: int = 0 +@unique +class LpaiHardwareVersion(IntEnum): + NONE = 0 + V1 = auto() + V2 = auto() + V3 = auto() + V4 = auto() + V5 = auto() + V5_1 = 0x10005 + V6 = 6 + V7 = auto() + + +@dataclass +class LpaiInfo: + lpai_hardware_version: LpaiHardwareVersion = LpaiHardwareVersion.NONE + # TODO: see if we need to expose num_cores of adsp + + @unique class QcomChipset(IntEnum): UNKNOWN_SM = 0 @@ -63,6 +82,7 @@ class QcomChipset(IntEnum): class SocInfo: soc_model: QcomChipset = QcomChipset.UNKNOWN_SM htp_info: HtpInfo = field(default_factory=HtpInfo) + lpai_info: Optional[LpaiInfo] = None _soc_info_table = { @@ -73,15 +93,21 @@ class SocInfo: QcomChipset.SM8550: SocInfo(QcomChipset.SM8550, HtpInfo(HtpArch.V73, 8)), QcomChipset.SA8255: SocInfo(QcomChipset.SA8255, HtpInfo(HtpArch.V73, 8)), QcomChipset.SM8650: SocInfo(QcomChipset.SM8650, HtpInfo(HtpArch.V75, 8)), - QcomChipset.SM8750: SocInfo(QcomChipset.SM8750, HtpInfo(HtpArch.V79, 8)), - QcomChipset.SM8850: SocInfo(QcomChipset.SM8850, HtpInfo(HtpArch.V81, 8)), + QcomChipset.SM8750: SocInfo( + QcomChipset.SM8750, HtpInfo(HtpArch.V79, 8), LpaiInfo(LpaiHardwareVersion.V5) + ), + QcomChipset.SM8850: SocInfo( + QcomChipset.SM8850, HtpInfo(HtpArch.V81, 8), LpaiInfo(LpaiHardwareVersion.V6) + ), QcomChipset.SSG2115P: SocInfo(QcomChipset.SSG2115P, HtpInfo(HtpArch.V73, 2)), QcomChipset.SSG2125P: SocInfo(QcomChipset.SSG2125P, HtpInfo(HtpArch.V73, 2)), QcomChipset.SXR1230P: SocInfo(QcomChipset.SXR1230P, HtpInfo(HtpArch.V73, 2)), QcomChipset.SXR2230P: SocInfo(QcomChipset.SXR2230P, HtpInfo(HtpArch.V69, 8)), QcomChipset.SXR2330P: SocInfo(QcomChipset.SXR2330P, HtpInfo(HtpArch.V79, 8)), QcomChipset.QCS9100: SocInfo(QcomChipset.QCS9100, HtpInfo(HtpArch.V73, 8)), - QcomChipset.SAR2230P: SocInfo(QcomChipset.SAR2230P, HtpInfo(HtpArch.V81, 4)), + QcomChipset.SAR2230P: SocInfo( + QcomChipset.SAR2230P, HtpInfo(HtpArch.V81, 4), LpaiInfo(LpaiHardwareVersion.V6) + ), QcomChipset.SW6100: SocInfo(QcomChipset.SW6100, HtpInfo(HtpArch.V81, 4)), QcomChipset.QCM6490: SocInfo(QcomChipset.QCM6490, HtpInfo(HtpArch.V68, 2)), } @@ -146,7 +172,7 @@ class QnnExecuTorchBackendType(IntEnum): kUndefinedBackend = 0 kGpuBackend = 1 kHtpBackend = 2 - kDspBackend = 3 + kLpaiBackend = 3 @dataclass @@ -164,6 +190,31 @@ class QnnExecuTorchHtpBackendOptions: use_weight_sharing: bool = False +@unique +class QnnExecuTorchLpaiClientPerf(IntEnum): + kUndefined = 0 + kRealTime = 1 + kNonRealTime = 2 + + +@unique +class QnnExecuTorchLpaiCoreAffinity(IntEnum): + kUndefined = 0 + kSoft = 1 + kHard = 2 + + +@dataclass +class QnnExecuTorchLpaiBackendOptions: + fps: int = 1 + ftrt_ratio: int = 10 + client_perf_type: QnnExecuTorchLpaiClientPerf = ( + QnnExecuTorchLpaiClientPerf.kRealTime + ) + affinity: QnnExecuTorchLpaiCoreAffinity = QnnExecuTorchLpaiCoreAffinity.kSoft + 
core_selection: int = 0 + + @unique class QnnExecuTorchLogLevel(IntEnum): kLogOff = 0 @@ -187,6 +238,7 @@ class QnnExecuTorchBackendOptions: backend_type: QnnExecuTorchBackendType htp_options: Optional[QnnExecuTorchHtpBackendOptions] = None gpu_options: Optional[QnnExecuTorchGpuBackendOptions] = None + lpai_options: Optional[QnnExecuTorchLpaiBackendOptions] = None @unique diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py index c57dbbcc332..50e0be974fb 100644 --- a/backends/qualcomm/tests/test_qnn_delegate.py +++ b/backends/qualcomm/tests/test_qnn_delegate.py @@ -51,6 +51,7 @@ from_context_binary, generate_gpu_compiler_spec, generate_htp_compiler_spec, + generate_lpai_compiler_spec, generate_qnn_executorch_compiler_spec, is_qnn_sdk_version_less_than, PyQnnManagerAdaptor, @@ -2088,9 +2089,15 @@ def test_qnn_backend_example_models(self): class TestQNNQuantizedOperator(TestQNN): # TODO: refactor to support different backends def setUp(self): + match self.get_backend_type(): + case QnnExecuTorchBackendType.kHtpBackend: + backend_options = generate_htp_compiler_spec(use_fp16=False) + case QnnExecuTorchBackendType.kLpaiBackend: + backend_options = generate_lpai_compiler_spec() + case _: + raise ValueError("Backend is not implemented yet") TestQNN.atol = 1e-1 TestQNN.rtol = 1 - backend_options = generate_htp_compiler_spec(use_fp16=False) TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec( soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, @@ -2474,6 +2481,90 @@ def test_qnn_backend_conv1d(self): module = self.get_qdq_module(module, sample_input) self.lower_module_and_test_output(module, sample_input) + def test_qnn_backend_lpai(self): + from executorch.backends.qualcomm._passes.build_quant_io import BuildQuantIo + from executorch.backends.qualcomm.utils.constants import ( + QCOM_DTYPE, + QCOM_QUANT_ATTRS, + ) + from executorch.exir.capture._config import ExecutorchBackendConfig + from executorch.exir.passes.memory_planning_pass import MemoryPlanningPass + + """ fp_module = Conv2dSequential() # noqa: F405 + input_shape = (1, 1, 3, 3) + sample_input = (torch.randn(input_shape),) """ + """ fp_module = torch.load( + "/local2/mnt/workspace/executorch_artifacts/meta_models/four_class.pt", + weights_only=False, + ).eval() + input_shape = (1,1,372,496) + sample_input = (torch.randn(input_shape),) """ + fp_module = torch.load( + "/local2/mnt/workspace/executorch_artifacts/meta_models/ocr_haptic.pt", + weights_only=False, + ).eval() + input_shape = (1,1,372,496) + sample_input = (torch.randn(input_shape),) + """ fp_module = Add() # noqa: F405 + input_shape = (1, 1, 256, 256) + sample_input = (torch.randn(input_shape), torch.randn(input_shape)) """ + with torch.no_grad(): + module = self.get_qdq_module( + fp_module, + sample_input, + quant_dtype=QuantDtype.use_8a8w, + ) + # strip unsupported quantize / dequantize ops generated in preprocess + pass_jobs = get_capture_program_passes() + pass_jobs[TagQuantIO][QCOM_PASS_ACTIVATE_KEY] = True + pass_jobs[TagQuantIO][QCOM_PASS_ARGS_KWARGS_DEFAULTS_KEY][ + "get_quant_io_dtype_fn" + ] = lambda n: ( + torch.uint8 + # if n.name in {"x", "aten_convolution_default_1"} + if n.name in { + "x_1", "aten_permute_copy_default", "aten_permute_copy_default_1" + } + # if n.name in {"x", "y", "aten_add_tensor"} + else None + ) + edge_prog_mgr = to_edge_transform_and_lower_to_qnn( + module, + sample_input, + self.compiler_specs, + passes_job=pass_jobs, + ) + + # collect encodings for ios + 
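+        # Each placeholder carries QCOM_QUANT_ATTRS (scale / zero point) after
+        # quantization; forcing QCOM_DTYPE to torch.uint8 here and in the
+        # TagQuantIO hook above keeps graph IO in the raw quantized domain, so
+        # the runner exchanges uint8 buffers directly and dequantization
+        # happens on the host with the recorded encodings.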
input_encodings, output_encodings = [], [] + for n in edge_prog_mgr.exported_program().graph.nodes: + if n.op == "placeholder": + input_encodings.append(n.meta[QCOM_QUANT_ATTRS]) + input_encodings[-1][QCOM_DTYPE] = torch.uint8 + elif n.op == "output": + output_encodings = n.meta[QCOM_QUANT_ATTRS_MAP].values() + for output_encoding in output_encodings: + output_encoding[QCOM_DTYPE] = torch.uint8 + + exec_prog = edge_prog_mgr.to_executorch( + ExecutorchBackendConfig( + passes=[BuildQuantIo()], + memory_planning_pass=MemoryPlanningPass( + alloc_graph_input=False, + alloc_graph_output=False, + ), + segment_alignment=256, + ) + ) + self.verify_output( + fp_module, + sample_input, + exec_prog, + input_encodings=tuple(input_encodings), + output_encodings=tuple(output_encodings), + artifact_dir=self.artifact_dir, + ) + def test_qnn_backend_conv2d(self): modules = [Conv2dSequential(), Conv2dSequential(bias=False)] # noqa: F405 sample_input = (torch.randn([1, 1, 3, 3]),) @@ -8906,8 +8997,8 @@ def setup_environment(): ) parser.add_argument( "--backend", - help="Backend to be deployed ('htp'/'gpu' are currently supported).", - choices=["htp", "gpu"], + help="Backend to be deployed ('htp'/'gpu'/'lpai' are currently supported).", + choices=["htp", "gpu", "lpai"], default="htp", type=str, ) diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py index f4b9339e1c2..a05d01d765e 100644 --- a/backends/qualcomm/tests/utils.py +++ b/backends/qualcomm/tests/utils.py @@ -160,6 +160,7 @@ class TestQNN(unittest.TestCase): compiler_specs: List[CompileSpec] = None chipset_table = get_soc_to_chipset_map() error_only = False + oss_repo: str = "" ip = "localhost" port = 8080 executorch_root: str = "" @@ -168,6 +169,7 @@ class TestQNN(unittest.TestCase): qa_dataset: str = "" sentence_dataset: str = "" pretrained_weight: str = "" + model_name: str = "" enable_profile: bool = False op_package_dir: str = "" target: str = "" @@ -216,6 +218,9 @@ def _save_model_and_expected_output( else: ref_outputs.append(ref_output.detach()) + for i, output in enumerate(ref_output): + output.numpy().tofile(f"{dir_name}/golden_{0}_{i}.raw") + pte_fname = f"{dir_name}/qnn_executorch_test.pte" with open(pte_fname, "wb") as file: file.write(buffer) @@ -253,8 +258,12 @@ def verify_output( # noqa: C901 save_inference_speed: bool = False, expected_compared_events: int = -1, qnn_intermediate_debugger: QNNIntermediateDebugger = None, + artifact_dir: str = None, ): with tempfile.TemporaryDirectory() as tmp_dir: + tmp_dir = artifact_dir if artifact_dir is not None else tmp_dir + if artifact_dir is not None: + make_output_dir(artifact_dir) ( ref_outputs, pte_fname, @@ -293,6 +302,9 @@ def post_process(): .sub(enc[QCOM_ZERO_POINT]) .mul(enc[QCOM_SCALE]) ) + from torchao.quantization.utils import compute_error + + print(f"SQNR:{compute_error(ref_outputs[i], output)}") outputs.append(output) def validate_profile(): @@ -332,6 +344,10 @@ def validate_intermediate_tensor(): ) processed_inputs = list(sample_inputs) + with open(f"{tmp_dir}/encoding.txt", "w") as f: + for enc in output_encodings: + f.write(f"{enc[QCOM_SCALE]} {enc[QCOM_ZERO_POINT]}\n") + for i, enc in enumerate(input_encodings): processed_inputs[i] = ( processed_inputs[i] @@ -458,6 +474,7 @@ def validate_intermediate_tensor(): adb.push( inputs=[processed_inputs], files=op_package_paths, + artifact_dir=tmp_dir, ) adb.extra_cmds += extra_cmds if save_inference_speed: @@ -591,9 +608,14 @@ def get_qdq_module( block_size_map: Dict[str, Tuple] = None, submodule_qconfig_list: 
Optional[List[Tuple[Callable, ModuleQConfig]]] = None, ) -> torch.fx.GraphModule: - m = torch.export.export( - module, inputs, dynamic_shapes=dynamic_shapes, strict=True - ).module() + if isinstance(module, torch.jit.ScriptModule): + from torch._export.converter import TS2EPConverter + + m = TS2EPConverter(module, inputs).convert().module() + else: + m = torch.export.export( + module, inputs, dynamic_shapes=dynamic_shapes, strict=True + ).module() quantizer = make_quantizer( quant_dtype=quant_dtype, diff --git a/backends/qualcomm/utils/utils.py b/backends/qualcomm/utils/utils.py index cccf198e924..e603f1b234e 100644 --- a/backends/qualcomm/utils/utils.py +++ b/backends/qualcomm/utils/utils.py @@ -31,6 +31,7 @@ from executorch.backends.qualcomm.serialization.qc_schema import ( _soc_info_table, HtpArch, + LpaiHardwareVersion, QcomChipset, QnnExecuTorchBackendOptions, QnnExecuTorchBackendType, @@ -40,6 +41,9 @@ QnnExecuTorchHtpPerformanceMode, QnnExecuTorchHtpPrecision, QnnExecuTorchLogLevel, + QnnExecuTorchLpaiBackendOptions, + QnnExecuTorchLpaiClientPerf, + QnnExecuTorchLpaiCoreAffinity, QnnExecuTorchOpPackageOptions, QnnExecuTorchOptions, QnnExecuTorchProfileLevel, @@ -957,7 +961,7 @@ def generate_gpu_compiler_spec( Returns: QnnExecuTorchGpuBackendOptions: backend options for QNN GPU. """ - # TODO: enable performance hint mechanism in runtime and make this as an option + # TODO: enable power config mechanism in runtime and make this as an option gpu_options = QnnExecuTorchGpuBackendOptions() gpu_options.precision = precision gpu_options.use_memory_optimizations = use_memory_optimizations @@ -1014,6 +1018,68 @@ def generate_htp_compiler_spec( ) +def generate_lpai_compiler_spec( + fps: int = 1, + ftrt_ratio: int = 10, + client_perf_type: QnnExecuTorchLpaiClientPerf = QnnExecuTorchLpaiClientPerf.kRealTime, + affinity: QnnExecuTorchLpaiCoreAffinity = QnnExecuTorchLpaiCoreAffinity.kSoft, + core_selection: int = 0, +) -> QnnExecuTorchBackendOptions: + """ + Helper function generating backend options for QNN LPAI + + Args: + fps: + Specifies how frequently inference must be completed. + This sets the overall time budget for each frame, including pre-processing, + inference, and post-processing. + ftrt_ratio: + Determines the hardware configuration to meet the latency requirement for inference. + Setting ftrt_ratio = 50 applies a multiplication factor of 5.0 to the base clock frequency, + helping the eNPU meet the tighter latency constraint. + client_perf_type: + kRealtime - Indicates that the model is intended for real-time use cases, + where a specific performance threshold must be met. + If the required performance cannot be achieved, the finalize function will return an error. + kNonRealTime - Refers to models without strict performance requirements. + In these cases, LPAI will make a best-effort attempt to accommodate the workload, + and finalize will not fail due to performance limitations. + affinity: + kSoft - Default affinity. Scheduler will assign jobs to requested cores when feasible + kHard - Scheduler will honour affinity requested by the client + core_selection: + A bit mask for core selection. 
Each bit corresponds to a core, set the bit to use the core + Note that all zeros and all ones mean any core can be used for the eAI instance + + Example for 2 cores: + +--------+--------+---------------+-------------------------------------------------------------------------+ + | bit 1 | bit 0 | affinity | scheduler behavior | + +--------+--------+---------------+-------------------------------------------------------------------------+ + | 0 | 0 | any | Default affinity, scheduler will pick any core based on load | + | 1 | 1 | any | Same as default affinity | + | 0 | 1 | hard | All jobs will only be sent to core 0 | + | 1 | 0 | hard | All jobs will only be sent to core 1 | + | 0 | 1 | soft | Scheduler will attempt to send jobs to core 0 | + | 1 | 0 | soft | Scheduler will attempt to send jobs to core 1 | + +--------+--------+---------------+-------+-----------------------------------------------------------------+ + + Returns: + QnnExecuTorchBackendOptions: backend options for QNN LPAI. + """ + # TODO: enable power config mechanism in runtime and make this as an option + lpai_options = QnnExecuTorchLpaiBackendOptions() + lpai_options.fps = fps + lpai_options.ftrt_ratio = ftrt_ratio + lpai_options.client_perf_type = client_perf_type + lpai_options.affinity = affinity + lpai_options.core_selection = core_selection + + return QnnExecuTorchBackendOptions( + backend_type=QnnExecuTorchBackendType.kLpaiBackend, + lpai_options=lpai_options, + ) + + def generate_qnn_executorch_compiler_spec( soc_model: QcomChipset, backend_options: QnnExecuTorchBackendOptions, @@ -1126,7 +1192,7 @@ def generate_qnn_executorch_compiler_spec( ] -def get_soc_to_arch_map(): +def get_soc_to_htp_arch_map(): return { "SA8295": HtpArch.V68, "SM8350": HtpArch.V68, @@ -1149,6 +1215,14 @@ def get_soc_to_arch_map(): } +def get_soc_to_lpai_hw_ver_map(): + return { + "SM8750": LpaiHardwareVersion.V5, + "SM8850": LpaiHardwareVersion.V6, + "SAR2230P": LpaiHardwareVersion.V6, + } + + def get_soc_to_chipset_map(): return { "SA8295": QcomChipset.SA8295, diff --git a/examples/qualcomm/executor_runner/qnn_executor_runner.cpp b/examples/qualcomm/executor_runner/qnn_executor_runner.cpp index bd48c553698..59bb1645a3d 100644 --- a/examples/qualcomm/executor_runner/qnn_executor_runner.cpp +++ b/examples/qualcomm/executor_runner/qnn_executor_runner.cpp @@ -465,7 +465,8 @@ int main(int argc, char** argv) { if (expected_input_shapes.empty()) { ET_CHECK_MSG( - file_size == tensor_meta->nbytes(), + // workaround for LPAI (== → <=), should figure out root cause of graph without io QDQ + file_size <= tensor_meta->nbytes(), "Input(%d) size mismatch. 
file bytes: %zu, tensor bytes: %zu", input_index, file_size, diff --git a/examples/qualcomm/utils.py b/examples/qualcomm/utils.py index ca1d655c0db..e849daf9a6a 100755 --- a/examples/qualcomm/utils.py +++ b/examples/qualcomm/utils.py @@ -39,7 +39,8 @@ from executorch.backends.qualcomm.utils.utils import ( generate_htp_compiler_spec, generate_qnn_executorch_compiler_spec, - get_soc_to_arch_map, + get_soc_to_htp_arch_map, + get_soc_to_lpai_hw_ver_map, to_edge_transform_and_lower_to_qnn, ) from executorch.exir.backend.utils import get_delegates @@ -103,7 +104,8 @@ def __init__( self.dump_intermediate_outputs = dump_intermediate_outputs self.debug_output_path = f"{self.workspace}/debug_output.bin" self.output_folder = f"{self.workspace}/outputs" - self.htp_arch = get_soc_to_arch_map()[soc_model] + self.htp_arch = get_soc_to_htp_arch_map()[soc_model] + self.soc_model = soc_model self.error_only = error_only self.shared_buffer = shared_buffer self.runner = runner @@ -130,7 +132,9 @@ def _adb(self, cmd, output_callback: Optional[Callable[[str], None]] = None): cmds, stdout=subprocess.DEVNULL if self.error_only else sys.stdout ) - def push(self, inputs=None, input_list=None, files=None, init_env=True): + def push( + self, inputs=None, input_list=None, files=None, init_env=True, artifact_dir=None + ): artifacts = [] if init_env: self._adb(["shell", f"rm -rf {self.workspace}"]) @@ -153,6 +157,15 @@ def push(self, inputs=None, input_list=None, files=None, init_env=True): QnnExecuTorchBackendType.kGpuBackend: [ f"{self.qnn_sdk}/lib/{self.target}/libQnnGpu.so", ], + # please note that users need to sign LPAI related libs manually + QnnExecuTorchBackendType.kLpaiBackend: [ + f"{self.qnn_sdk}/lib/{self.target}/libQnnLpai.so", + ( + f"{self.qnn_sdk}/lib/lpai-v{get_soc_to_lpai_hw_ver_map()[self.soc_model]}/" + f"signed/libQnnLpaiSkel.so" + ), + f"{self.qnn_sdk}/lib/{self.target}/libQnnLpaiStub.so", + ], }[self.backend] artifacts.extend( @@ -165,6 +178,7 @@ def push(self, inputs=None, input_list=None, files=None, init_env=True): ] ) with tempfile.TemporaryDirectory() as tmp_dir: + tmp_dir = artifact_dir if artifact_dir is not None else tmp_dir input_list_file, input_files = generate_inputs( tmp_dir, self.input_list_filename, inputs ) @@ -229,6 +243,8 @@ def execute( qnn_executor_runner_cmds = " ".join( [ f"cd {self.workspace} &&", + "export ADSP_LIBRARY_PATH=. &&", + "export LD_LIBRARY_PATH=. &&", "chmod +x ./qnn_executor_runner &&", f"./qnn_executor_runner {qnn_executor_runner_args}", ] @@ -519,6 +535,7 @@ def make_output_dir(path: str): if os.path.exists(path): shutil.rmtree(path, ignore_errors=True) os.makedirs(path) + os.chmod(path, 0o777) def topk_accuracy(predictions, targets, k): diff --git a/extension/data_loader/file_data_loader.cpp b/extension/data_loader/file_data_loader.cpp index 92c4cd61eea..f64b84894f3 100644 --- a/extension/data_loader/file_data_loader.cpp +++ b/extension/data_loader/file_data_loader.cpp @@ -26,7 +26,7 @@ // Some platforms (e.g. Xtensa) do not support pread() that we use to read the // file at different offsets simultaneously from multiple threads not affecting // each other. We list them below and use a workaround for them. 
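When ET_HAVE_PREAD is 0, reads are emulated with lseek() + read(). Unlike pread(), that pair advances the shared file offset, so threads loading segments concurrently must serialize access to the descriptor. A hedged sketch of the emulation, not the exact code in this file:

#include <sys/types.h>
#include <unistd.h>

#include <cstddef>

// Emulate pread(): seek to the requested offset, then read. Callers must
// guard the fd with a lock if they share it across threads, since the file
// offset is mutated.
ssize_t pread_fallback(int fd, void* buf, size_t nbytes, off_t offset) {
  if (::lseek(fd, offset, SEEK_SET) == static_cast<off_t>(-1)) {
    return -1;
  }
  return ::read(fd, buf, nbytes);
}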
-#if defined(__xtensa__) +#if defined(__xtensa__) || defined(__hexagon__) #define ET_HAVE_PREAD 0 #endif // defined(__xtensa__) @@ -42,7 +42,27 @@ namespace executorch { namespace extension { namespace { +inline void* et_aligned_alloc(size_t size, std::align_val_t alignment) { + return ::operator new(size, alignment); +} + +inline void et_aligned_free(void* ptr, std::align_val_t alignment) { + return ::operator delete(ptr, alignment); +} +/** + * FreeableBuffer::FreeFn-compatible callback. + * + * `data` is the original buffer pointer. + * `context` is the original alignment. + * + * `size` is unused. + */ +void FreeSegment(void* context, void* data, ET_UNUSED size_t size) { + et_aligned_free( + data, + static_cast(reinterpret_cast(context))); +} /** * Returns true if the value is an integer power of 2. */ @@ -54,7 +74,7 @@ static bool is_power_of_2(size_t value) { FileDataLoader::~FileDataLoader() { // file_name_ can be nullptr if this instance was moved from, but freeing a // null pointer is safe. - std::free(const_cast(file_name_)); + et_aligned_free(const_cast(file_name_), alignment_); // fd_ can be -1 if this instance was moved from, but closing a negative fd is // safe (though it will return an error). if (fd_ == -1) { @@ -99,44 +119,21 @@ Result FileDataLoader::from( return Error::AccessFailed; } size_t file_size = st.st_size; - // Copy the filename so we can print better debug messages if reads fail. - const char* file_name_copy = ::strdup(file_name); + size_t file_name_len = ::strlen(file_name); + char* file_name_copy = + (char*)et_aligned_alloc(file_name_len, std::align_val_t(alignment)); + if (file_name_copy == nullptr) { ET_LOG(Error, "strdup(%s) failed", file_name); ::close(fd); return Error::MemoryAllocationFailed; } + ::strcpy(file_name_copy, file_name); return FileDataLoader(fd, file_size, alignment, file_name_copy); } -namespace { - -inline void* et_aligned_alloc(size_t size, std::align_val_t alignment) { - return ::operator new(size, alignment); -} - -inline void et_aligned_free(void* ptr, std::align_val_t alignment) { - return ::operator delete(ptr, alignment); -} - -/** - * FreeableBuffer::FreeFn-compatible callback. - * - * `data` is the original buffer pointer. - * `context` is the original alignment. - * - * `size` is unused. - */ -void FreeSegment(void* context, void* data, ET_UNUSED size_t size) { - et_aligned_free( - data, - static_cast(reinterpret_cast(context))); -} - -} // namespace - Result FileDataLoader::load( size_t offset, size_t size, diff --git a/third-party/CMakeLists.txt b/third-party/CMakeLists.txt index 767ac367e19..d51059363c8 100644 --- a/third-party/CMakeLists.txt +++ b/third-party/CMakeLists.txt @@ -6,7 +6,10 @@ set(CMAKE_POLICY_VERSION_MINIMUM 3.5) add_subdirectory(json) -add_subdirectory(gflags) +# [workaround]: mkdir was not supported in hexagon +if(NOT ${CMAKE_SYSTEM_PROCESSOR} MATCHES Hexagon) + add_subdirectory(gflags) +endif() if(EXECUTORCH_BUILD_PYBIND) add_subdirectory(pybind11)
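One detail worth keeping in mind for the file_data_loader change above: strlen() does not count the trailing NUL, while strcpy() writes it, so an aligned replacement for strdup() needs strlen(s) + 1 bytes. A minimal sketch of such a helper, assuming a C++17 toolchain (the nothrow aligned operator new overload is standard there):

#include <cstddef>
#include <cstring>
#include <new>

// Aligned strdup sketch: the + 1 reserves room for the NUL terminator that
// ::strcpy appends; returns nullptr on allocation failure.
inline char* aligned_strdup(const char* s, std::align_val_t alignment) {
  const std::size_t len = ::strlen(s);
  char* copy = static_cast<char*>(
      ::operator new(len + 1, alignment, std::nothrow));
  if (copy != nullptr) {
    ::strcpy(copy, s);
  }
  return copy;
}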