diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake index caae2aacfd582..521ac7ff2b48f 100644 --- a/cmake/onnxruntime_providers.cmake +++ b/cmake/onnxruntime_providers.cmake @@ -15,8 +15,10 @@ function(substitute_op_reduction_srcs all_srcs) set(original_srcs "${ONNXRUNTIME_ROOT}/contrib_ops/cpu/cpu_contrib_kernels.cc" "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/cuda_contrib_kernels.cc" + "${ONNXRUNTIME_ROOT}/contrib_ops/openvino/openvino_contrib_kernels.cc" "${ONNXRUNTIME_ROOT}/core/providers/cpu/cpu_execution_provider.cc" "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_execution_provider.cc" + "${ONNXRUNTIME_ROOT}/core/providers/openvino/openvino_execution_provider.cc" "${ONNXRUNTIME_ROOT}/core/providers/op_kernel_type_control_overrides.inc" "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/cpu_training_kernels.cc" "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/cuda_training_kernels.cc" @@ -93,6 +95,11 @@ file(GLOB_RECURSE onnxruntime_rocm_contrib_ops_cu_srcs CONFIGURE_DEPENDS "${ONNXRUNTIME_ROOT}/contrib_ops/rocm/*.cuh" ) +file(GLOB_RECURSE onnxruntime_openvino_contrib_ops_cc_srcs CONFIGURE_DEPENDS + "${ONNXRUNTIME_ROOT}/contrib_ops/openvino/*.h" + "${ONNXRUNTIME_ROOT}/contrib_ops/openvino/*.cc" +) + file(GLOB onnxruntime_providers_common_srcs CONFIGURE_DEPENDS "${ONNXRUNTIME_ROOT}/core/providers/*.h" "${ONNXRUNTIME_ROOT}/core/providers/*.cc" @@ -799,12 +806,12 @@ if (onnxruntime_USE_OPENVINO) # include_directories("${CMAKE_CURRENT_BINARY_DIR}/onnx") file(GLOB_RECURSE onnxruntime_providers_openvino_cc_srcs CONFIGURE_DEPENDS + "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.h" + "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.cc" "${ONNXRUNTIME_ROOT}/core/providers/openvino/*.h" "${ONNXRUNTIME_ROOT}/core/providers/openvino/*.cc" "${ONNXRUNTIME_ROOT}/core/providers/openvino/*.hpp" "${ONNXRUNTIME_ROOT}/core/providers/openvino/*.cpp" - "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.h" - 
"${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.cc" ) if (WIN32) @@ -824,6 +831,7 @@ if (onnxruntime_USE_OPENVINO) unset(CMAKE_MAP_IMPORTED_CONFIG_RELWITHDEBINFO) endif() + if ((DEFINED ENV{OPENCL_LIBS}) AND (DEFINED ENV{OPENCL_INCS})) add_definitions(-DIO_BUFFER_ENABLED=1) list(APPEND OPENVINO_LIB_LIST $ENV{OPENCL_LIBS} ${OV_20_LIBS} ${InferenceEngine_LIBRARIES} ${NGRAPH_LIBRARIES} ngraph::onnx_importer ${PYTHON_LIBRARIES}) @@ -832,8 +840,11 @@ if (onnxruntime_USE_OPENVINO) endif() source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_openvino_cc_srcs}) + + source_group(TREE ${ONNXRUNTIME_ROOT} FILES ${onnxruntime_openvino_contrib_ops_cc_srcs}) + list(APPEND onnxruntime_providers_openvino_cc_srcs ${onnxruntime_openvino_contrib_ops_cc_srcs}) onnxruntime_add_shared_library_module(onnxruntime_providers_openvino ${onnxruntime_providers_openvino_cc_srcs} "${ONNXRUNTIME_ROOT}/core/dll/onnxruntime.rc") - onnxruntime_add_include_to_target(onnxruntime_providers_openvino onnxruntime_common onnx) + onnxruntime_add_include_to_target(onnxruntime_providers_openvino onnxruntime_common onnxruntime_framework onnx onnx_proto) install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/openvino DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers) set_target_properties(onnxruntime_providers_openvino PROPERTIES LINKER_LANGUAGE CXX) set_target_properties(onnxruntime_providers_openvino PROPERTIES FOLDER "ONNXRuntime") diff --git a/include/onnxruntime/core/framework/op_kernel.h b/include/onnxruntime/core/framework/op_kernel.h index ed712cf00e096..baa1a04809857 100644 --- a/include/onnxruntime/core/framework/op_kernel.h +++ b/include/onnxruntime/core/framework/op_kernel.h @@ -199,6 +199,14 @@ KernelCreateInfo BuildKernelCreateInfo(); } // namespace snpe } // namespace contrib +namespace contrib { +namespace openvino_ep { +template +KernelCreateInfo BuildKernelCreateInfo(); +} // namespace openvino_ep +} // namespace contrib + + using 
BuildKernelCreateInfoFn = KernelCreateInfo (*)(); // Naming convention for operator kernel classes diff --git a/onnxruntime/contrib_ops/cpu/transformers/beam_search.h b/onnxruntime/contrib_ops/cpu/transformers/beam_search.h index 63b0418a12d59..e2e4b10d4a90f 100644 --- a/onnxruntime/contrib_ops/cpu/transformers/beam_search.h +++ b/onnxruntime/contrib_ops/cpu/transformers/beam_search.h @@ -4,8 +4,10 @@ #pragma once #include +#ifndef SHARED_PROVIDER #include "core/common/common.h" #include "core/framework/op_kernel.h" +#endif #include "core/providers/cpu/controlflow/utils.h" #include "contrib_ops/cpu/transformers/beam_search_parameters.h" #include "contrib_ops/cpu/transformers/subgraph_gpt.h" diff --git a/onnxruntime/contrib_ops/cpu/transformers/subgraph_base.cc b/onnxruntime/contrib_ops/cpu/transformers/subgraph_base.cc index 5f0d1db87bef3..8c969d97938b8 100644 --- a/onnxruntime/contrib_ops/cpu/transformers/subgraph_base.cc +++ b/onnxruntime/contrib_ops/cpu/transformers/subgraph_base.cc @@ -128,6 +128,7 @@ const IExecutionProvider* Subgraph::GetProvider() const { const IExecutionProvider* cpu_provider = providers.Get(onnxruntime::kCpuExecutionProvider); const IExecutionProvider* cuda_provider = providers.Get(onnxruntime::kCudaExecutionProvider); const IExecutionProvider* rocm_provider = providers.Get(onnxruntime::kRocmExecutionProvider); + //const IExecutionProvider* openvino_provider = providers.Get(onnxruntime::kOpenVINOExecutionProvider); const IExecutionProvider* gpu_provider = cuda_provider ? cuda_provider : rocm_provider; const IExecutionProvider* provider = gpu_provider ? gpu_provider : cpu_provider; return provider; diff --git a/onnxruntime/contrib_ops/cuda/bert/add_bias_transpose.cu b/onnxruntime/contrib_ops/cuda/bert/add_bias_transpose.cu index 8782201a64e6b..5af43a77fc90b 100644 --- a/onnxruntime/contrib_ops/cuda/bert/add_bias_transpose.cu +++ b/onnxruntime/contrib_ops/cuda/bert/add_bias_transpose.cu @@ -2,7 +2,8 @@ // Licensed under the MIT License. 
#include "core/providers/cuda/cuda_common.h" #include "core/providers/cuda/cu_inc/common.cuh" -#include "contrib_ops/cuda/bert/add_bias_transpose.h" +#include "contrib_ops/cuda/bert/add_bias_transpose.h" #include "contrib_ops/cuda/bert/rotary_embedding_util.h" using namespace onnxruntime::cuda; diff --git a/onnxruntime/contrib_ops/cuda/transformers/generation_device_helper.h b/onnxruntime/contrib_ops/cuda/transformers/generation_device_helper.h index 5ed956f9a2ecd..ca030e8321d20 100644 --- a/onnxruntime/contrib_ops/cuda/transformers/generation_device_helper.h +++ b/onnxruntime/contrib_ops/cuda/transformers/generation_device_helper.h @@ -44,7 +44,7 @@ Status AddToFeeds(const IExecutionProvider* execution_provider, std::initializer_list inputs, std::vector& feeds, IAllocatorUniquePtr& buffer); - + template void InitBeamState(transformers::IBeamSearchState* beam_state, gsl::span& sequence_lengths, diff --git a/onnxruntime/contrib_ops/openvino/beam_search.cc b/onnxruntime/contrib_ops/openvino/beam_search.cc new file mode 100644 index 0000000000000..2193172f41feb --- /dev/null +++ b/onnxruntime/contrib_ops/openvino/beam_search.cc @@ -0,0 +1,39 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "contrib_ops/openvino/beam_search.h" + +namespace onnxruntime { +namespace contrib { +namespace openvino_ep { + +#define REGISTER_KERNEL_TYPED(T) \ + ONNX_OPERATOR_TYPED_KERNEL_EX( \ + BeamSearch, \ + kMSDomain, \ + 1, \ + T, \ + kOpenVINOExecutionProvider, \ + (*KernelDefBuilder::Create()) \ + .TypeConstraint("T", DataTypeImpl::GetTensorType()), \ + BeamSearch); + +REGISTER_KERNEL_TYPED(float) + +BeamSearch::BeamSearch(const OpKernelInfo& info) + : onnxruntime::contrib::transformers::BeamSearch(info) { + +} + +Status BeamSearch::ComputeInternal(OpKernelContext* context) const { + return onnxruntime::contrib::transformers::BeamSearch::Compute(context); +} + +Status BeamSearch::Compute(OpKernelContext* context) const { + auto s = ComputeInternal(context); + return s; +} + +} // namespace openvino_ep +} // namespace contrib +} // namespace onnxruntime diff --git a/onnxruntime/contrib_ops/openvino/beam_search.h b/onnxruntime/contrib_ops/openvino/beam_search.h new file mode 100644 index 0000000000000..c4d94a6c31327 --- /dev/null +++ b/onnxruntime/contrib_ops/openvino/beam_search.h @@ -0,0 +1,29 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +#include "core/providers/shared_library/provider_api.h" +#include "contrib_ops/cpu/transformers/beam_search.h" + +namespace onnxruntime { + +class SessionState; + +namespace contrib { + +namespace openvino_ep { + +class BeamSearch final : public onnxruntime::contrib::transformers::BeamSearch { + public: + BeamSearch(const OpKernelInfo& info); + + Status Compute(OpKernelContext* context) const override; + + private: + Status ComputeInternal(OpKernelContext* context) const; +}; + +} // namespace openvino_ep +} // namespace contrib +} // namespace onnxruntime diff --git a/onnxruntime/contrib_ops/openvino/openvino_contrib_kernels.cc b/onnxruntime/contrib_ops/openvino/openvino_contrib_kernels.cc new file mode 100644 index 0000000000000..44a9d6f70d610 --- /dev/null +++ b/onnxruntime/contrib_ops/openvino/openvino_contrib_kernels.cc @@ -0,0 +1,40 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "contrib_ops/openvino/openvino_contrib_kernels.h" + +using namespace onnxruntime::common; + +namespace onnxruntime { +namespace contrib { +namespace openvino_ep { + +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kOpenVINOExecutionProvider, kMSDomain, 1, float, BeamSearch); + +template <> +KernelCreateInfo BuildKernelCreateInfo() { + KernelCreateInfo info; + return info; +} + +Status RegisterOpenVINOContribKernels(KernelRegistry& kernel_registry) { + static const BuildKernelCreateInfoFn function_table[] = { + BuildKernelCreateInfo, // default entry to avoid the list become empty after ops-reducing + BuildKernelCreateInfo + }; + + for (auto& function_table_entry : function_table) { + KernelCreateInfo info = function_table_entry(); + if (info.kernel_def != nullptr) { // filter disabled entries where type is void + ORT_RETURN_IF_ERROR(kernel_registry.Register(std::move(info))); + //return kernel_registry.Register(std::move(info)); + } + } + + return Status::OK(); +} + +} // namespace openvino_ep +} // namespace 
contrib +} // namespace onnxruntime + diff --git a/onnxruntime/contrib_ops/openvino/openvino_contrib_kernels.h b/onnxruntime/contrib_ops/openvino/openvino_contrib_kernels.h new file mode 100644 index 0000000000000..c06deac45514e --- /dev/null +++ b/onnxruntime/contrib_ops/openvino/openvino_contrib_kernels.h @@ -0,0 +1,14 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once +#include "core/common/status.h" +#include "core/providers/shared_library/provider_api.h" + +namespace onnxruntime { +namespace contrib { +namespace openvino_ep { +Status RegisterOpenVINOContribKernels(KernelRegistry& kernel_registry); +} // namespace openvino_ep +} // namespace contrib +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/cpu/cpu_provider_shared.cc b/onnxruntime/core/providers/cpu/cpu_provider_shared.cc index d3d252cf7026e..04147264a10ef 100644 --- a/onnxruntime/core/providers/cpu/cpu_provider_shared.cc +++ b/onnxruntime/core/providers/cpu/cpu_provider_shared.cc @@ -75,6 +75,23 @@ struct ProviderHostCPUImpl : ProviderHostCPU { Status NonMaxSuppressionBase__PrepareCompute(OpKernelContext* ctx, PrepareContext& pc) override { return NonMaxSuppressionBase::PrepareCompute(ctx, pc); } Status NonMaxSuppressionBase__GetThresholdsFromInputs(const PrepareContext& pc, int64_t& max_output_boxes_per_class, float& iou_threshold, float& score_threshold) override { return NonMaxSuppressionBase::GetThresholdsFromInputs(pc, max_output_boxes_per_class, iou_threshold, score_threshold); } +#if defined(USE_OPENVINO) + void BeamSearch__Init(contrib::transformers::BeamSearch* p, const OpKernelInfo& info) override { + p->contrib::transformers::BeamSearch::Init(info); + } + + Status BeamSearch__Compute(const contrib::transformers::BeamSearch* p, OpKernelContext* ctx) override { + return p->contrib::transformers::BeamSearch::Compute(ctx); + } + + Status BeamSearch__SetupSubgraphExecutionInfo(contrib::transformers::BeamSearch* p, 
const SessionState& session_state, + const std::string& attribute_name, + const SessionState& subgraph_session_state) override { + return p->contrib::transformers::BeamSearch::SetupSubgraphExecutionInfo(session_state, attribute_name, + subgraph_session_state); + } +#endif + #if defined(USE_CUDA) || defined(USE_ROCM) // From cpu/tensor/size.h (direct) Status Size__Compute(const Size* p, OpKernelContext* context) override { return p->Size::Compute(context); } @@ -218,7 +235,7 @@ struct ProviderHostCPUImpl : ProviderHostCPU { int sequence_length, int& past_sequence_length) override { return p->contrib::AttentionBase::GetPresent(context, past, batch_size, head_size, sequence_length, past_sequence_length); - } + } void BeamSearch__Init(contrib::transformers::BeamSearch* p, const OpKernelInfo& info) override { p->contrib::transformers::BeamSearch::Init(info); diff --git a/onnxruntime/core/providers/cpu/cpu_provider_shared.h b/onnxruntime/core/providers/cpu/cpu_provider_shared.h index 1fafc646c8aaa..43fd5655963dd 100644 --- a/onnxruntime/core/providers/cpu/cpu_provider_shared.h +++ b/onnxruntime/core/providers/cpu/cpu_provider_shared.h @@ -30,7 +30,18 @@ struct ProviderHostCPU { virtual Status NonMaxSuppressionBase__PrepareCompute(OpKernelContext* ctx, PrepareContext& pc) = 0; virtual Status NonMaxSuppressionBase__GetThresholdsFromInputs(const PrepareContext& pc, int64_t& max_output_boxes_per_class, float& iou_threshold, float& score_threshold) = 0; -#if defined(USE_CUDA) || defined(USE_ROCM) +#if defined(USE_OPENVINO) + // BeamSearch + virtual void BeamSearch__Init(contrib::transformers::BeamSearch* p, const OpKernelInfo& info) = 0; + virtual Status BeamSearch__Compute(const contrib::transformers::BeamSearch* p, OpKernelContext* ctx) = 0; + virtual Status BeamSearch__SetupSubgraphExecutionInfo(contrib::transformers::BeamSearch* p, + const SessionState& session_state, + const std::string& attribute_name, + const SessionState& subgraph_session_state) = 0; + +#endif + +#if 
defined(USE_CUDA) || defined(USE_ROCM) // From cpu/tensor/size.h virtual Status Size__Compute(const Size* p, OpKernelContext* context) = 0; diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index 05eec6b1013bd..cde2c0048c99e 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -2,13 +2,18 @@ // Licensed under the MIT License #include "core/providers/shared_library/provider_api.h" +#include "backend_manager.h" +#define ORT_API_MANUAL_INIT #include "openvino_execution_provider.h" #include "contexts.h" #include "backend_manager.h" #include "ov_versions/capabilities.h" +#include "contrib_ops/openvino/openvino_contrib_kernels.h" #define MEMCPY_S(dest, src, destsz, srcsz) memcpy(dest, src, std::min(destsz, srcsz)) +using namespace onnxruntime::common; + namespace onnxruntime { OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProviderInfo& info) @@ -188,4 +193,32 @@ common::Status OpenVINOExecutionProvider::Compile( return Status::OK(); } +namespace openvino_ep { +static Status RegisterOpenVINOKernels(KernelRegistry& kernel_registry) { + ORT_RETURN_IF_ERROR(::onnxruntime::contrib::openvino_ep::RegisterOpenVINOContribKernels(kernel_registry)); + return Status::OK(); +} +} // namespace openvino_ep + +static std::shared_ptr o_kernel_registry; + +void InitializeRegistry() { + o_kernel_registry = KernelRegistry::Create(); + ORT_THROW_IF_ERROR(openvino_ep::RegisterOpenVINOKernels(*o_kernel_registry)); +} + +void DeleteRegistry() { + o_kernel_registry.reset(); +} + +std::shared_ptr OpenVINOExecutionProvider::GetKernelRegistry() const { + return o_kernel_registry; +} + + } // namespace onnxruntime + + + + + diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.h b/onnxruntime/core/providers/openvino/openvino_execution_provider.h 
index 5eae9c78c91d4..1b2b18cc57fbb 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.h +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.h @@ -3,10 +3,15 @@ #pragma once -#include "backend_manager.h" #include #include #include +#include +#include +#include "core/providers/shared_library/provider_api.h" +#include "contexts.h" +#include "backend_manager.h" +#include "ov_versions/capabilities.h" namespace onnxruntime { @@ -176,6 +181,8 @@ class OpenVINOExecutionProvider : public IExecutionProvider { const void* GetExecutionHandle() const noexcept override { return nullptr; } + + std::shared_ptr GetKernelRegistry() const override; }; } // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc index d118b37f8ab6f..7c69bd6a2214a 100644 --- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc +++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc @@ -7,6 +7,10 @@ #include "openvino_provider_factory_creator.h" namespace onnxruntime { + +void InitializeRegistry(); +void DeleteRegistry(); + struct OpenVINOProviderFactory : IExecutionProviderFactory { OpenVINOProviderFactory(const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads, @@ -72,10 +76,12 @@ struct OpenVINO_Provider : Provider { } void Initialize() override { + InitializeRegistry(); } void Shutdown() override { openvino_ep::BackendManager::ReleaseGlobalContext(); + DeleteRegistry(); } } g_provider; diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc index 88db95c1fbe92..6b793fe1709f0 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc @@ -57,13 +57,14 @@ std::set ops_supported_only_in_model = { "Slice", "Split", "Tile", - 
"TopK"}; + "TopK", + "BeamSearch"}; // Ops which are supported as functions (as composite ops) std::set ops_supported_as_function = { "LessOrEqual", "GreaterOrEqual", -}; + "BeamSearch"}; std::vector supported_op_mode = { {"Abs", V_2020_4, {"CPU", "GPU"}}, diff --git a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc index 2e4d5b6bfa500..aa99b0c580fe2 100644 --- a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc +++ b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc @@ -490,6 +490,21 @@ Status NonMaxSuppressionBase::GetThresholdsFromInputs(const PrepareContext& pc, Status GatherBase::PrepareForCompute(OpKernelContext* context, GatherBase::Prepare& p) const { return g_host_cpu.GatherBase__PrepareForCompute(this, context, reinterpret_cast(p)); } Status UnsqueezeBase::PrepareCompute(OpKernelContext* ctx, UnsqueezeBase::Prepare& p) const { return g_host_cpu.UnsqueezeBase__PrepareCompute(this, ctx, reinterpret_cast(p)); } +#if defined(USE_OPENVINO) +namespace contrib { +namespace transformers { +void BeamSearch::Init(const OpKernelInfo& info) { g_host_cpu.BeamSearch__Init(this, info); } + +Status BeamSearch::Compute(OpKernelContext* ctx) const { return g_host_cpu.BeamSearch__Compute(this, ctx); } + +Status BeamSearch::SetupSubgraphExecutionInfo(const SessionState& session_state, const std::string& attribute_name, + const SessionState& subgraph_session_state) { + return g_host_cpu.BeamSearch__SetupSubgraphExecutionInfo(this, session_state, attribute_name, subgraph_session_state); +} +} // namespace transformers +} // namespace contrib +#endif + #if defined(USE_CUDA) || defined(USE_ROCM) bool TileOp::IsTileMemcpy(const TensorShape& input_shape, const int64_t* repeats, size_t rank, bool& is_batched_memcpy, size_t& num_of_elements_per_batch, size_t& num_of_copies_per_batch, size_t& num_of_batch_copies) { return 
g_host_cpu.TileOp__IsTileMemcpy(input_shape, repeats, rank, is_batched_memcpy, num_of_elements_per_batch, num_of_copies_per_batch, num_of_batch_copies);