From da803b81d4252e3a585e7a70ed9221e5972d6ce5 Mon Sep 17 00:00:00 2001
From: Cavus Mustafa
Date: Mon, 4 Jan 2021 20:04:38 -0800
Subject: [PATCH] Add new IE backend engine module

---
 ngraph_bridge/CMakeLists.txt       |   2 +
 ngraph_bridge/executable.cc        |  41 ++++++-----
 ngraph_bridge/executable.h         |   3 +
 ngraph_bridge/ie_backend_engine.cc | 102 +++++++++++++++++++++++++++
 ngraph_bridge/ie_backend_engine.h  |  74 ++++++++++++++++++++
 ngraph_bridge/ie_basic_engine.cc   |  80 +++++++++++++++++++++
 ngraph_bridge/ie_basic_engine.h    |  51 ++++++++++++++
 ngraph_bridge/ie_utils.h           | 109 +++++++++++++++++++++++++++++
 8 files changed, 444 insertions(+), 18 deletions(-)
 create mode 100644 ngraph_bridge/ie_backend_engine.cc
 create mode 100644 ngraph_bridge/ie_backend_engine.h
 create mode 100644 ngraph_bridge/ie_basic_engine.cc
 create mode 100644 ngraph_bridge/ie_basic_engine.h
 create mode 100644 ngraph_bridge/ie_utils.h

diff --git a/ngraph_bridge/CMakeLists.txt b/ngraph_bridge/CMakeLists.txt
index 21c869ea1..1a6b8f89c 100644
--- a/ngraph_bridge/CMakeLists.txt
+++ b/ngraph_bridge/CMakeLists.txt
@@ -49,6 +49,8 @@ set(SRC
    tf_graphcycles.cc
    tf_deadness_analysis.cc
    version.cc
+   ie_backend_engine.cc
+   ie_basic_engine.cc
 )
 
 message(STATUS "NGRAPH_TF_USE_GRAPPLER_OPTIMIZER: ${NGRAPH_TF_USE_GRAPPLER_OPTIMIZER}")
diff --git a/ngraph_bridge/executable.cc b/ngraph_bridge/executable.cc
index 4cdd588af..bab62e90e 100644
--- a/ngraph_bridge/executable.cc
+++ b/ngraph_bridge/executable.cc
@@ -22,7 +22,9 @@
 #include "logging/ngraph_log.h"
 #include "ngraph_bridge/default_opset.h"
 #include "ngraph_bridge/executable.h"
+#include "ngraph_bridge/ie_basic_engine.h"
 #include "ngraph_bridge/ie_tensor.h"
+#include "ngraph_bridge/ie_utils.h"
 #include "ngraph_bridge/ngraph_utils.h"
 
 using namespace std;
@@ -139,12 +141,8 @@ Executable::Executable(shared_ptr<Function> func, string device)
         name + "_IE_" + m_device;
   }
 
-  NGRAPH_VLOG(2) << "Loading IE CNN network to device " << m_device;
-
-  // Load network to the plugin (m_device) and create an infer request
-  InferenceEngine::ExecutableNetwork exe_network =
-      ie.LoadNetwork(m_network, m_device, options);
-  m_infer_req = exe_network.CreateInferRequest();
+  NGRAPH_VLOG(2) << "Creating IE Execution Engine";
+  m_ie_engine = make_shared<IEBasicEngine>(m_network, m_device);
 }
 
 bool Executable::Call(const vector<shared_ptr<runtime::Tensor>>& inputs,
@@ -167,7 +165,9 @@ bool Executable::Call(const vector<shared_ptr<runtime::Tensor>>& inputs,
   }
 
   // Prepare input blobs
-  auto func = m_network.getFunction();
+  auto func = m_ie_engine->GetFunc();
+  std::vector<std::shared_ptr<IETensor>> ie_inputs(inputs.size());
+  std::vector<std::string> input_names(inputs.size());
   auto parameters = func->get_parameters();
   int j = 0;
   for (int i = 0; i < inputs.size(); i++) {
@@ -180,18 +180,23 @@ bool Executable::Call(const vector<shared_ptr<runtime::Tensor>>& inputs,
       NGRAPH_VLOG(1) << "Skipping unused input " << input_name;
       continue;
     }
-    shared_ptr<IETensor> tv = static_pointer_cast<IETensor>(inputs[i]);
-    m_infer_req.SetBlob(input_name, tv->get_blob());
+    ie_inputs[i] = nullptr;
+    ie_inputs[i] = static_pointer_cast<IETensor>(inputs[i]);
+    input_names[i] = input_name;
   }
 
+  std::vector<std::shared_ptr<IETensor>> ie_hoisted_params(
+      m_hoisted_params.size());
+  std::vector<std::string> param_names(m_hoisted_params.size());
   for (const auto& it : m_hoisted_params) {
     auto input_name = it.first;
     if (input_info.find(input_name) == input_info.end()) {
       NGRAPH_VLOG(1) << "Skipping unused hoisted param " << input_name;
       continue;
     }
-    shared_ptr<IETensor> tv = static_pointer_cast<IETensor>(it.second);
-    m_infer_req.SetBlob(input_name, tv->get_blob());
+    ie_hoisted_params[j] = nullptr;
+    ie_hoisted_params[j] = static_pointer_cast<IETensor>(it.second);
+    param_names[j++] = input_name;
   }
 
   InferenceEngine::OutputsDataMap output_info = m_network.getOutputsInfo();
@@ -214,22 +219,22 @@ bool Executable::Call(const vector<shared_ptr<runtime::Tensor>>& inputs,
 
   // Prepare output blobs
   auto results = func->get_results();
+  std::vector<std::shared_ptr<IETensor>> ie_outputs(outputs.size());
+  std::vector<std::string> output_names(outputs.size());
   for (int i = 0; i < results.size(); i++) {
     if (outputs[i] != nullptr) {
-      NGRAPH_VLOG(4) << "Executable::call() SetBlob()";
-      shared_ptr<IETensor> tv = static_pointer_cast<IETensor>(outputs[i]);
-      m_infer_req.SetBlob(get_output_name(results[i]), tv->get_blob());
+      ie_outputs[i] = static_pointer_cast<IETensor>(outputs[i]);
     }
+    output_names[i] = get_output_name(results[i]);
   }
 
-  m_infer_req.Infer();
+  m_ie_engine->Infer(ie_inputs, input_names, ie_outputs, output_names,
+                     ie_hoisted_params, param_names);
 
   // Set dynamic output blobs
   for (int i = 0; i < results.size(); i++) {
     if (outputs[i] == nullptr) {
-      NGRAPH_VLOG(4) << "Executable::call() GetBlob()";
-      auto blob = m_infer_req.GetBlob(get_output_name(results[i]));
-      outputs[i] = make_shared<IETensor>(blob);
+      outputs[i] = ie_outputs[i];
     }
   }
 
diff --git a/ngraph_bridge/executable.h b/ngraph_bridge/executable.h
index feb60e91b..9680fdcb2 100644
--- a/ngraph_bridge/executable.h
+++ b/ngraph_bridge/executable.h
@@ -23,6 +23,8 @@
 #include <ie_core.hpp>
 
 #include "ngraph/ngraph.hpp"
+#include "ngraph_bridge/ie_backend_engine.h"
+
 using namespace std;
 
 namespace tensorflow {
@@ -56,6 +58,7 @@ class Executable {
   shared_ptr<Function> m_trivial_fn;
   // This is the original nGraph function corresponding to this executable
   shared_ptr<Function> m_function;
+  shared_ptr<IEBackendEngine> m_ie_engine;
 };
 }
 }
diff --git a/ngraph_bridge/ie_backend_engine.cc b/ngraph_bridge/ie_backend_engine.cc
new file mode 100644
index 000000000..4ad8fa824
--- /dev/null
+++ b/ngraph_bridge/ie_backend_engine.cc
@@ -0,0 +1,102 @@
+/*******************************************************************************
+ * Copyright 2017-2020 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *******************************************************************************/
+
+#include <map>
+
+#include "ngraph_bridge/ie_backend_engine.h"
+#include "ngraph_bridge/ie_utils.h"
+
+namespace tensorflow {
+namespace ngraph_bridge {
+
+IEBackendEngine::IEBackendEngine(InferenceEngine::CNNNetwork ie_network,
+                                 std::string device)
+    : m_network(ie_network),
+      m_func(ie_network.getFunction()),
+      m_device(device),
+      m_multi_req_execution(false),
+      m_network_ready(false) {
+  if (std::getenv("NGRAPH_TF_DUMP_GRAPHS")) {
+    auto& name = m_network.getName();
+    m_network.serialize(name + ".xml", name + ".bin");
+  }
+}
+
+IEBackendEngine::~IEBackendEngine() {}
+
+void IEBackendEngine::LoadNetwork() {
+  if (m_network_ready) return;
+
+  std::map<std::string, std::string> config;
+
+  if (m_device == "MYRIAD") {
+    // Set MYRIAD configurations
+    if (IEUtils::VPUConfigEnabled()) {
+      config["MYRIAD_DETECT_NETWORK_BATCH"] = "NO";
+    }
+
+    if (IEUtils::VPUFastCompileEnabled()) {
+      config["MYRIAD_HW_INJECT_STAGES"] = "NO";
+      config["MYRIAD_COPY_OPTIMIZATION"] = "NO";
+    }
+  }
+
+  InferenceEngine::Core ie;
+  // Load the network to the plugin (m_device)
+  m_exe_network = ie.LoadNetwork(m_network, m_device, config);
+  m_network_ready = true;
+}
+
+void IEBackendEngine::StartAsyncInference(const int req_id) {
+  // Start asynchronous inference
+  try {
+    m_infer_reqs[req_id].StartAsync();
+  } catch (const InferenceEngine::details::InferenceEngineException& e) {
+    THROW_IE_EXCEPTION << "Couldn't start inference: " << e.what();
+  } catch (...) {
+    THROW_IE_EXCEPTION << "Couldn't start inference";
+  }
+}
+
+void IEBackendEngine::CompleteAsyncInference(const int req_id) {
+  // Wait for asynchronous inference completion
+  try {
+    m_infer_reqs[req_id].Wait(
+        InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
+  } catch (const InferenceEngine::details::InferenceEngineException& e) {
+    THROW_IE_EXCEPTION << "Exception while completing inference: " << e.what();
+  } catch (...) {
+    THROW_IE_EXCEPTION << "Exception while completing inference";
+  }
+}
+
+size_t IEBackendEngine::GetOutputBatchSize(size_t input_batch_size) const {
+  return m_network.getBatchSize() *
+         IEUtils::GetNumRequests(input_batch_size, m_device);
+}
+
+// Enables multi-request execution if the execution engine supports it
+void IEBackendEngine::EnableMultiReqExecution() {
+  m_multi_req_execution = true;
+}
+// Disables multi-request execution
+void IEBackendEngine::DisableMultiReqExecution() {
+  m_multi_req_execution = false;
+}
+
+std::shared_ptr<ngraph::Function> IEBackendEngine::GetFunc() { return m_func; }
+}
+}
diff --git a/ngraph_bridge/ie_backend_engine.h b/ngraph_bridge/ie_backend_engine.h
new file mode 100644
index 000000000..fc324640f
--- /dev/null
+++ b/ngraph_bridge/ie_backend_engine.h
@@ -0,0 +1,74 @@
+/*******************************************************************************
+ * Copyright 2017-2020 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *******************************************************************************/
+
+#ifndef IE_BACKEND_ENGINE_H_
+#define IE_BACKEND_ENGINE_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <ie_core.hpp>
+
+#include "ngraph_bridge/ie_tensor.h"
+
+namespace tensorflow {
+namespace ngraph_bridge {
+
+class IEBackendEngine {
+ public:
+  IEBackendEngine(InferenceEngine::CNNNetwork ie_network, std::string device);
+  ~IEBackendEngine();
+
+  // Executes the inference
+  virtual void Infer(std::vector<std::shared_ptr<IETensor>>& inputs,
+                     std::vector<std::string>& input_names,
+                     std::vector<std::shared_ptr<IETensor>>& outputs,
+                     std::vector<std::string>& output_names,
+                     std::vector<std::shared_ptr<IETensor>>& hoisted_params,
+                     std::vector<std::string>& param_names) = 0;
+
+  // Returns the output batch size based on the input batch size and the device
+  // FIXME: This may not be needed
+  virtual size_t GetOutputBatchSize(size_t input_batch_size) const;
+
+  // Enables multi-request execution if the execution engine supports it
+  void EnableMultiReqExecution();
+  // Disables multi-request execution
+  void DisableMultiReqExecution();
+
+  // Returns the nGraph Function from the CNNNetwork
+  std::shared_ptr<ngraph::Function> GetFunc();
+
+  virtual const std::vector<size_t> GetOutputShape(const int i) = 0;
+
+ protected:
+  InferenceEngine::CNNNetwork m_network;
+  std::shared_ptr<ngraph::Function> m_func;
+  std::vector<InferenceEngine::InferRequest> m_infer_reqs;
+  std::string m_device;
+  bool m_multi_req_execution;
+  InferenceEngine::ExecutableNetwork m_exe_network;
+  bool m_network_ready;
+
+  virtual void StartAsyncInference(const int req_id);
+  virtual void CompleteAsyncInference(const int req_id);
+  virtual void LoadNetwork();
+};
+}
+}
+
+#endif  // IE_BACKEND_ENGINE_H_
diff --git a/ngraph_bridge/ie_basic_engine.cc b/ngraph_bridge/ie_basic_engine.cc
new file mode 100644
index 000000000..62dc07348
--- /dev/null
+++ b/ngraph_bridge/ie_basic_engine.cc
@@ -0,0 +1,80 @@
+/*******************************************************************************
+ * Copyright 2017-2020 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *******************************************************************************/
+
+#include <memory>
+
+#include "logging/ngraph_log.h"
+#include "ngraph_bridge/ie_basic_engine.h"
+#include "ngraph_bridge/ie_utils.h"
+
+namespace tensorflow {
+namespace ngraph_bridge {
+
+IEBasicEngine::IEBasicEngine(InferenceEngine::CNNNetwork ie_network,
+                             std::string device)
+    : IEBackendEngine(ie_network, device) {}
+
+IEBasicEngine::~IEBasicEngine() {}
+
+void IEBasicEngine::Infer(
+    std::vector<std::shared_ptr<IETensor>>& inputs,
+    std::vector<std::string>& input_names,
+    std::vector<std::shared_ptr<IETensor>>& outputs,
+    std::vector<std::string>& output_names,
+    std::vector<std::shared_ptr<IETensor>>& hoisted_params,
+    std::vector<std::string>& param_names) {
+  LoadNetwork();
+  if (m_infer_reqs.empty()) {
+    m_infer_reqs.push_back(m_exe_network.CreateInferRequest());
+  }
+
+  // Prepare input blobs
+  auto func = m_network.getFunction();
+  auto parameters = func->get_parameters();
+  for (int i = 0; i < inputs.size(); i++) {
+    if (inputs[i] != nullptr)
+      m_infer_reqs[0].SetBlob(input_names[i], inputs[i]->get_blob());
+  }
+
+  for (int i = 0; i < hoisted_params.size(); i++) {
+    if (hoisted_params[i] != nullptr)
+      m_infer_reqs[0].SetBlob(param_names[i], hoisted_params[i]->get_blob());
+  }
+
+  // Prepare output blobs
+  auto results = func->get_results();
+  for (int i = 0; i < results.size(); i++) {
+    if (outputs[i] != nullptr) {
+      NGRAPH_VLOG(4) << "IEBasicEngine::Infer() SetBlob()";
+      m_infer_reqs[0].SetBlob(output_names[i], outputs[i]->get_blob());
+    }
+  }
+
+  m_infer_reqs[0].Infer();
+
+  // Set dynamic output blobs
+  for (int i = 0; i < results.size(); i++) {
+    if (outputs[i] == nullptr) {
+      NGRAPH_VLOG(4) << "IEBasicEngine::Infer() GetBlob()";
+      auto blob = m_infer_reqs[0].GetBlob(output_names[i]);
+      outputs[i] = std::make_shared<IETensor>(blob);
+    }
+  }
+}
+}
+}
diff --git a/ngraph_bridge/ie_basic_engine.h b/ngraph_bridge/ie_basic_engine.h
new file mode 100644
index 000000000..ae5d249ef
--- /dev/null
+++ b/ngraph_bridge/ie_basic_engine.h
@@ -0,0 +1,51 @@
+/*******************************************************************************
+ * Copyright 2017-2020 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *******************************************************************************/
+
+#ifndef IE_BASIC_ENGINE_H_
+#define IE_BASIC_ENGINE_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <ie_core.hpp>
+
+#include "ngraph_bridge/ie_backend_engine.h"
+
+namespace tensorflow {
+namespace ngraph_bridge {
+
+class IEBasicEngine : public IEBackendEngine {
+ public:
+  IEBasicEngine(InferenceEngine::CNNNetwork ie_network, std::string device);
+  ~IEBasicEngine();
+
+  // Executes the inference
+  virtual void Infer(std::vector<std::shared_ptr<IETensor>>& inputs,
+                     std::vector<std::string>& input_names,
+                     std::vector<std::shared_ptr<IETensor>>& outputs,
+                     std::vector<std::string>& output_names,
+                     std::vector<std::shared_ptr<IETensor>>& hoisted_params,
+                     std::vector<std::string>& param_names);
+
+  virtual const std::vector<size_t> GetOutputShape(const int i) {
+    return m_func->get_results()[i]->get_shape();
+  };
+};
+}
+}
+
+#endif  // IE_BASIC_ENGINE_H_
diff --git a/ngraph_bridge/ie_utils.h b/ngraph_bridge/ie_utils.h
new file mode 100644
index 000000000..11ecadbd8
--- /dev/null
+++ b/ngraph_bridge/ie_utils.h
@@ -0,0 +1,109 @@
+/*******************************************************************************
+ * Copyright 2017-2020 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *******************************************************************************/
+
+// Utility class with helper routines shared by the IE backend engines
+
+#ifndef IE_UTILS_H_
+#define IE_UTILS_H_
+
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <ie_core.hpp>
+
+class IEUtils {
+ public:
+  // Returns the maximum number of requests based on the device.
+  // TODO: The number of requests is hardcoded temporarily.
+  // This should dynamically look at the underlying architecture
+  // and compute the best performing number of requests.
+  static size_t GetMaxReq(std::string device) {
+    int max_req = 1;
+    if (device == "HDDL") max_req = 8;
+    return max_req;
+  }
+
+  // Computes the input batch size per request based on the actual input batch
+  // size and the device.
+  static size_t GetInputBatchSize(size_t inputBatchSize, std::string device) {
+    int max_req = IEUtils::GetMaxReq(device);
+    return ((inputBatchSize + max_req - 1) / max_req);
+  }
+
+  // Gets the actual number of requests
+  static size_t GetNumRequests(size_t inputBatchSize, std::string device) {
+    return inputBatchSize / GetInputBatchSize(inputBatchSize, device);
+  }
+
+  static bool VPUConfigEnabled() { return true; }
+
+  static bool VPUFastCompileEnabled() { return true; }
+
+  // Creates a MemoryBlob for InferenceEngine
+  static void CreateBlob(InferenceEngine::TensorDesc& desc,
+                         InferenceEngine::Precision& precision,
+                         const void* data_ptr, size_t byte_size,
+                         InferenceEngine::MemoryBlob::Ptr& blob_ptr) {
+#define MAKE_IE_BLOB(type_, desc_, ptr_, size_)                             \
+  do {                                                                      \
+    if (ptr_ == nullptr) {                                                  \
+      blob_ptr = std::make_shared<InferenceEngine::TBlob<type_>>(desc_);    \
+      blob_ptr->allocate();                                                 \
+    } else {                                                                \
+      blob_ptr = std::make_shared<InferenceEngine::TBlob<type_>>(           \
+          desc_, (type_*)ptr_, size_);                                      \
+    }                                                                       \
+  } while (0)
+    switch (precision) {
+      case InferenceEngine::Precision::FP32:
+        MAKE_IE_BLOB(float, desc, (float*)data_ptr, byte_size);
+        break;
+      case InferenceEngine::Precision::U8:
+        MAKE_IE_BLOB(uint8_t, desc, (uint8_t*)data_ptr, byte_size);
+        break;
+      case InferenceEngine::Precision::I8:
+        MAKE_IE_BLOB(int8_t, desc, (int8_t*)data_ptr, byte_size);
+        break;
+      case InferenceEngine::Precision::U16:
+        MAKE_IE_BLOB(uint16_t, desc, (uint16_t*)data_ptr, byte_size);
+        break;
+      case InferenceEngine::Precision::I16:
+        MAKE_IE_BLOB(int16_t, desc, (int16_t*)data_ptr, byte_size);
+        break;
+      case InferenceEngine::Precision::I32:
+        MAKE_IE_BLOB(int32_t, desc, (int32_t*)data_ptr, byte_size);
+        break;
+      case InferenceEngine::Precision::U64:
+        MAKE_IE_BLOB(uint64_t, desc, (uint64_t*)data_ptr, byte_size);
+        break;
+      case InferenceEngine::Precision::I64:
+        MAKE_IE_BLOB(int64_t, desc, (int64_t*)data_ptr, byte_size);
+        break;
+      case InferenceEngine::Precision::BOOL:
+        MAKE_IE_BLOB(uint8_t, desc, (uint8_t*)data_ptr, byte_size);
+        break;
+      default:
+        THROW_IE_EXCEPTION << "Can't create IE blob for type "
+                           << precision.name();
+    }
+  }
+};
+
+#endif  // IE_UTILS_H_
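
Reviewer sketch (not part of the patch): the snippet below illustrates how the new engine API is intended to be driven. After this change, Executable::Call only collects IETensor pointers and blob names, while IEBasicEngine::Infer loads the network, binds the blobs, and runs the actual inference. The RunOnce wrapper, the "CPU" device string, and the pre-built CNNNetwork argument are illustrative assumptions, not code from this patch.

// Illustrative usage only; assumes the headers added by this patch and a
// CNNNetwork that was already built from an ngraph::Function elsewhere.
#include <memory>
#include <string>
#include <vector>

#include "ngraph_bridge/ie_basic_engine.h"
#include "ngraph_bridge/ie_tensor.h"

using tensorflow::ngraph_bridge::IEBasicEngine;
using tensorflow::ngraph_bridge::IETensor;

// Hypothetical helper: runs one inference through the new engine the same
// way Executable::Call does after this patch.
void RunOnce(InferenceEngine::CNNNetwork& network,
             std::vector<std::shared_ptr<IETensor>>& inputs,
             std::vector<std::string>& input_names,
             std::vector<std::shared_ptr<IETensor>>& outputs,
             std::vector<std::string>& output_names) {
  // The engine owns LoadNetwork() and the InferRequest; the caller only
  // hands over tensors and the matching blob names.
  auto engine = std::make_shared<IEBasicEngine>(network, "CPU");

  // No hoisted parameters in this example.
  std::vector<std::shared_ptr<IETensor>> hoisted_params;
  std::vector<std::string> param_names;

  engine->Infer(inputs, input_names, outputs, output_names, hoisted_params,
                param_names);

  // Any entry of `outputs` passed in as nullptr has now been replaced by an
  // IETensor wrapping the blob fetched from the infer request (see
  // IEBasicEngine::Infer above).
}

Keeping Executable unaware of InferRequest details is presumably what allows a batched or multi-request engine to be dropped in later behind the same IEBackendEngine interface (see the virtual StartAsyncInference/CompleteAsyncInference hooks).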