From da803b81d4252e3a585e7a70ed9221e5972d6ce5 Mon Sep 17 00:00:00 2001
From: Cavus Mustafa
Date: Mon, 4 Jan 2021 20:04:38 -0800
Subject: [PATCH] Add new IE backend engine module

---
 ngraph_bridge/CMakeLists.txt       |   2 +
 ngraph_bridge/executable.cc        |  41 ++++++-----
 ngraph_bridge/executable.h         |   3 +
 ngraph_bridge/ie_backend_engine.cc | 102 +++++++++++++++++++++++++++
 ngraph_bridge/ie_backend_engine.h  |  74 ++++++++++++++++++++
 ngraph_bridge/ie_basic_engine.cc   |  80 +++++++++++++++++++++
 ngraph_bridge/ie_basic_engine.h    |  51 ++++++++++++++
 ngraph_bridge/ie_utils.h           | 109 +++++++++++++++++++++++++++++
 8 files changed, 444 insertions(+), 18 deletions(-)
 create mode 100644 ngraph_bridge/ie_backend_engine.cc
 create mode 100644 ngraph_bridge/ie_backend_engine.h
 create mode 100644 ngraph_bridge/ie_basic_engine.cc
 create mode 100644 ngraph_bridge/ie_basic_engine.h
 create mode 100644 ngraph_bridge/ie_utils.h

diff --git a/ngraph_bridge/CMakeLists.txt b/ngraph_bridge/CMakeLists.txt
index 21c869ea1..1a6b8f89c 100644
--- a/ngraph_bridge/CMakeLists.txt
+++ b/ngraph_bridge/CMakeLists.txt
@@ -49,6 +49,8 @@ set(SRC
    tf_graphcycles.cc
    tf_deadness_analysis.cc
    version.cc
+   ie_backend_engine.cc
+   ie_basic_engine.cc
 )
 
 message(STATUS "NGRAPH_TF_USE_GRAPPLER_OPTIMIZER: ${NGRAPH_TF_USE_GRAPPLER_OPTIMIZER}")
diff --git a/ngraph_bridge/executable.cc b/ngraph_bridge/executable.cc
index 4cdd588af..bab62e90e 100644
--- a/ngraph_bridge/executable.cc
+++ b/ngraph_bridge/executable.cc
@@ -22,7 +22,9 @@
 #include "logging/ngraph_log.h"
 #include "ngraph_bridge/default_opset.h"
 #include "ngraph_bridge/executable.h"
+#include "ngraph_bridge/ie_basic_engine.h"
 #include "ngraph_bridge/ie_tensor.h"
+#include "ngraph_bridge/ie_utils.h"
 #include "ngraph_bridge/ngraph_utils.h"
 
 using namespace std;
@@ -139,12 +141,8 @@ Executable::Executable(shared_ptr<Function> func, string device)
         name + "_IE_" + m_device;
   }
 
-  NGRAPH_VLOG(2) << "Loading IE CNN network to device " << m_device;
-
-  // Load network to the plugin (m_device) and create an infer request
-  InferenceEngine::ExecutableNetwork exe_network =
-      ie.LoadNetwork(m_network, m_device, options);
-  m_infer_req = exe_network.CreateInferRequest();
+  NGRAPH_VLOG(2) << "Creating IE Execution Engine";
+  m_ie_engine = make_shared<IEBasicEngine>(m_network, m_device);
 }
 
 bool Executable::Call(const vector<shared_ptr<runtime::Tensor>>& inputs,
@@ -167,7 +165,9 @@ bool Executable::Call(const vector<shared_ptr<runtime::Tensor>>& inputs,
   }
 
   // Prepare input blobs
-  auto func = m_network.getFunction();
+  auto func = m_ie_engine->GetFunc();
+  std::vector<std::shared_ptr<IETensor>> ie_inputs(inputs.size());
+  std::vector<std::string> input_names(inputs.size());
   auto parameters = func->get_parameters();
   int j = 0;
   for (int i = 0; i < inputs.size(); i++) {
@@ -180,18 +180,23 @@ bool Executable::Call(const vector<shared_ptr<runtime::Tensor>>& inputs,
       NGRAPH_VLOG(1) << "Skipping unused input " << input_name;
       continue;
     }
-    shared_ptr<IETensor> tv = static_pointer_cast<IETensor>(inputs[i]);
-    m_infer_req.SetBlob(input_name, tv->get_blob());
+    ie_inputs[i] = nullptr;
+    ie_inputs[i] = static_pointer_cast<IETensor>(inputs[i]);
+    input_names[i] = input_name;
   }
 
+  std::vector<std::shared_ptr<IETensor>> ie_hoisted_params(
+      m_hoisted_params.size());
+  std::vector<std::string> param_names(m_hoisted_params.size());
   for (const auto& it : m_hoisted_params) {
     auto input_name = it.first;
     if (input_info.find(input_name) == input_info.end()) {
       NGRAPH_VLOG(1) << "Skipping unused hoisted param " << input_name;
       continue;
     }
-    shared_ptr<IETensor> tv = static_pointer_cast<IETensor>(it.second);
-    m_infer_req.SetBlob(input_name, tv->get_blob());
+    ie_hoisted_params[j] = nullptr;
+    ie_hoisted_params[j] = static_pointer_cast<IETensor>(it.second);
+    param_names[j++] = input_name;
   }
 
   InferenceEngine::OutputsDataMap output_info = m_network.getOutputsInfo();
@@ -214,22 +219,22 @@ bool Executable::Call(const vector<shared_ptr<runtime::Tensor>>& inputs,
 
   // Prepare output blobs
   auto results = func->get_results();
+  std::vector<std::shared_ptr<IETensor>> ie_outputs(outputs.size());
+  std::vector<std::string> output_names(outputs.size());
   for (int i = 0; i < results.size(); i++) {
     if (outputs[i] != nullptr) {
-      NGRAPH_VLOG(4) << "Executable::call() SetBlob()";
-      shared_ptr<IETensor> tv = static_pointer_cast<IETensor>(outputs[i]);
-      m_infer_req.SetBlob(get_output_name(results[i]), tv->get_blob());
+      ie_outputs[i] = static_pointer_cast<IETensor>(outputs[i]);
     }
+    output_names[i] = get_output_name(results[i]);
   }
 
-  m_infer_req.Infer();
+  m_ie_engine->Infer(ie_inputs, input_names, ie_outputs, output_names,
+                     ie_hoisted_params, param_names);
 
   // Set dynamic output blobs
   for (int i = 0; i < results.size(); i++) {
     if (outputs[i] == nullptr) {
-      NGRAPH_VLOG(4) << "Executable::call() GetBlob()";
-      auto blob = m_infer_req.GetBlob(get_output_name(results[i]));
-      outputs[i] = make_shared<IETensor>(blob);
+      outputs[i] = ie_outputs[i];
     }
   }
 
diff --git a/ngraph_bridge/executable.h b/ngraph_bridge/executable.h
index feb60e91b..9680fdcb2 100644
--- a/ngraph_bridge/executable.h
+++ b/ngraph_bridge/executable.h
@@ -23,6 +23,8 @@
 #include <ie_core.hpp>
 
 #include "ngraph/ngraph.hpp"
+#include "ngraph_bridge/ie_backend_engine.h"
+
 using namespace std;
 
 namespace tensorflow {
@@ -56,6 +58,7 @@ class Executable {
   shared_ptr<Function> m_trivial_fn;
   // This is the original nGraph function corresponding to this executable
   shared_ptr<Function> m_function;
+  shared_ptr<IEBackendEngine> m_ie_engine;
 };
 }
 }
diff --git a/ngraph_bridge/ie_backend_engine.cc b/ngraph_bridge/ie_backend_engine.cc
new file mode 100644
index 000000000..4ad8fa824
--- /dev/null
+++ b/ngraph_bridge/ie_backend_engine.cc
@@ -0,0 +1,102 @@
+/*******************************************************************************
+ * Copyright 2017-2020 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *******************************************************************************/
+
+#include <map>
+
+#include "ngraph_bridge/ie_backend_engine.h"
+#include "ngraph_bridge/ie_utils.h"
+
+namespace tensorflow {
+namespace ngraph_bridge {
+
+IEBackendEngine::IEBackendEngine(InferenceEngine::CNNNetwork ie_network,
+                                 std::string device)
+    : m_network(ie_network),
+      m_func(ie_network.getFunction()),
+      m_device(device),
+      m_multi_req_execution(false),
+      m_network_ready(false) {
+  if (std::getenv("NGRAPH_TF_DUMP_GRAPHS")) {
+    auto& name = m_network.getName();
+    m_network.serialize(name + ".xml", name + ".bin");
+  }
+}
+
+IEBackendEngine::~IEBackendEngine() {}
+
+void IEBackendEngine::LoadNetwork() {
+  if (m_network_ready) return;
+
+  std::map<std::string, std::string> config;
+
+  if (m_device == "MYRIAD") {
+    // Set MYRIAD configurations
+    if (IEUtils::VPUConfigEnabled()) {
+      config["MYRIAD_DETECT_NETWORK_BATCH"] = "NO";
+    }
+
+    if (IEUtils::VPUFastCompileEnabled()) {
+      config["MYRIAD_HW_INJECT_STAGES"] = "NO";
+      config["MYRIAD_COPY_OPTIMIZATION"] = "NO";
+    }
+  }
+
+  InferenceEngine::Core ie;
+  // Load the network to the plugin (m_device)
+  m_exe_network = ie.LoadNetwork(m_network, m_device, config);
+  m_network_ready = true;
+}
+
+void IEBackendEngine::StartAsyncInference(const int req_id) {
+  // Start asynchronous inference
+  try {
+    m_infer_reqs[req_id].StartAsync();
+  } catch (const InferenceEngine::details::InferenceEngineException& e) {
+    THROW_IE_EXCEPTION << "Couldn't start inference: " << e.what();
+  } catch (...) {
+    THROW_IE_EXCEPTION << "Couldn't start inference";
+  }
+}
+
+void IEBackendEngine::CompleteAsyncInference(const int req_id) {
+  // Wait for asynchronous inference completion
+  try {
+    m_infer_reqs[req_id].Wait(
+        InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
+  } catch (const InferenceEngine::details::InferenceEngineException& e) {
+    THROW_IE_EXCEPTION << "Exception while completing inference: " << e.what();
+  } catch (...) {
+    THROW_IE_EXCEPTION << "Exception while completing inference";
+  }
+}
+
+size_t IEBackendEngine::GetOutputBatchSize(size_t input_batch_size) const {
+  return m_network.getBatchSize() *
+         IEUtils::GetNumRequests(input_batch_size, m_device);
+}
+
+// Enables multi-request execution if the execution engine supports it
+void IEBackendEngine::EnableMultiReqExecution() {
+  m_multi_req_execution = true;
+}
+// Disables multi-request execution
+void IEBackendEngine::DisableMultiReqExecution() {
+  m_multi_req_execution = false;
+}
+
+std::shared_ptr<ngraph::Function> IEBackendEngine::GetFunc() { return m_func; }
+}
+}
diff --git a/ngraph_bridge/ie_backend_engine.h b/ngraph_bridge/ie_backend_engine.h
new file mode 100644
index 000000000..fc324640f
--- /dev/null
+++ b/ngraph_bridge/ie_backend_engine.h
@@ -0,0 +1,74 @@
+/*******************************************************************************
+ * Copyright 2017-2020 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *******************************************************************************/
+
+#ifndef IE_BACKEND_ENGINE_H_
+#define IE_BACKEND_ENGINE_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <ie_core.hpp>
+
+#include "ngraph_bridge/ie_tensor.h"
+
+namespace tensorflow {
+namespace ngraph_bridge {
+
+class IEBackendEngine {
+ public:
+  IEBackendEngine(InferenceEngine::CNNNetwork ie_network, std::string device);
+  ~IEBackendEngine();
+
+  // Executes the inference
+  virtual void Infer(std::vector<std::shared_ptr<IETensor>>& inputs,
+                     std::vector<std::string>& input_names,
+                     std::vector<std::shared_ptr<IETensor>>& outputs,
+                     std::vector<std::string>& output_names,
+                     std::vector<std::shared_ptr<IETensor>>& hoisted_params,
+                     std::vector<std::string>& param_names) = 0;
+
+  // Returns the output batch size based on the input batch size and the device
+  // FIXME: This may not be needed
+  virtual size_t GetOutputBatchSize(size_t input_batch_size) const;
+
+  // Enables multi-request execution if the execution engine supports it
+  void EnableMultiReqExecution();
+  // Disables multi-request execution
+  void DisableMultiReqExecution();
+
+  // Returns the nGraph Function from the CNNNetwork
+  std::shared_ptr<ngraph::Function> GetFunc();
+
+  virtual const std::vector<size_t> GetOutputShape(const int i) = 0;
+
+ protected:
+  InferenceEngine::CNNNetwork m_network;
+  std::shared_ptr<ngraph::Function> m_func;
+  std::vector<InferenceEngine::InferRequest> m_infer_reqs;
+  std::string m_device;
+  bool m_multi_req_execution;
+  InferenceEngine::ExecutableNetwork m_exe_network;
+  bool m_network_ready;
+
+  virtual void StartAsyncInference(const int req_id);
+  virtual void CompleteAsyncInference(const int req_id);
+  virtual void LoadNetwork();
+};
+}
+}
+
+#endif  // IE_BACKEND_ENGINE_H_
diff --git a/ngraph_bridge/ie_basic_engine.cc b/ngraph_bridge/ie_basic_engine.cc
new file mode 100644
index 000000000..62dc07348
--- /dev/null
+++ b/ngraph_bridge/ie_basic_engine.cc
@@ -0,0 +1,80 @@
+/*******************************************************************************
+ * Copyright 2017-2020 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *******************************************************************************/
+
+#include <memory>
+
+#include "logging/ngraph_log.h"
+#include "ngraph_bridge/ie_basic_engine.h"
+#include "ngraph_bridge/ie_utils.h"
+
+namespace tensorflow {
+namespace ngraph_bridge {
+
+IEBasicEngine::IEBasicEngine(InferenceEngine::CNNNetwork ie_network,
+                             std::string device)
+    : IEBackendEngine(ie_network, device) {}
+
+IEBasicEngine::~IEBasicEngine() {}
+
+void IEBasicEngine::Infer(
+    std::vector<std::shared_ptr<IETensor>>& inputs,
+    std::vector<std::string>& input_names,
+    std::vector<std::shared_ptr<IETensor>>& outputs,
+    std::vector<std::string>& output_names,
+    std::vector<std::shared_ptr<IETensor>>& hoisted_params,
+    std::vector<std::string>& param_names) {
+  LoadNetwork();
+  if (m_infer_reqs.empty()) {
+    m_infer_reqs.push_back(m_exe_network.CreateInferRequest());
+  }
+
+  // Prepare input blobs
+  auto func = m_network.getFunction();
+  auto parameters = func->get_parameters();
+  for (int i = 0; i < inputs.size(); i++) {
+    if (inputs[i] != nullptr)
+      m_infer_reqs[0].SetBlob(input_names[i], inputs[i]->get_blob());
+  }
+
+  for (int i = 0; i < hoisted_params.size(); i++) {
+    if (hoisted_params[i] != nullptr)
+      m_infer_reqs[0].SetBlob(param_names[i], hoisted_params[i]->get_blob());
+  }
+
+  // Prepare output blobs
+  auto results = func->get_results();
+  for (int i = 0; i < results.size(); i++) {
+    if (outputs[i] != nullptr) {
+      NGRAPH_VLOG(4) << "IEBasicEngine::Infer() SetBlob()";
+      m_infer_reqs[0].SetBlob(output_names[i], outputs[i]->get_blob());
+    }
+  }
+
+  m_infer_reqs[0].Infer();
+
+  // Set dynamic output blobs
+  for (int i = 0; i < results.size(); i++) {
+    if (outputs[i] == nullptr) {
+      NGRAPH_VLOG(4) << "IEBasicEngine::Infer() GetBlob()";
+      auto blob = m_infer_reqs[0].GetBlob(output_names[i]);
+      outputs[i] = std::make_shared<IETensor>(blob);
+    }
+  }
+}
+}
+}
diff --git a/ngraph_bridge/ie_basic_engine.h b/ngraph_bridge/ie_basic_engine.h
new file mode 100644
index 000000000..ae5d249ef
--- /dev/null
+++ b/ngraph_bridge/ie_basic_engine.h
@@ -0,0 +1,51 @@
+/*******************************************************************************
+ * Copyright 2017-2020 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *******************************************************************************/
+
+#ifndef IE_BASIC_ENGINE_H_
+#define IE_BASIC_ENGINE_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <ie_core.hpp>
+
+#include "ngraph_bridge/ie_backend_engine.h"
+
+namespace tensorflow {
+namespace ngraph_bridge {
+
+class IEBasicEngine : public IEBackendEngine {
+ public:
+  IEBasicEngine(InferenceEngine::CNNNetwork ie_network, std::string device);
+  ~IEBasicEngine();
+
+  // Executes the inference
+  virtual void Infer(std::vector<std::shared_ptr<IETensor>>& inputs,
+                     std::vector<std::string>& input_names,
+                     std::vector<std::shared_ptr<IETensor>>& outputs,
+                     std::vector<std::string>& output_names,
+                     std::vector<std::shared_ptr<IETensor>>& hoisted_params,
+                     std::vector<std::string>& param_names);
+
+  virtual const std::vector<size_t> GetOutputShape(const int i) {
+    return m_func->get_results()[i]->get_shape();
+  };
+};
+}
+}
+
+#endif  // IE_BASIC_ENGINE_H_
diff --git a/ngraph_bridge/ie_utils.h b/ngraph_bridge/ie_utils.h
new file mode 100644
index 000000000..11ecadbd8
--- /dev/null
+++ b/ngraph_bridge/ie_utils.h
@@ -0,0 +1,109 @@
+/*******************************************************************************
+ * Copyright 2017-2020 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *******************************************************************************/
+
+// Utility class with helper routines shared by the IE backend engines
+
+#ifndef IE_UTILS_H_
+#define IE_UTILS_H_
+
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <ie_core.hpp>
+
+class IEUtils {
+ public:
+  // Returns the maximum number of requests based on the device.
+  // TODO: The number of requests is hardcoded temporarily.
+  // This should dynamically look at the underlying architecture
+  // and compute the best performing number of requests.
+  static size_t GetMaxReq(std::string device) {
+    int max_req = 1;
+    if (device == "HDDL") max_req = 8;
+    return max_req;
+  }
+
+  // Computes the input batch size per request based on the actual input batch
+  // size and the device.
+  static size_t GetInputBatchSize(size_t inputBatchSize, std::string device) {
+    int max_req = IEUtils::GetMaxReq(device);
+    return ((inputBatchSize + max_req - 1) / max_req);
+  }
+
+  // Gets the actual number of requests
+  static size_t GetNumRequests(size_t inputBatchSize, std::string device) {
+    return inputBatchSize / GetInputBatchSize(inputBatchSize, device);
+  }
+
+  static bool VPUConfigEnabled() { return true; }
+
+  static bool VPUFastCompileEnabled() { return true; }
+
+  // Creates a MemoryBlob for InferenceEngine
+  static void CreateBlob(InferenceEngine::TensorDesc& desc,
+                         InferenceEngine::Precision& precision,
+                         const void* data_ptr, size_t byte_size,
+                         InferenceEngine::MemoryBlob::Ptr& blob_ptr) {
+#define MAKE_IE_BLOB(type_, desc_, ptr_, size_)                             \
+  do {                                                                      \
+    if (ptr_ == nullptr) {                                                  \
+      blob_ptr = std::make_shared<InferenceEngine::TBlob<type_>>(desc_);    \
+      blob_ptr->allocate();                                                 \
+    } else {                                                                \
+      blob_ptr = std::make_shared<InferenceEngine::TBlob<type_>>(           \
+          desc_, (type_*)ptr_, size_);                                      \
+    }                                                                       \
+  } while (0)
+    switch (precision) {
+      case InferenceEngine::Precision::FP32:
+        MAKE_IE_BLOB(float, desc, (float*)data_ptr, byte_size);
+        break;
+      case InferenceEngine::Precision::U8:
+        MAKE_IE_BLOB(uint8_t, desc, (uint8_t*)data_ptr, byte_size);
+        break;
+      case InferenceEngine::Precision::I8:
+        MAKE_IE_BLOB(int8_t, desc, (int8_t*)data_ptr, byte_size);
+        break;
+      case InferenceEngine::Precision::U16:
+        MAKE_IE_BLOB(uint16_t, desc, (uint16_t*)data_ptr, byte_size);
+        break;
+      case InferenceEngine::Precision::I16:
+        MAKE_IE_BLOB(int16_t, desc, (int16_t*)data_ptr, byte_size);
+        break;
+      case InferenceEngine::Precision::I32:
+        MAKE_IE_BLOB(int32_t, desc, (int32_t*)data_ptr, byte_size);
+        break;
+      case InferenceEngine::Precision::U64:
+        MAKE_IE_BLOB(uint64_t, desc, (uint64_t*)data_ptr, byte_size);
+        break;
+      case InferenceEngine::Precision::I64:
+        MAKE_IE_BLOB(int64_t, desc, (int64_t*)data_ptr, byte_size);
+        break;
+      case InferenceEngine::Precision::BOOL:
+        MAKE_IE_BLOB(uint8_t, desc, (uint8_t*)data_ptr, byte_size);
+        break;
+      default:
+        THROW_IE_EXCEPTION << "Can't create IE blob for type "
+                           << precision.name();
+    }
+  }
+};
+
+#endif  // IE_UTILS_H_
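
Reviewer sketch (not part of the patch): the snippet below illustrates how the new engine API is intended to be driven. After this change, Executable::Call only collects IETensor pointers and blob names, while IEBasicEngine::Infer loads the network, binds the blobs, and runs the actual inference. The RunOnce wrapper, the "CPU" device string, and the pre-built CNNNetwork argument are illustrative assumptions, not code from this patch.

// Illustrative usage only; assumes the headers added by this patch and a
// CNNNetwork that was already built from an ngraph::Function elsewhere.
#include <memory>
#include <string>
#include <vector>

#include "ngraph_bridge/ie_basic_engine.h"
#include "ngraph_bridge/ie_tensor.h"

using tensorflow::ngraph_bridge::IEBasicEngine;
using tensorflow::ngraph_bridge::IETensor;

// Hypothetical helper: runs one inference through the new engine the same
// way Executable::Call does after this patch.
void RunOnce(InferenceEngine::CNNNetwork& network,
             std::vector<std::shared_ptr<IETensor>>& inputs,
             std::vector<std::string>& input_names,
             std::vector<std::shared_ptr<IETensor>>& outputs,
             std::vector<std::string>& output_names) {
  // The engine owns LoadNetwork() and the InferRequest; the caller only
  // hands over tensors and the matching blob names.
  auto engine = std::make_shared<IEBasicEngine>(network, "CPU");

  // No hoisted parameters in this example.
  std::vector<std::shared_ptr<IETensor>> hoisted_params;
  std::vector<std::string> param_names;

  engine->Infer(inputs, input_names, outputs, output_names, hoisted_params,
                param_names);

  // Any entry of `outputs` passed in as nullptr has now been replaced by an
  // IETensor wrapping the blob fetched from the infer request (see
  // IEBasicEngine::Infer above).
}

Keeping Executable unaware of InferRequest details is presumably what allows a batched or multi-request engine to be dropped in later behind the same IEBackendEngine interface (see the virtual StartAsyncInference/CompleteAsyncInference hooks).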