[onert] Share memory for Reshape, ExpandDims and Squeeze #14057

Draft · wants to merge 24 commits into base: master
Changes from 12 commits
2 changes: 1 addition & 1 deletion runtime/onert/backend/cpu/Backend.h
@@ -45,7 +45,7 @@ class Backend : public ::onert::backend::Backend
auto &graph = *data.graph;
auto context = std::make_unique<BackendContext>(this, std::move(data));
auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
auto tb = std::make_shared<TensorBuilder>(tr, context->data().shared_memory_operand_map);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
8 changes: 6 additions & 2 deletions runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc
@@ -38,8 +38,12 @@ void ExpandDimsLayer::configure(const IPortableTensor *input, IPortableTensor *o

void ExpandDimsLayer::run()
{
size_t count = _input->total_size();
memcpy(_output->buffer(), _input->buffer(), count);
// When the output buffer aliases the input buffer, no copy is needed
if (_output->buffer() != _input->buffer())
{
size_t count = _input->total_size();
memcpy(_output->buffer(), _input->buffer(), count);
}
}

} // namespace ops
8 changes: 6 additions & 2 deletions runtime/onert/backend/cpu/ops/ReshapeLayer.cc
@@ -32,8 +32,12 @@ ReshapeLayer::ReshapeLayer() : _input(nullptr), _shape(nullptr), _output(nullptr

void ReshapeLayer::reshapeGeneric()
{
size_t count = _input->total_size();
memcpy(_output->buffer(), _input->buffer(), count);
// When the output buffer aliases the input buffer, no copy is needed
if (_output->buffer() != _input->buffer())
{
size_t count = _input->total_size();
memcpy(_output->buffer(), _input->buffer(), count);
}
}

void ReshapeLayer::configure(const IPortableTensor *input, const IPortableTensor *shape,
2 changes: 1 addition & 1 deletion runtime/onert/backend/ruy/Backend.h
@@ -45,7 +45,7 @@ class Backend : public ::onert::backend::Backend
auto &graph = *data.graph;
auto context = std::make_unique<BackendContext>(this, std::move(data));
auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
auto tb = std::make_shared<TensorBuilder>(tr, context->data().shared_memory_operand_map);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
2 changes: 1 addition & 1 deletion runtime/onert/backend/trix/Backend.h
@@ -44,7 +44,7 @@ class Backend : public ::onert::backend::Backend
auto &graph = *data.graph;
auto context = std::make_unique<BackendContext>(this, std::move(data));
auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
auto tb = std::make_shared<TensorBuilder>(tr, context->data().shared_memory_operand_map);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, context->dev_context());
2 changes: 1 addition & 1 deletion runtime/onert/backend/xnnpack/Backend.h
@@ -45,7 +45,7 @@ class Backend : public ::onert::backend::Backend
auto &graph = *data.graph;
auto context = std::make_unique<BackendContext>(this, std::move(data));
auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
auto tb = std::make_shared<TensorBuilder>(tr, context->data().shared_memory_operand_map);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
2 changes: 2 additions & 0 deletions runtime/onert/core/include/backend/BackendContext.h
@@ -46,6 +46,8 @@ struct ContextData
std::shared_ptr<custom::IKernelBuilder> custom_kernel_builder;
/* Is linear executor or not */
bool is_linear_executor;
/* Map of operands which share memory where the values are sources of memory */
ir::OperandIndexMap<ir::OperandIndex> shared_memory_operand_map;
};

class BackendContext
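Note: the new shared_memory_operand_map records, for every operand that reuses another operand's buffer, which operand actually owns the memory. The following is a minimal, self-contained sketch of the intended mapping; plain integers stand in for ir::OperandIndex and the index values are invented for illustration, not taken from this PR.

// Illustration only: stand-in types for ir::OperandIndexMap / ir::OperandIndex.
#include <cassert>
#include <cstdint>
#include <unordered_map>

int main()
{
  // Key: operand that shares memory; value: operand that owns (sources) the memory.
  std::unordered_map<uint32_t, uint32_t> shared_memory_operand_map;

  // A Reshape whose output (operand 7) can alias its input (operand 3).
  shared_memory_operand_map[7] = 3;

  // Consumers look up the memory source before allocating a buffer of their own.
  assert(shared_memory_operand_map.at(7) == 3);
  return 0;
}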
44 changes: 37 additions & 7 deletions runtime/onert/core/include/backend/basic/BackendContextHelpers.h
@@ -182,9 +182,24 @@ template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContex
const ir::Graph &graph = *ctx.graph();
auto tensor_builder = ctx.tensor_builder;

// process source tensors for shared memory at first
std::vector<ir::OperandIndex> registered_source_ind;
for (const auto &[_, source_ind] : tensor_builder->getOperandsWithSharedMemory())
{
if (ctx.external_operands().contains(source_ind))
continue;
if (tensor_builder->isRegistered(source_ind)) // some tensors can have the same source
continue;
tensor_builder->registerTensorInfo(source_ind, graph.operands().at(source_ind).info());
registered_source_ind.emplace_back(source_ind);
}

graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
if (ctx.external_operands().contains(ind))
return;
if (std::find(std::begin(registered_source_ind), std::end(registered_source_ind), ind) !=
std::end(registered_source_ind)) // skip tensors already registered
return;
tensor_builder->registerTensorInfo(ind, obj.info());
});

@@ -210,31 +225,46 @@ template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContex

inline void initConsts(const ir::Operands &operands,
const util::Set<ir::OperandIndex> &external_operands,
ITensorRegistry *tensor_registry)
ITensorRegistry *tensor_registry,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operands_map)
{
operands.iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
if (external_operands.contains(ind) || !operand.isConstant())
const bool has_const_shared_memory =
shared_memory_operands_map.find(ind) != std::end(shared_memory_operands_map) &&
operands.at(shared_memory_operands_map.at(ind)).isConstant();
const bool can_be_initialized_as_const = operand.isConstant() || has_const_shared_memory;
if (external_operands.contains(ind) || !can_be_initialized_as_const)
return;

auto tensor = tensor_registry->getNativeITensor(ind);
assert(tensor != nullptr);

VERBOSE(FillOperandData) << "Fill data for " << ind << std::endl;

auto data = operand.shareData();
assert(data && data->base());
ExternalTensor *ext_tensor = dynamic_cast<ExternalTensor *>(tensor);

if (ext_tensor == nullptr)
throw std::runtime_error{"This tensor is not external tensor"};

ext_tensor->setData(data);
if (has_const_shared_memory)
{
const auto &memory_source_operand = operands.at(shared_memory_operands_map.at(ind));
auto memory_source_data = memory_source_operand.shareData();
assert(memory_source_data && memory_source_data->base());
ext_tensor->setData(memory_source_data);
}
else
{
auto data = operand.shareData();
assert(data && data->base());
ext_tensor->setData(data);
}
});
}

inline void initConsts(BackendContext &ctx)
{
initConsts(ctx.graph()->operands(), ctx.external_operands(), ctx.tensor_registry.get());
initConsts(ctx.graph()->operands(), ctx.external_operands(), ctx.tensor_registry.get(),
ctx.data().shared_memory_operand_map);
}

} // namespace basic
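Note: the updated initConsts also initializes operands that are not constant themselves but whose memory source is constant, pointing their tensor at the source operand's data. Below is a self-contained model of that decision; the Operand struct, shared_map and index values are simplified stand-ins for illustration, not the runtime's real API.

// Illustration only: model of the data-source selection in initConsts().
#include <cassert>
#include <cstddef>
#include <unordered_map>
#include <vector>

struct Operand
{
  bool is_const = false;
  const void *data = nullptr; // stand-in for Operand::shareData()
};

int main()
{
  static const float kWeights[4] = {1.f, 2.f, 3.f, 4.f};

  std::vector<Operand> operands(3);
  operands[0] = {true, kWeights}; // constant memory source
  operands[2] = {false, nullptr}; // e.g. a Reshape output, not constant itself

  // Operand 2 shares memory with operand 0.
  std::unordered_map<std::size_t, std::size_t> shared_map{{2, 0}};

  const std::size_t ind = 2;
  const auto it = shared_map.find(ind);
  const bool has_const_shared_memory = it != shared_map.end() && operands[it->second].is_const;

  // initConsts() would point operand 2's ExternalTensor at the source's constant data.
  const void *tensor_data = has_const_shared_memory ? operands[it->second].data : operands[ind].data;
  assert(tensor_data == kWeights);
  return 0;
}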
runtime/onert/core/include/backend/basic/StaticTensorManager.h
@@ -37,9 +37,11 @@ class StaticTensorManager
{
public:
StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
DynamicTensorManager *dynamic_tensor_manager);
DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);
StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg, const std::string planner_id,
DynamicTensorManager *dynamic_tensor_manager);
DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);
virtual ~StaticTensorManager() = default;

void allocateNonconsts(void);
@@ -57,6 +59,8 @@ class StaticTensorManager
const std::shared_ptr<TensorRegistry> _tensors;
ir::OperandIndexMap<bool> _as_constants;
DynamicTensorManager *_dynamic_tensor_manager;
ir::OperandIndexMap<ir::OperandIndex> _operands_with_shared_memory;
ir::OperandIndexMap<uint32_t> _source_operands_ref_counter;
};

} // namespace basic
9 changes: 7 additions & 2 deletions runtime/onert/core/include/backend/basic/TensorBuilder.h
@@ -37,8 +37,10 @@ namespace basic
class TensorBuilder
{
public:
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id);
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);

/**
* @brief Register tensor information to allocate on CPU backend
@@ -54,13 +56,16 @@ class TensorBuilder

void allocate(void);

const ir::OperandIndexMap<ir::OperandIndex> &getOperandsWithSharedMemory() const;

DynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }

private:
const std::shared_ptr<TensorRegistry> _tensor_reg;
std::unique_ptr<DynamicTensorManager> _dynamic_tensor_mgr;
std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
ir::OperandIndexMap<ir::OperandIndex> _operands_with_shared_memory;
};

} // namespace basic
107 changes: 85 additions & 22 deletions runtime/onert/core/src/backend/basic/StaticTensorManager.cc
@@ -14,6 +14,8 @@
* limitations under the License.
*/

#include <algorithm>

#include "backend/basic/StaticTensorManager.h"

#include "backend/basic/DynamicTensorManager.h"
@@ -27,19 +29,23 @@ namespace basic
namespace basic
{

StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
DynamicTensorManager *dynamic_tensor_manager)
StaticTensorManager::StaticTensorManager(
const std::shared_ptr<TensorRegistry> &reg, DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory)
: _nonconst_mgr{new MemoryManager()}, _tensors{reg},
_dynamic_tensor_manager{dynamic_tensor_manager}
_dynamic_tensor_manager{dynamic_tensor_manager},
_operands_with_shared_memory{operands_with_shared_memory}
{
// DO NOTHING
}

StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
const std::string planner_id,
DynamicTensorManager *dynamic_tensor_manager)
StaticTensorManager::StaticTensorManager(
const std::shared_ptr<TensorRegistry> &reg, const std::string planner_id,
DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory)
: _nonconst_mgr{new MemoryManager(planner_id)}, _tensors{reg},
_dynamic_tensor_manager{dynamic_tensor_manager}
_dynamic_tensor_manager{dynamic_tensor_manager},
_operands_with_shared_memory{operands_with_shared_memory}
{
// DO NOTHING
}
@@ -50,13 +56,28 @@ void StaticTensorManager::allocateNonconsts(void)

for (auto &&[ind, tensor] : _tensors->native_tensors())
{
if (!_as_constants[ind] && !tensor->is_dynamic())
bool buffer_set = false;
if (!tensor->is_dynamic())
{
auto *buffer = _nonconst_mgr->getBuffer(ind);
tensor->setBuffer(buffer);

VERBOSE(CPU_StaticTensorManager)
<< "TENSOR " << ind << " : " << static_cast<void *>(buffer) << std::endl;
if (_operands_with_shared_memory.find(ind) != std::end(_operands_with_shared_memory))
{
const auto &shared_memory_ind = _operands_with_shared_memory[ind];
if (!_as_constants[shared_memory_ind])
{
tensor->setBuffer(_nonconst_mgr->getBuffer(shared_memory_ind));
buffer_set = true;
}
}
else if (!_as_constants[ind])
{
tensor->setBuffer(_nonconst_mgr->getBuffer(ind));
buffer_set = true;
}
if (buffer_set)
{
VERBOSE(CPU_StaticTensorManager)
<< "TENSOR " << ind << " : " << static_cast<void *>(tensor->buffer()) << std::endl;
}
}
}
}
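Note: the buffer selection above can be summarized with a self-contained model: a non-dynamic tensor whose operand appears in the shared-memory map receives the memory source's buffer instead of its own allocation. The maps and index values below are stand-ins for the MemoryManager and ir::OperandIndexMap, invented for illustration.

// Illustration only: model of the buffer selection in allocateNonconsts().
#include <cassert>
#include <cstdint>
#include <unordered_map>
#include <vector>

int main()
{
  std::vector<uint8_t> source_buffer(64);

  // Stand-in for _nonconst_mgr: buffers planned per owning operand.
  std::unordered_map<uint32_t, uint8_t *> planned{{3, source_buffer.data()}};
  // Stand-in for _operands_with_shared_memory: operand 7 aliases operand 3.
  std::unordered_map<uint32_t, uint32_t> shared{{7, 3}};
  std::unordered_map<uint32_t, bool> as_constants{{3, false}, {7, false}};

  const uint32_t ind = 7;
  uint8_t *buffer = nullptr;
  const auto it = shared.find(ind);
  if (it != shared.end())
  {
    if (!as_constants[it->second])
      buffer = planned[it->second]; // share the source operand's buffer
  }
  else if (!as_constants[ind])
  {
    buffer = planned[ind]; // regular, non-shared allocation
  }

  assert(buffer == source_buffer.data());
  return 0;
}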
@@ -67,17 +88,30 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
const ir::OperandInfo &tensor_info, bool as_const)
{
assert(!_tensors->getNativeTensor(ind));
std::unique_ptr<Tensor> tensor = nullptr;
if (as_const)
{
auto tensor = std::make_unique<ExternalTensor>(tensor_info);
_tensors->setNativeTensor(ind, std::move(tensor));
tensor = std::make_unique<ExternalTensor>(tensor_info);
}
else
{
auto tensor =
std::make_unique<Tensor>(tensor_info, _dynamic_tensor_manager->dynamic_mem_mgr().get());
_tensors->setNativeTensor(ind, std::move(tensor));
const auto source_operand = _operands_with_shared_memory.find(ind);
if (source_operand != std::end(_operands_with_shared_memory) &&
_as_constants[source_operand->second])
{
as_const = _as_constants[source_operand->second];
auto new_tensor_info = tensor_info;
new_tensor_info.setAsConstant();
tensor = std::make_unique<ExternalTensor>(new_tensor_info);
}
else
{
tensor =
std::make_unique<Tensor>(tensor_info, _dynamic_tensor_manager->dynamic_mem_mgr().get());
}
}
assert(tensor);
_tensors->setNativeTensor(ind, std::move(tensor));
_as_constants[ind] = as_const;
}

@@ -88,8 +122,22 @@ void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
// This method is called only when a tensor has proper shape
assert(!_tensors->getNativeTensor(ind)->is_dynamic());

if (!_as_constants[ind])
_nonconst_mgr->claimPlan(ind, size);
const auto source_ind = _operands_with_shared_memory.find(ind);
if (source_ind == std::end(_operands_with_shared_memory))
{
if (!_as_constants[ind])
{
_nonconst_mgr->claimPlan(ind, size);
++_source_operands_ref_counter[ind];
}
}
else
{
if (!_as_constants[source_ind->second])
{
++_source_operands_ref_counter[source_ind->second];
}
}
}

void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
@@ -99,8 +147,23 @@ void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
// This method is called only when a tensor has proper shape
assert(!_tensors->getNativeTensor(ind)->is_dynamic());

if (!_as_constants[ind])
_nonconst_mgr->releasePlan(ind);
const auto source_operand_ind =
std::find_if(std::begin(_operands_with_shared_memory), std::end(_operands_with_shared_memory),
[&ind](const auto &op) { return op.second == ind; });

ir::OperandIndex release_ind;
if (source_operand_ind == std::end(_operands_with_shared_memory))
{
release_ind = ind;
}
else
{
release_ind = source_operand_ind->second;
}
if (!_as_constants[release_ind] && 0 == _source_operands_ref_counter[release_ind])
{
_nonconst_mgr->releasePlan(release_ind);
}
}

void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
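Note: the claim path shown in claimPlan can be modeled as follows: an operand that owns its memory claims a plan and bumps its own counter, while an operand that aliases a source only bumps the source's counter, so the source's buffer stays planned while aliasing users exist. This is a self-contained sketch of that counting behavior only; plain maps stand in for the MemoryManager and the operand-index maps, and the index values are invented.

// Illustration only: model of the reference counting done in claimPlan().
#include <cassert>
#include <cstdint>
#include <unordered_map>

int main()
{
  std::unordered_map<uint32_t, uint32_t> shared{{7, 3}};              // operand 7 aliases operand 3
  std::unordered_map<uint32_t, bool> as_constants{{3, false}, {7, false}};
  std::unordered_map<uint32_t, uint32_t> ref_counter;                 // per memory-source operand
  std::unordered_map<uint32_t, bool> plan_claimed;                    // stand-in for _nonconst_mgr->claimPlan()

  auto claim = [&](uint32_t ind) {
    const auto it = shared.find(ind);
    if (it == shared.end())
    {
      if (!as_constants[ind])
      {
        plan_claimed[ind] = true; // the source operand claims its own plan
        ++ref_counter[ind];
      }
    }
    else if (!as_constants[it->second])
    {
      ++ref_counter[it->second]; // aliasing operand only bumps the source's counter
    }
  };

  claim(3); // memory source
  claim(7); // aliasing operand
  assert(plan_claimed[3] && ref_counter[3] == 2);
  return 0;
}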