[onert] Share memory for Reshape, ExpandDims and Squeeze
This commit extends the existing tensor memory management infrastructure to let tensors share memory where possible.

ONE-DCO-1.0-Signed-off-by: Mateusz Bencer m.bencer@partner.samsung.com
mbencer committed Oct 2, 2024
1 parent 8753418 commit c8d8a75
Showing 14 changed files with 186 additions and 49 deletions.
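
At a high level, the change records, for operands produced by reshape-like operations, which operand actually owns the memory, and the corresponding kernels skip their copy when input and output already alias the same buffer. A minimal, self-contained C++ sketch of that idea follows (simplified types; Tensor, OperandIndex and runReshape are illustrative stand-ins, not the actual onert classes):

// Minimal sketch of the aliasing idea, under the assumptions stated above.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <unordered_map>
#include <vector>

using OperandIndex = uint32_t;

struct Tensor
{
  uint8_t *buffer = nullptr;
  std::size_t size = 0;
};

// Output operand -> operand that is the source of its memory.
using SharedMemoryOperandMap = std::unordered_map<OperandIndex, OperandIndex>;

void runReshape(const Tensor &input, Tensor &output)
{
  // When the memory planner gave both tensors the same buffer, the copy is skipped.
  if (output.buffer != input.buffer)
    std::memcpy(output.buffer, input.buffer, input.size);
}

int main()
{
  std::vector<uint8_t> storage(16, 0x42);
  Tensor in{storage.data(), storage.size()};
  Tensor out{storage.data(), storage.size()}; // aliased: shares the input's buffer
  SharedMemoryOperandMap shared_map{{1u /*output*/, 0u /*input*/}};
  (void)shared_map;
  runReshape(in, out); // no memcpy is performed
}
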
2 changes: 1 addition & 1 deletion runtime/onert/backend/cpu/Backend.h
@@ -45,7 +45,7 @@ class Backend : public ::onert::backend::Backend
auto &graph = *data.graph;
auto context = std::make_unique<BackendContext>(this, std::move(data));
auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
auto tb = std::make_shared<TensorBuilder>(tr, context->data().shared_memory_operand_map);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
8 changes: 6 additions & 2 deletions runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc
@@ -38,8 +38,12 @@ void ExpandDimsLayer::configure(const IPortableTensor *input, IPortableTensor *o

void ExpandDimsLayer::run()
{
size_t count = _input->total_size();
memcpy(_output->buffer(), _input->buffer(), count);
// If the output buffer already aliases the input buffer, no copy is needed
if (_output->buffer() != _input->buffer())
{
size_t count = _input->total_size();
memcpy(_output->buffer(), _input->buffer(), count);
}
}

} // namespace ops
8 changes: 6 additions & 2 deletions runtime/onert/backend/cpu/ops/ReshapeLayer.cc
@@ -32,8 +32,12 @@ ReshapeLayer::ReshapeLayer() : _input(nullptr), _shape(nullptr), _output(nullptr

void ReshapeLayer::reshapeGeneric()
{
size_t count = _input->total_size();
memcpy(_output->buffer(), _input->buffer(), count);
// If the output buffer already aliases the input buffer, no copy is needed
if (_output->buffer() != _input->buffer())
{
size_t count = _input->total_size();
memcpy(_output->buffer(), _input->buffer(), count);
}
}

void ReshapeLayer::configure(const IPortableTensor *input, const IPortableTensor *shape,
2 changes: 1 addition & 1 deletion runtime/onert/backend/ruy/Backend.h
@@ -45,7 +45,7 @@ class Backend : public ::onert::backend::Backend
auto &graph = *data.graph;
auto context = std::make_unique<BackendContext>(this, std::move(data));
auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
auto tb = std::make_shared<TensorBuilder>(tr, context->data().shared_memory_operand_map);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
2 changes: 2 additions & 0 deletions runtime/onert/core/include/backend/BackendContext.h
@@ -46,6 +46,8 @@ struct ContextData
std::shared_ptr<custom::IKernelBuilder> custom_kernel_builder;
/* Is linear executor or not */
bool is_linear_executor;
/* Map of operands that share memory; each mapped value is the operand that is the source of the memory */
ir::OperandIndexMap<ir::OperandIndex> shared_memory_operand_map;
};

class BackendContext
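
The pass that fills shared_memory_operand_map is not part of the hunks expanded above. A hypothetical sketch of how such a map could be derived from reshape-like operations (names and structure are assumptions, not the actual onert compiler pass):

// Hypothetical sketch only: builds output -> memory-source entries for
// reshape-like (Reshape / ExpandDims / Squeeze) operations so that chains like
// Reshape(Reshape(x)) all resolve to x. Not the actual onert pass.
#include <cstdint>
#include <unordered_map>
#include <utility>
#include <vector>

using OperandIndex = uint32_t;
using SharedMemoryOperandMap = std::unordered_map<OperandIndex, OperandIndex>;

SharedMemoryOperandMap
buildSharedMemoryMap(const std::vector<std::pair<OperandIndex, OperandIndex>> &reshape_like_ops)
{
  // reshape_like_ops holds (input, output) operand pairs in topological order.
  SharedMemoryOperandMap map;
  for (const auto &[input, output] : reshape_like_ops)
  {
    auto source = input;
    const auto it = map.find(source);
    if (it != map.end())
      source = it->second; // follow the chain to the root memory owner
    map[output] = source;
  }
  return map;
}
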
29 changes: 22 additions & 7 deletions runtime/onert/core/include/backend/basic/BackendContextHelpers.h
@@ -210,31 +210,46 @@ template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContex

inline void initConsts(const ir::Operands &operands,
const util::Set<ir::OperandIndex> &external_operands,
ITensorRegistry *tensor_registry)
ITensorRegistry *tensor_registry,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operands_map)
{
operands.iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
if (external_operands.contains(ind) || !operand.isConstant())
const bool has_const_shared_memory =
shared_memory_operands_map.find(ind) != std::end(shared_memory_operands_map) &&
operands.at(shared_memory_operands_map.at(ind)).isConstant();
const bool can_be_initialized_as_const = operand.isConstant() || has_const_shared_memory;
if (external_operands.contains(ind) || !can_be_initialized_as_const)
return;

auto tensor = tensor_registry->getNativeITensor(ind);
assert(tensor != nullptr);

VERBOSE(FillOperandData) << "Fill data for " << ind << std::endl;

auto data = operand.shareData();
assert(data && data->base());
ExternalTensor *ext_tensor = dynamic_cast<ExternalTensor *>(tensor);

if (ext_tensor == nullptr)
throw std::runtime_error{"This tensor is not external tensor"};

ext_tensor->setData(data);
if (has_const_shared_memory)
{
const auto &memory_source_operand = operands.at(shared_memory_operands_map.at(ind));
auto memory_source_data = memory_source_operand.shareData();
assert(memory_source_data && memory_source_data->base());
ext_tensor->setData(memory_source_data);
}
else
{
auto data = operand.shareData();
assert(data && data->base());
ext_tensor->setData(data);
}
});
}

inline void initConsts(BackendContext &ctx)
{
initConsts(ctx.graph()->operands(), ctx.external_operands(), ctx.tensor_registry.get());
initConsts(ctx.graph()->operands(), ctx.external_operands(), ctx.tensor_registry.get(),
ctx.data().shared_memory_operand_map);
}

} // namespace basic
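
In isolation, the initConsts change means a tensor whose memory source is a constant operand is handed the source operand's data instead of data of its own. A small illustrative sketch of that selection (plain standard-library types; pickConstData and the map layout are assumptions for illustration, not the onert API):

// Illustrative only: choose which constant data a tensor should point at,
// mirroring the decision above with simplified, non-onert types.
#include <cstdint>
#include <memory>
#include <unordered_map>
#include <vector>

using OperandIndex = uint32_t;
using Data = std::shared_ptr<const std::vector<uint8_t>>;

Data pickConstData(OperandIndex ind, const std::unordered_map<OperandIndex, Data> &const_data,
                   const std::unordered_map<OperandIndex, OperandIndex> &shared_memory_map)
{
  const auto shared = shared_memory_map.find(ind);
  if (shared != shared_memory_map.end() && const_data.count(shared->second) != 0)
    return const_data.at(shared->second); // reuse the memory source's constant data
  return const_data.at(ind);              // otherwise use the operand's own data
}
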
runtime/onert/core/include/backend/basic/StaticTensorManager.h
@@ -37,9 +37,11 @@ class StaticTensorManager
{
public:
StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
DynamicTensorManager *dynamic_tensor_manager);
DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);
StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg, const std::string planner_id,
DynamicTensorManager *dynamic_tensor_manager);
DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);
virtual ~StaticTensorManager() = default;

void allocateNonconsts(void);
@@ -57,6 +59,8 @@ class StaticTensorManager
const std::shared_ptr<TensorRegistry> _tensors;
ir::OperandIndexMap<bool> _as_constants;
DynamicTensorManager *_dynamic_tensor_manager;
ir::OperandIndexMap<ir::OperandIndex> _operands_with_shared_memory;
ir::OperandIndexMap<uint32_t> _source_operands_ref_counter;
};

} // namespace basic
6 changes: 4 additions & 2 deletions runtime/onert/core/include/backend/basic/TensorBuilder.h
@@ -37,8 +37,10 @@ namespace basic
class TensorBuilder
{
public:
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id);
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);

/**
* @brief Register tensor information to allocate on CPU backend
107 changes: 85 additions & 22 deletions runtime/onert/core/src/backend/basic/StaticTensorManager.cc
@@ -14,6 +14,8 @@
* limitations under the License.
*/

#include <algorithm>

#include "backend/basic/StaticTensorManager.h"

#include "backend/basic/DynamicTensorManager.h"
@@ -27,19 +29,23 @@ namespace backend
namespace basic
{

StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
DynamicTensorManager *dynamic_tensor_manager)
StaticTensorManager::StaticTensorManager(
const std::shared_ptr<TensorRegistry> &reg, DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory)
: _nonconst_mgr{new MemoryManager()}, _tensors{reg},
_dynamic_tensor_manager{dynamic_tensor_manager}
_dynamic_tensor_manager{dynamic_tensor_manager},
_operands_with_shared_memory{operands_with_shared_memory}
{
// DO NOTHING
}

StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
const std::string planner_id,
DynamicTensorManager *dynamic_tensor_manager)
StaticTensorManager::StaticTensorManager(
const std::shared_ptr<TensorRegistry> &reg, const std::string planner_id,
DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory)
: _nonconst_mgr{new MemoryManager(planner_id)}, _tensors{reg},
_dynamic_tensor_manager{dynamic_tensor_manager}
_dynamic_tensor_manager{dynamic_tensor_manager},
_operands_with_shared_memory{operands_with_shared_memory}
{
// DO NOTHING
}
@@ -50,13 +56,28 @@ void StaticTensorManager::allocateNonconsts(void)

for (auto &&[ind, tensor] : _tensors->native_tensors())
{
if (!_as_constants[ind] && !tensor->is_dynamic())
bool buffer_set = false;
if (!tensor->is_dynamic())
{
auto *buffer = _nonconst_mgr->getBuffer(ind);
tensor->setBuffer(buffer);

VERBOSE(CPU_StaticTensorManager)
<< "TENSOR " << ind << " : " << static_cast<void *>(buffer) << std::endl;
if (_operands_with_shared_memory.find(ind) != std::end(_operands_with_shared_memory))
{
const auto &shared_memory_ind = _operands_with_shared_memory[ind];
if (!_as_constants[shared_memory_ind])
{
tensor->setBuffer(_nonconst_mgr->getBuffer(shared_memory_ind));
buffer_set = true;
}
}
else if (!_as_constants[ind])
{
tensor->setBuffer(_nonconst_mgr->getBuffer(ind));
buffer_set = true;
}
if (buffer_set)
{
VERBOSE(CPU_StaticTensorManager)
<< "TENSOR " << ind << " : " << static_cast<void *>(tensor->buffer()) << std::endl;
}
}
}
}
@@ -67,17 +88,30 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
const ir::OperandInfo &tensor_info, bool as_const)
{
assert(!_tensors->getNativeTensor(ind));
std::unique_ptr<Tensor> tensor = nullptr;
if (as_const)
{
auto tensor = std::make_unique<ExternalTensor>(tensor_info);
_tensors->setNativeTensor(ind, std::move(tensor));
tensor = std::make_unique<ExternalTensor>(tensor_info);
}
else
{
auto tensor =
std::make_unique<Tensor>(tensor_info, _dynamic_tensor_manager->dynamic_mem_mgr().get());
_tensors->setNativeTensor(ind, std::move(tensor));
const auto source_operand = _operands_with_shared_memory.find(ind);
if (source_operand != std::end(_operands_with_shared_memory) &&
_as_constants[source_operand->second])
{
as_const = _as_constants[source_operand->second];
auto new_tensor_info = tensor_info;
new_tensor_info.setAsConstant();
tensor = std::make_unique<ExternalTensor>(new_tensor_info);
}
else
{
tensor =
std::make_unique<Tensor>(tensor_info, _dynamic_tensor_manager->dynamic_mem_mgr().get());
}
}
assert(tensor);
_tensors->setNativeTensor(ind, std::move(tensor));
_as_constants[ind] = as_const;
}

@@ -88,8 +122,22 @@ void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
// This method is called only when a tensor has proper shape
assert(!_tensors->getNativeTensor(ind)->is_dynamic());

if (!_as_constants[ind])
_nonconst_mgr->claimPlan(ind, size);
const auto source_ind = _operands_with_shared_memory.find(ind);
if (source_ind == std::end(_operands_with_shared_memory))
{
if (!_as_constants[ind])
{
_nonconst_mgr->claimPlan(ind, size);
++_source_operands_ref_counter[ind];
}
}
else
{
if (!_as_constants[source_ind->second])
{
++_source_operands_ref_counter[source_ind->second];
}
}
}

void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
@@ -99,8 +147,23 @@ void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
// This method is called only when a tensor has proper shape
assert(!_tensors->getNativeTensor(ind)->is_dynamic());

if (!_as_constants[ind])
_nonconst_mgr->releasePlan(ind);
const auto source_operand_ind =
std::find_if(std::begin(_operands_with_shared_memory), std::end(_operands_with_shared_memory),
[&ind](const auto &op) { return op.second == ind; });

ir::OperandIndex release_ind;
if (source_operand_ind == std::end(_operands_with_shared_memory))
{
release_ind = ind;
}
else
{
release_ind = source_operand_ind->second;
}
if (!_as_constants[release_ind] && 0 == _source_operands_ref_counter[release_ind])
{
_nonconst_mgr->releasePlan(release_ind);
}
}

void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
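
claimPlan and releasePlan now track how many operands reference each memory-source operand. Reference-counted release of a shared buffer can be illustrated in isolation as below (simplified sketch; SharedPlanTracker is an assumption for illustration, not the onert StaticTensorManager):

// Illustrative sketch of reference-counted release for a shared buffer:
// the underlying plan is released only after the last aliasing operand
// has released it. Not the actual onert implementation.
#include <cstdint>
#include <unordered_map>

using OperandIndex = uint32_t;

struct SharedPlanTracker
{
  std::unordered_map<OperandIndex, uint32_t> ref_count; // keyed by memory-source operand

  void claim(OperandIndex source) { ++ref_count[source]; }

  // Returns true when the caller may actually release the source's buffer.
  bool release(OperandIndex source)
  {
    auto &count = ref_count[source];
    if (count > 0)
      --count;
    return count == 0;
  }
};
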
15 changes: 10 additions & 5 deletions runtime/onert/core/src/backend/basic/TensorBuilder.cc
@@ -27,17 +27,22 @@ namespace backend
namespace basic
{

TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
TensorBuilder::TensorBuilder(
const std::shared_ptr<TensorRegistry> &tensor_reg,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory)
: _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
_static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
_static_tensor_mgr{
new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get(), operands_with_shared_memory)}
{
/* empty */
}

TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg,
const std::string planner_id)
TensorBuilder::TensorBuilder(
const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory)
: _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
_static_tensor_mgr{new StaticTensorManager(_tensor_reg, planner_id, _dynamic_tensor_mgr.get())}
_static_tensor_mgr{new StaticTensorManager(_tensor_reg, planner_id, _dynamic_tensor_mgr.get(),
operands_with_shared_memory)}
{
/* empty */
}
2 changes: 1 addition & 1 deletion runtime/onert/core/src/backend/builtin/Backend.h
@@ -66,7 +66,7 @@ class Backend : public ::onert::backend::Backend, public backend::train::ITraina
// TODO Remove TensorBuilder and ConstantInitializer
// TODO Support Consecutive controflow operation's intermediate tensor
auto tr = std::make_shared<TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
auto tb = std::make_shared<TensorBuilder>(tr, context->data().shared_memory_operand_map);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(
8 changes: 5 additions & 3 deletions runtime/onert/core/src/backend/builtin/TensorBuilder.cc
@@ -27,10 +27,12 @@ namespace backend
namespace builtin
{

TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
TensorBuilder::TensorBuilder(
const std::shared_ptr<TensorRegistry> &tensor_reg,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory)
: _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg->base_reg())},
_static_tensor_mgr{
new basic::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())}
_static_tensor_mgr{new basic::StaticTensorManager(
_tensor_reg->base_reg(), _dynamic_tensor_mgr.get(), operands_with_shared_memory)}
{
/* empty */
}
3 changes: 2 additions & 1 deletion runtime/onert/core/src/backend/builtin/TensorBuilder.h
@@ -37,7 +37,8 @@ namespace builtin
class TensorBuilder
{
public:
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);

/**
* @brief Register tensor information to allocate on CPU backend