diff --git a/runtime/onert/backend/cpu/Backend.h b/runtime/onert/backend/cpu/Backend.h
index 398c188a897..ea8f1a5cd69 100644
--- a/runtime/onert/backend/cpu/Backend.h
+++ b/runtime/onert/backend/cpu/Backend.h
@@ -45,7 +45,7 @@ class Backend : public ::onert::backend::Backend
     auto &graph = *data.graph;
     auto context = std::make_unique<BackendContext>(this, std::move(data));
     auto tr = std::make_shared<TensorRegistry>();
-    auto tb = std::make_shared<TensorBuilder>(tr);
+    auto tb = std::make_shared<TensorBuilder>(tr, context->data().shared_memory_operand_map);
     context->tensor_registry = tr;
     context->tensor_builder = tb;
     context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
diff --git a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc
index 5ea0ea89364..543c402dd22 100644
--- a/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc
@@ -38,8 +38,12 @@ void ExpandDimsLayer::configure(const IPortableTensor *input, IPortableTensor *o

 void ExpandDimsLayer::run()
 {
-  size_t count = _input->total_size();
-  memcpy(_output->buffer(), _input->buffer(), count);
+  // no copy is needed when the output buffer is the same as the input buffer
+  if (_output->buffer() != _input->buffer())
+  {
+    size_t count = _input->total_size();
+    memcpy(_output->buffer(), _input->buffer(), count);
+  }
 }

 } // namespace ops
diff --git a/runtime/onert/backend/cpu/ops/ReshapeLayer.cc b/runtime/onert/backend/cpu/ops/ReshapeLayer.cc
index 3c2b115f417..a50cf0eaceb 100644
--- a/runtime/onert/backend/cpu/ops/ReshapeLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ReshapeLayer.cc
@@ -32,8 +32,12 @@ ReshapeLayer::ReshapeLayer() : _input(nullptr), _shape(nullptr), _output(nullptr

 void ReshapeLayer::reshapeGeneric()
 {
-  size_t count = _input->total_size();
-  memcpy(_output->buffer(), _input->buffer(), count);
+  // no copy is needed when the output buffer is the same as the input buffer
+  if (_output->buffer() != _input->buffer())
+  {
+    size_t count = _input->total_size();
+    memcpy(_output->buffer(), _input->buffer(), count);
+  }
 }

 void ReshapeLayer::configure(const IPortableTensor *input, const IPortableTensor *shape,
diff --git a/runtime/onert/backend/ruy/Backend.h b/runtime/onert/backend/ruy/Backend.h
index 4077965c450..ec7c471b2e2 100644
--- a/runtime/onert/backend/ruy/Backend.h
+++ b/runtime/onert/backend/ruy/Backend.h
@@ -45,7 +45,7 @@ class Backend : public ::onert::backend::Backend
     auto &graph = *data.graph;
     auto context = std::make_unique<BackendContext>(this, std::move(data));
     auto tr = std::make_shared<TensorRegistry>();
-    auto tb = std::make_shared<TensorBuilder>(tr);
+    auto tb = std::make_shared<TensorBuilder>(tr, context->data().shared_memory_operand_map);
     context->tensor_registry = tr;
     context->tensor_builder = tb;
     context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
diff --git a/runtime/onert/core/include/backend/BackendContext.h b/runtime/onert/core/include/backend/BackendContext.h
index 052809f7d11..21e26c1d7fa 100644
--- a/runtime/onert/core/include/backend/BackendContext.h
+++ b/runtime/onert/core/include/backend/BackendContext.h
@@ -46,6 +46,8 @@ struct ContextData
   std::shared_ptr<custom::IKernelBuilder> custom_kernel_builder;
   /* Is linear executor or not */
   bool is_linear_executor;
+  /* Map of operands which share memory; the mapped value is the operand that is the source of the memory */
+  ir::OperandIndexMap<ir::OperandIndex> shared_memory_operand_map;
 };

 class BackendContext
diff --git a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h
index 79a535559e1..43c66d44e77 100644
--- a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h
+++ b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h
@@ -210,10 +210,15 @@ template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContex

 inline void initConsts(const ir::Operands &operands,
                        const util::Set<ir::OperandIndex> &external_operands,
-                       ITensorRegistry *tensor_registry)
+                       ITensorRegistry *tensor_registry,
+                       const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operands_map)
 {
   operands.iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
-    if (external_operands.contains(ind) || !operand.isConstant())
+    const bool has_const_shared_memory =
+      shared_memory_operands_map.find(ind) != std::end(shared_memory_operands_map) &&
+      operands.at(shared_memory_operands_map.at(ind)).isConstant();
+    const bool can_be_initialized_as_const = operand.isConstant() || has_const_shared_memory;
+    if (external_operands.contains(ind) || !can_be_initialized_as_const)
       return;

     auto tensor = tensor_registry->getNativeITensor(ind);
@@ -221,20 +226,30 @@ inline void initConsts(const ir::Operands &operands,

     VERBOSE(FillOperandData) << "Fill data for " << ind << std::endl;

-    auto data = operand.shareData();
-    assert(data && data->base());
     ExternalTensor *ext_tensor = dynamic_cast<ExternalTensor *>(tensor);
-
     if (ext_tensor == nullptr)
       throw std::runtime_error{"This tensor is not external tensor"};
-    ext_tensor->setData(data);
+    if (has_const_shared_memory)
+    {
+      const auto &memory_source_operand = operands.at(shared_memory_operands_map.at(ind));
+      auto memory_source_data = memory_source_operand.shareData();
+      assert(memory_source_data && memory_source_data->base());
+      ext_tensor->setData(memory_source_data);
+    }
+    else
+    {
+      auto data = operand.shareData();
+      assert(data && data->base());
+      ext_tensor->setData(data);
+    }
   });
 }

 inline void initConsts(BackendContext &ctx)
 {
-  initConsts(ctx.graph()->operands(), ctx.external_operands(), ctx.tensor_registry.get());
+  initConsts(ctx.graph()->operands(), ctx.external_operands(), ctx.tensor_registry.get(),
+             ctx.data().shared_memory_operand_map);
 }

 } // namespace basic
diff --git a/runtime/onert/core/include/backend/basic/StaticTensorManager.h b/runtime/onert/core/include/backend/basic/StaticTensorManager.h
index 2aab4303131..25ea483e773 100644
--- a/runtime/onert/core/include/backend/basic/StaticTensorManager.h
+++ b/runtime/onert/core/include/backend/basic/StaticTensorManager.h
@@ -37,9 +37,11 @@ class StaticTensorManager
 {
 public:
   StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
-                      DynamicTensorManager *dynamic_tensor_manager);
+                      DynamicTensorManager *dynamic_tensor_manager,
+                      const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);
   StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg, const std::string planner_id,
-                      DynamicTensorManager *dynamic_tensor_manager);
+                      DynamicTensorManager *dynamic_tensor_manager,
+                      const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);
   virtual ~StaticTensorManager() = default;

   void allocateNonconsts(void);
@@ -57,6 +59,8 @@ class StaticTensorManager
   const std::shared_ptr<TensorRegistry> _tensors;
   ir::OperandIndexMap<bool> _as_constants;
   DynamicTensorManager *_dynamic_tensor_manager;
+  ir::OperandIndexMap<ir::OperandIndex> _operands_with_shared_memory;
+  ir::OperandIndexMap<uint32_t> _source_operands_ref_counter;
 };

 } // namespace basic
diff --git a/runtime/onert/core/include/backend/basic/TensorBuilder.h b/runtime/onert/core/include/backend/basic/TensorBuilder.h
index 5d5b521ae7b..cba2aeda40e 100644
--- a/runtime/onert/core/include/backend/basic/TensorBuilder.h
+++ b/runtime/onert/core/include/backend/basic/TensorBuilder.h
@@ -37,8 +37,10 @@ namespace basic
 class TensorBuilder
 {
 public:
-  TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
-  TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id);
+  TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg,
+                const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);
+  TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id,
+                const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);

   /**
    * @brief Register tensor information to allocate on CPU backend
diff --git a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc
index 92b3f286b2c..5ebeb710446 100644
--- a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc
+++ b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc
@@ -14,6 +14,8 @@
  * limitations under the License.
  */

+#include <algorithm>
+
 #include "backend/basic/StaticTensorManager.h"

 #include "backend/basic/DynamicTensorManager.h"
@@ -27,19 +29,23 @@ namespace backend
 namespace basic
 {

-StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
-                                         DynamicTensorManager *dynamic_tensor_manager)
+StaticTensorManager::StaticTensorManager(
+  const std::shared_ptr<TensorRegistry> &reg, DynamicTensorManager *dynamic_tensor_manager,
+  const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory)
   : _nonconst_mgr{new MemoryManager()}, _tensors{reg},
-    _dynamic_tensor_manager{dynamic_tensor_manager}
+    _dynamic_tensor_manager{dynamic_tensor_manager},
+    _operands_with_shared_memory{operands_with_shared_memory}
 {
   // DO NOTHING
 }

-StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
-                                         const std::string planner_id,
-                                         DynamicTensorManager *dynamic_tensor_manager)
+StaticTensorManager::StaticTensorManager(
+  const std::shared_ptr<TensorRegistry> &reg, const std::string planner_id,
+  DynamicTensorManager *dynamic_tensor_manager,
+  const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory)
   : _nonconst_mgr{new MemoryManager(planner_id)}, _tensors{reg},
-    _dynamic_tensor_manager{dynamic_tensor_manager}
+    _dynamic_tensor_manager{dynamic_tensor_manager},
+    _operands_with_shared_memory{operands_with_shared_memory}
 {
   // DO NOTHING
 }
@@ -50,13 +56,28 @@ void StaticTensorManager::allocateNonconsts(void)

   for (auto &&[ind, tensor] : _tensors->native_tensors())
   {
-    if (!_as_constants[ind] && !tensor->is_dynamic())
+    bool buffer_set = false;
+    if (!tensor->is_dynamic())
     {
-      auto *buffer = _nonconst_mgr->getBuffer(ind);
-      tensor->setBuffer(buffer);
-
-      VERBOSE(CPU_StaticTensorManager)
-        << "TENSOR " << ind << " : " << static_cast<void *>(buffer) << std::endl;
+      if (_operands_with_shared_memory.find(ind) != std::end(_operands_with_shared_memory))
+      {
+        const auto &shared_memory_ind = _operands_with_shared_memory[ind];
+        if (!_as_constants[shared_memory_ind])
+        {
+          tensor->setBuffer(_nonconst_mgr->getBuffer(shared_memory_ind));
+          buffer_set = true;
+        }
+      }
+      else if (!_as_constants[ind])
+      {
+        tensor->setBuffer(_nonconst_mgr->getBuffer(ind));
+        buffer_set = true;
+      }
+      if (buffer_set)
+      {
+        VERBOSE(CPU_StaticTensorManager)
+          << "TENSOR " << ind << " : " << static_cast<void *>(tensor->buffer()) << std::endl;
+      }
     }
   }
 }
@@ -67,17 +88,30 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
                                       const ir::OperandInfo &tensor_info, bool as_const)
 {
   assert(!_tensors->getNativeTensor(ind));
+  std::unique_ptr<Tensor> tensor = nullptr;
   if (as_const)
   {
-    auto tensor = std::make_unique<ExternalTensor>(tensor_info);
-    _tensors->setNativeTensor(ind, std::move(tensor));
+    tensor = std::make_unique<ExternalTensor>(tensor_info);
   }
   else
   {
-    auto tensor =
-      std::make_unique<Tensor>(tensor_info, _dynamic_tensor_manager->dynamic_mem_mgr().get());
-    _tensors->setNativeTensor(ind, std::move(tensor));
+    const auto source_operand = _operands_with_shared_memory.find(ind);
+    if (source_operand != std::end(_operands_with_shared_memory) &&
+        _as_constants[source_operand->second])
+    {
+      as_const = _as_constants[source_operand->second];
+      auto new_tensor_info = tensor_info;
+      new_tensor_info.setAsConstant();
+      tensor = std::make_unique<ExternalTensor>(new_tensor_info);
+    }
+    else
+    {
+      tensor =
+        std::make_unique<Tensor>(tensor_info, _dynamic_tensor_manager->dynamic_mem_mgr().get());
+    }
   }
+  assert(tensor);
+  _tensors->setNativeTensor(ind, std::move(tensor));
   _as_constants[ind] = as_const;
 }

@@ -88,8 +122,22 @@ void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
   // This method is called only when a tensor has proper shape
   assert(!_tensors->getNativeTensor(ind)->is_dynamic());

-  if (!_as_constants[ind])
-    _nonconst_mgr->claimPlan(ind, size);
+  const auto source_ind = _operands_with_shared_memory.find(ind);
+  if (source_ind == std::end(_operands_with_shared_memory))
+  {
+    if (!_as_constants[ind])
+    {
+      _nonconst_mgr->claimPlan(ind, size);
+      ++_source_operands_ref_counter[ind];
+    }
+  }
+  else
+  {
+    if (!_as_constants[source_ind->second])
+    {
+      ++_source_operands_ref_counter[source_ind->second];
+    }
+  }
 }

 void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
@@ -99,8 +147,23 @@ void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
   // This method is called only when a tensor has proper shape
   assert(!_tensors->getNativeTensor(ind)->is_dynamic());

-  if (!_as_constants[ind])
-    _nonconst_mgr->releasePlan(ind);
+  const auto source_operand_ind =
+    std::find_if(std::begin(_operands_with_shared_memory), std::end(_operands_with_shared_memory),
+                 [&ind](const auto &op) { return op.second == ind; });
+
+  ir::OperandIndex release_ind;
+  if (source_operand_ind == std::end(_operands_with_shared_memory))
+  {
+    release_ind = ind;
+  }
+  else
+  {
+    release_ind = source_operand_ind->second;
+  }
+  if (!_as_constants[release_ind] && 0 == _source_operands_ref_counter[release_ind])
+  {
+    _nonconst_mgr->releasePlan(release_ind);
+  }
 }

 void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
diff --git a/runtime/onert/core/src/backend/basic/TensorBuilder.cc b/runtime/onert/core/src/backend/basic/TensorBuilder.cc
index c94076dfbf1..4d25e7083cd 100644
--- a/runtime/onert/core/src/backend/basic/TensorBuilder.cc
+++ b/runtime/onert/core/src/backend/basic/TensorBuilder.cc
@@ -27,17 +27,22 @@ namespace backend
 namespace basic
 {

-TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
+TensorBuilder::TensorBuilder(
+  const std::shared_ptr<TensorRegistry> &tensor_reg,
+  const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory)
   : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
-    _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
+    _static_tensor_mgr{
+      new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get(), operands_with_shared_memory)}
 {
   /* empty */
 }

-TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg,
-                             const std::string planner_id)
+TensorBuilder::TensorBuilder(
+  const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id,
+  const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory)
   : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
-    _static_tensor_mgr{new StaticTensorManager(_tensor_reg, planner_id, _dynamic_tensor_mgr.get())}
+    _static_tensor_mgr{new StaticTensorManager(_tensor_reg, planner_id, _dynamic_tensor_mgr.get(),
+                                               operands_with_shared_memory)}
 {
   /* empty */
 }
diff --git a/runtime/onert/core/src/backend/builtin/Backend.h b/runtime/onert/core/src/backend/builtin/Backend.h
index 85d389505d3..e181afa535f 100644
--- a/runtime/onert/core/src/backend/builtin/Backend.h
+++ b/runtime/onert/core/src/backend/builtin/Backend.h
@@ -66,7 +66,7 @@ class Backend : public ::onert::backend::Backend, public backend::train::ITraina
     // TODO Remove TensorBuilder and ConstantInitializer
     // TODO Support Consecutive controflow operation's intermediate tensor
     auto tr = std::make_shared<TensorRegistry>();
-    auto tb = std::make_shared<TensorBuilder>(tr);
+    auto tb = std::make_shared<TensorBuilder>(tr, context->data().shared_memory_operand_map);
     context->tensor_registry = tr;
     context->tensor_builder = tb;
     context->kernel_gen = std::make_shared<KernelGenerator>(
diff --git a/runtime/onert/core/src/backend/builtin/TensorBuilder.cc b/runtime/onert/core/src/backend/builtin/TensorBuilder.cc
index ca1c0179439..dbff0b98b08 100644
--- a/runtime/onert/core/src/backend/builtin/TensorBuilder.cc
+++ b/runtime/onert/core/src/backend/builtin/TensorBuilder.cc
@@ -27,10 +27,12 @@ namespace backend
 namespace builtin
 {

-TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
+TensorBuilder::TensorBuilder(
+  const std::shared_ptr<TensorRegistry> &tensor_reg,
+  const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory)
   : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg->base_reg())},
-    _static_tensor_mgr{
-      new basic::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())}
+    _static_tensor_mgr{new basic::StaticTensorManager(
+      _tensor_reg->base_reg(), _dynamic_tensor_mgr.get(), operands_with_shared_memory)}
 {
   /* empty */
 }
diff --git a/runtime/onert/core/src/backend/builtin/TensorBuilder.h b/runtime/onert/core/src/backend/builtin/TensorBuilder.h
index 295e91da1fc..1a177623d62 100644
--- a/runtime/onert/core/src/backend/builtin/TensorBuilder.h
+++ b/runtime/onert/core/src/backend/builtin/TensorBuilder.h
@@ -37,7 +37,8 @@ namespace builtin
 class TensorBuilder
 {
 public:
-  TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
+  TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg,
+                const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);

   /**
    * @brief Register tensor information to allocate on CPU backend
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.cc b/runtime/onert/core/src/compiler/ExecutorFactory.cc
index 48878011850..facbcd991fd 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.cc
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.cc
@@ -217,6 +217,33 @@ createBackendContexts(compiler::ILoweredGraph &lgraph, bool linear_executor,
   // Create contexts
   auto whole_op_order = lgraph.graph().topolSortOperations();
+  // find operands which can share memory
+  const std::unordered_set<ir::OpCode> ops_with_possible_memory_sharing = {
+    ir::OpCode::Reshape, ir::OpCode::ExpandDims, ir::OpCode::Squeeze};
+  const auto memory_sharing_allowed = [&ops_with_possible_memory_sharing,
+                                       &lgraph](const ir::IOperation &op) {
+    if (ops_with_possible_memory_sharing.find(op.opcode()) ==
+        std::end(ops_with_possible_memory_sharing))
+    {
+      return false;
+    }
+    if (lgraph.graph().operands().at(op.getInputs().at(0)).info().isDynamic())
+    {
+      return false;
+    }
+    if (lgraph.graph().operands().at(op.getOutputs().at(0)).info().isDynamic())
+    {
+      return false;
+    }
+    const auto op_input_output = {op.getInputs().at(0), op.getOutputs().at(0)};
+    const bool is_model_input_output =
+      std::any_of(std::begin(op_input_output), std::end(op_input_output),
+                  [&lgraph](const ir::OperandIndex &ind) {
+                    return lgraph.graph().getInputs().contains(ind) ||
+                           lgraph.graph().getOutputs().contains(ind);
+                  });
+    return !is_model_input_output;
+  };
   for (auto &&[backend, data] : context_data_map)
   {
     auto graph = data.graph.get();
@@ -242,6 +269,14 @@ createBackendContexts(compiler::ILoweredGraph &lgraph, bool linear_executor,
                     [&](const auto &ind) { return graph->operations().exist(ind); });
     data.is_linear_executor = linear_executor;
     data.custom_kernel_builder = custom_kernel_builder;
+    for (const auto &op_ind : op_order)
+    {
+      const auto &op = graph->operations().at(op_ind);
+      if (memory_sharing_allowed(op))
+      {
+        data.shared_memory_operand_map[op.getOutputs().at(0)] = op.getInputs().at(0);
+      }
+    }
     contexts.emplace(backend, backend->newContext(std::move(data)));
   }
   return contexts;
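
For readers who want the intent of the patch condensed in one place, the standalone sketch below illustrates the buffer-sharing idea; it is an illustration only, not code from the repository, and every name in it (FakeTensor, SharedMemoryMap, reshape_run) is invented for the example.

// Standalone illustration only: FakeTensor, SharedMemoryMap and reshape_run are
// hypothetical names, not onert APIs. The sketch mirrors the idea of the patch:
// (1) record output->input pairs for copy-free ops such as Reshape,
// (2) give such an output the input's buffer instead of a fresh allocation,
// (3) skip the memcpy in run() when the two buffers already alias.
#include <cassert>
#include <cstddef>
#include <cstring>
#include <unordered_map>
#include <vector>

using OperandIndex = std::size_t; // stand-in for ir::OperandIndex
using SharedMemoryMap = std::unordered_map<OperandIndex, OperandIndex>; // output -> memory source

struct FakeTensor
{
  float *buffer = nullptr;
  std::size_t size_in_bytes = 0;
};

// Mirrors ReshapeLayer::run() after the patch: copy only when the buffers differ.
void reshape_run(const FakeTensor &input, FakeTensor &output)
{
  if (output.buffer != input.buffer)
    std::memcpy(output.buffer, input.buffer, input.size_in_bytes);
}

int main()
{
  std::vector<float> storage(4, 1.0f);
  FakeTensor input{storage.data(), storage.size() * sizeof(float)};

  // Analogue of shared_memory_operand_map: operand 1 (a Reshape output) reuses operand 0's memory.
  SharedMemoryMap shared_memory_operand_map{{OperandIndex{1}, OperandIndex{0}}};

  // A tensor manager in the spirit of StaticTensorManager::allocateNonconsts would hand
  // the output the source operand's buffer instead of planning a separate allocation.
  std::vector<float> separate_storage(4); // used only when no sharing is recorded
  FakeTensor output;
  output.size_in_bytes = input.size_in_bytes;
  output.buffer =
    shared_memory_operand_map.count(OperandIndex{1}) ? input.buffer : separate_storage.data();

  reshape_run(input, output); // no memcpy happens here because the buffers alias
  assert(output.buffer == input.buffer);
  return 0;
}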