[onert] Share memory for Reshape, ExpandDims and Squeeze #14057

Draft · wants to merge 24 commits into base: master
Changes from 12 commits
2 changes: 1 addition & 1 deletion runtime/onert/backend/cpu/Backend.h
@@ -45,7 +45,7 @@ class Backend : public ::onert::backend::Backend
auto &graph = *data.graph;
auto context = std::make_unique<BackendContext>(this, std::move(data));
auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
auto tb = std::make_shared<TensorBuilder>(tr, context->data().shared_memory_operand_map);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
8 changes: 6 additions & 2 deletions runtime/onert/backend/cpu/ops/ExpandDimsLayer.cc
@@ -38,8 +38,12 @@ void ExpandDimsLayer::configure(const IPortableTensor *input, IPortableTensor *o

void ExpandDimsLayer::run()
{
size_t count = _input->total_size();
memcpy(_output->buffer(), _input->buffer(), count);
// When the output buffer aliases the input buffer, no copy is needed
if (_output->buffer() != _input->buffer())
{
size_t count = _input->total_size();
memcpy(_output->buffer(), _input->buffer(), count);
}
}

} // namespace ops
8 changes: 6 additions & 2 deletions runtime/onert/backend/cpu/ops/ReshapeLayer.cc
@@ -32,8 +32,12 @@ ReshapeLayer::ReshapeLayer() : _input(nullptr), _shape(nullptr), _output(nullptr

void ReshapeLayer::reshapeGeneric()
{
size_t count = _input->total_size();
memcpy(_output->buffer(), _input->buffer(), count);
// When the output buffer aliases the input buffer, no copy is needed
if (_output->buffer() != _input->buffer())
{
size_t count = _input->total_size();
memcpy(_output->buffer(), _input->buffer(), count);
}
}

void ReshapeLayer::configure(const IPortableTensor *input, const IPortableTensor *shape,
2 changes: 1 addition & 1 deletion runtime/onert/backend/ruy/Backend.h
@@ -45,7 +45,7 @@ class Backend : public ::onert::backend::Backend
auto &graph = *data.graph;
auto context = std::make_unique<BackendContext>(this, std::move(data));
auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
auto tb = std::make_shared<TensorBuilder>(tr, context->data().shared_memory_operand_map);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
2 changes: 1 addition & 1 deletion runtime/onert/backend/trix/Backend.h
@@ -44,7 +44,7 @@ class Backend : public ::onert::backend::Backend
auto &graph = *data.graph;
auto context = std::make_unique<BackendContext>(this, std::move(data));
auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
auto tb = std::make_shared<TensorBuilder>(tr, context->data().shared_memory_operand_map);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, context->dev_context());
2 changes: 1 addition & 1 deletion runtime/onert/backend/xnnpack/Backend.h
@@ -45,7 +45,7 @@ class Backend : public ::onert::backend::Backend
auto &graph = *data.graph;
auto context = std::make_unique<BackendContext>(this, std::move(data));
auto tr = std::make_shared<basic::TensorRegistry>();
auto tb = std::make_shared<TensorBuilder>(tr);
auto tb = std::make_shared<TensorBuilder>(tr, context->data().shared_memory_operand_map);
context->tensor_registry = tr;
context->tensor_builder = tb;
context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, custom_kernel_builder,
2 changes: 2 additions & 0 deletions runtime/onert/core/include/backend/BackendContext.h
@@ -46,6 +46,8 @@ struct ContextData
std::shared_ptr<custom::IKernelBuilder> custom_kernel_builder;
/* Is linear executor or not */
bool is_linear_executor;
/* Map of operands which share memory where the values are sources of memory */
ir::OperandIndexMap<ir::OperandIndex> shared_memory_operand_map;
};

class BackendContext
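Note: the new shared_memory_operand_map records, for every operand that reuses another operand's buffer, which operand actually owns the memory. The following is a minimal, self-contained sketch of the intended mapping; plain integers stand in for ir::OperandIndex and the index values are invented for illustration, not taken from this PR.

// Illustration only: stand-in types for ir::OperandIndexMap / ir::OperandIndex.
#include <cassert>
#include <cstdint>
#include <unordered_map>

int main()
{
  // Key: operand that shares memory; value: operand that owns (sources) the memory.
  std::unordered_map<uint32_t, uint32_t> shared_memory_operand_map;

  // A Reshape whose output (operand 7) can alias its input (operand 3).
  shared_memory_operand_map[7] = 3;

  // Consumers look up the memory source before allocating a buffer of their own.
  assert(shared_memory_operand_map.at(7) == 3);
  return 0;
}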
44 changes: 37 additions & 7 deletions runtime/onert/core/include/backend/basic/BackendContextHelpers.h
@@ -182,9 +182,24 @@ template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContex
const ir::Graph &graph = *ctx.graph();
auto tensor_builder = ctx.tensor_builder;

// process source tensors for shared memory at first
std::vector<ir::OperandIndex> registered_source_ind;
for (const auto &[_, source_ind] : tensor_builder->getOperandsWithSharedMemory())
{
if (ctx.external_operands().contains(source_ind))
continue;
if (tensor_builder->isRegistered(source_ind)) // some tensors can have the same source
continue;
tensor_builder->registerTensorInfo(source_ind, graph.operands().at(source_ind).info());
registered_source_ind.emplace_back(source_ind);
}

graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
if (ctx.external_operands().contains(ind))
return;
if (std::find(std::begin(registered_source_ind), std::end(registered_source_ind), ind) !=
std::end(registered_source_ind)) // skip tensors already registered
return;
tensor_builder->registerTensorInfo(ind, obj.info());
});

@@ -210,31 +225,46 @@ template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContex

inline void initConsts(const ir::Operands &operands,
const util::Set<ir::OperandIndex> &external_operands,
ITensorRegistry *tensor_registry)
ITensorRegistry *tensor_registry,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operands_map)
{
operands.iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
if (external_operands.contains(ind) || !operand.isConstant())
const bool has_const_shared_memory =
shared_memory_operands_map.find(ind) != std::end(shared_memory_operands_map) &&
operands.at(shared_memory_operands_map.at(ind)).isConstant();
const bool can_be_initialized_as_const = operand.isConstant() || has_const_shared_memory;
if (external_operands.contains(ind) || !can_be_initialized_as_const)
return;

auto tensor = tensor_registry->getNativeITensor(ind);
assert(tensor != nullptr);

VERBOSE(FillOperandData) << "Fill data for " << ind << std::endl;

auto data = operand.shareData();
assert(data && data->base());
ExternalTensor *ext_tensor = dynamic_cast<ExternalTensor *>(tensor);

if (ext_tensor == nullptr)
throw std::runtime_error{"This tensor is not external tensor"};

ext_tensor->setData(data);
if (has_const_shared_memory)
{
const auto &memory_source_operand = operands.at(shared_memory_operands_map.at(ind));
auto memory_source_data = memory_source_operand.shareData();
assert(memory_source_data && memory_source_data->base());
ext_tensor->setData(memory_source_data);
}
else
{
auto data = operand.shareData();
assert(data && data->base());
ext_tensor->setData(data);
}
});
}

inline void initConsts(BackendContext &ctx)
{
initConsts(ctx.graph()->operands(), ctx.external_operands(), ctx.tensor_registry.get());
initConsts(ctx.graph()->operands(), ctx.external_operands(), ctx.tensor_registry.get(),
ctx.data().shared_memory_operand_map);
}

} // namespace basic
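Note: the updated initConsts also initializes operands that are not constant themselves but whose memory source is constant, pointing their tensor at the source operand's data. Below is a self-contained model of that decision; the Operand struct, shared_map and index values are simplified stand-ins for illustration, not the runtime's real API.

// Illustration only: model of the data-source selection in initConsts().
#include <cassert>
#include <cstddef>
#include <unordered_map>
#include <vector>

struct Operand
{
  bool is_const = false;
  const void *data = nullptr; // stand-in for Operand::shareData()
};

int main()
{
  static const float kWeights[4] = {1.f, 2.f, 3.f, 4.f};

  std::vector<Operand> operands(3);
  operands[0] = {true, kWeights}; // constant memory source
  operands[2] = {false, nullptr}; // e.g. a Reshape output, not constant itself

  // Operand 2 shares memory with operand 0.
  std::unordered_map<std::size_t, std::size_t> shared_map{{2, 0}};

  const std::size_t ind = 2;
  const auto it = shared_map.find(ind);
  const bool has_const_shared_memory = it != shared_map.end() && operands[it->second].is_const;

  // initConsts() would point operand 2's ExternalTensor at the source's constant data.
  const void *tensor_data = has_const_shared_memory ? operands[it->second].data : operands[ind].data;
  assert(tensor_data == kWeights);
  return 0;
}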
runtime/onert/core/include/backend/basic/StaticTensorManager.h
@@ -37,9 +37,11 @@ class StaticTensorManager
{
public:
StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
DynamicTensorManager *dynamic_tensor_manager);
DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);
StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg, const std::string planner_id,
DynamicTensorManager *dynamic_tensor_manager);
DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);
virtual ~StaticTensorManager() = default;

void allocateNonconsts(void);
@@ -57,6 +59,8 @@ class StaticTensorManager
const std::shared_ptr<TensorRegistry> _tensors;
ir::OperandIndexMap<bool> _as_constants;
DynamicTensorManager *_dynamic_tensor_manager;
ir::OperandIndexMap<ir::OperandIndex> _operands_with_shared_memory;
ir::OperandIndexMap<uint32_t> _source_operands_ref_counter;
};

} // namespace basic
9 changes: 7 additions & 2 deletions runtime/onert/core/include/backend/basic/TensorBuilder.h
@@ -37,8 +37,10 @@ namespace basic
class TensorBuilder
{
public:
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id);
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);
TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg, const std::string planner_id,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory);

/**
* @brief Register tensor information to allocate on CPU backend
@@ -54,13 +56,16 @@ class TensorBuilder

void allocate(void);

const ir::OperandIndexMap<ir::OperandIndex> &getOperandsWithSharedMemory() const;

DynamicTensorManager *dynamicTensorManager(void) { return _dynamic_tensor_mgr.get(); }

private:
const std::shared_ptr<TensorRegistry> _tensor_reg;
std::unique_ptr<DynamicTensorManager> _dynamic_tensor_mgr;
std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
ir::OperandIndexMap<ir::OperandIndex> _operands_with_shared_memory;
};

} // namespace basic
107 changes: 85 additions & 22 deletions runtime/onert/core/src/backend/basic/StaticTensorManager.cc
@@ -14,6 +14,8 @@
* limitations under the License.
*/

#include <algorithm>

#include "backend/basic/StaticTensorManager.h"

#include "backend/basic/DynamicTensorManager.h"
@@ -27,19 +29,23 @@ namespace basic
namespace basic
{

StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
DynamicTensorManager *dynamic_tensor_manager)
StaticTensorManager::StaticTensorManager(
const std::shared_ptr<TensorRegistry> &reg, DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory)
: _nonconst_mgr{new MemoryManager()}, _tensors{reg},
_dynamic_tensor_manager{dynamic_tensor_manager}
_dynamic_tensor_manager{dynamic_tensor_manager},
_operands_with_shared_memory{operands_with_shared_memory}
{
// DO NOTHING
}

StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
const std::string planner_id,
DynamicTensorManager *dynamic_tensor_manager)
StaticTensorManager::StaticTensorManager(
const std::shared_ptr<TensorRegistry> &reg, const std::string planner_id,
DynamicTensorManager *dynamic_tensor_manager,
const ir::OperandIndexMap<ir::OperandIndex> &operands_with_shared_memory)
: _nonconst_mgr{new MemoryManager(planner_id)}, _tensors{reg},
_dynamic_tensor_manager{dynamic_tensor_manager}
_dynamic_tensor_manager{dynamic_tensor_manager},
_operands_with_shared_memory{operands_with_shared_memory}
{
// DO NOTHING
}
@@ -50,13 +56,28 @@ void StaticTensorManager::allocateNonconsts(void)

for (auto &&[ind, tensor] : _tensors->native_tensors())
{
if (!_as_constants[ind] && !tensor->is_dynamic())
bool buffer_set = false;
if (!tensor->is_dynamic())
{
auto *buffer = _nonconst_mgr->getBuffer(ind);
tensor->setBuffer(buffer);

VERBOSE(CPU_StaticTensorManager)
<< "TENSOR " << ind << " : " << static_cast<void *>(buffer) << std::endl;
if (_operands_with_shared_memory.find(ind) != std::end(_operands_with_shared_memory))
{
const auto &shared_memory_ind = _operands_with_shared_memory[ind];
if (!_as_constants[shared_memory_ind])
{
tensor->setBuffer(_nonconst_mgr->getBuffer(shared_memory_ind));
buffer_set = true;
}
}
else if (!_as_constants[ind])
{
tensor->setBuffer(_nonconst_mgr->getBuffer(ind));
buffer_set = true;
}
if (buffer_set)
{
VERBOSE(CPU_StaticTensorManager)
<< "TENSOR " << ind << " : " << static_cast<void *>(tensor->buffer()) << std::endl;
}
}
}
}
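Note: the buffer selection above can be summarized with a self-contained model: a non-dynamic tensor whose operand appears in the shared-memory map receives the memory source's buffer instead of its own allocation. The maps and index values below are stand-ins for the MemoryManager and ir::OperandIndexMap, invented for illustration.

// Illustration only: model of the buffer selection in allocateNonconsts().
#include <cassert>
#include <cstdint>
#include <unordered_map>
#include <vector>

int main()
{
  std::vector<uint8_t> source_buffer(64);

  // Stand-in for _nonconst_mgr: buffers planned per owning operand.
  std::unordered_map<uint32_t, uint8_t *> planned{{3, source_buffer.data()}};
  // Stand-in for _operands_with_shared_memory: operand 7 aliases operand 3.
  std::unordered_map<uint32_t, uint32_t> shared{{7, 3}};
  std::unordered_map<uint32_t, bool> as_constants{{3, false}, {7, false}};

  const uint32_t ind = 7;
  uint8_t *buffer = nullptr;
  const auto it = shared.find(ind);
  if (it != shared.end())
  {
    if (!as_constants[it->second])
      buffer = planned[it->second]; // share the source operand's buffer
  }
  else if (!as_constants[ind])
  {
    buffer = planned[ind]; // regular, non-shared allocation
  }

  assert(buffer == source_buffer.data());
  return 0;
}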
@@ -67,17 +88,30 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
const ir::OperandInfo &tensor_info, bool as_const)
{
assert(!_tensors->getNativeTensor(ind));
std::unique_ptr<Tensor> tensor = nullptr;
if (as_const)
{
auto tensor = std::make_unique<ExternalTensor>(tensor_info);
_tensors->setNativeTensor(ind, std::move(tensor));
tensor = std::make_unique<ExternalTensor>(tensor_info);
}
else
{
auto tensor =
std::make_unique<Tensor>(tensor_info, _dynamic_tensor_manager->dynamic_mem_mgr().get());
_tensors->setNativeTensor(ind, std::move(tensor));
const auto source_operand = _operands_with_shared_memory.find(ind);
if (source_operand != std::end(_operands_with_shared_memory) &&
_as_constants[source_operand->second])
{
as_const = _as_constants[source_operand->second];
auto new_tensor_info = tensor_info;
new_tensor_info.setAsConstant();
tensor = std::make_unique<ExternalTensor>(new_tensor_info);
}
else
{
tensor =
std::make_unique<Tensor>(tensor_info, _dynamic_tensor_manager->dynamic_mem_mgr().get());
}
}
assert(tensor);
_tensors->setNativeTensor(ind, std::move(tensor));
_as_constants[ind] = as_const;
}

@@ -88,8 +122,22 @@ void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
// This method is called only when a tensor has proper shape
assert(!_tensors->getNativeTensor(ind)->is_dynamic());

if (!_as_constants[ind])
_nonconst_mgr->claimPlan(ind, size);
const auto source_ind = _operands_with_shared_memory.find(ind);
if (source_ind == std::end(_operands_with_shared_memory))
{
if (!_as_constants[ind])
{
_nonconst_mgr->claimPlan(ind, size);
++_source_operands_ref_counter[ind];
}
}
else
{
if (!_as_constants[source_ind->second])
{
++_source_operands_ref_counter[source_ind->second];
}
}
}

void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
@@ -99,8 +147,23 @@ void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
// This method is called only when a tensor has proper shape
assert(!_tensors->getNativeTensor(ind)->is_dynamic());

if (!_as_constants[ind])
_nonconst_mgr->releasePlan(ind);
const auto source_operand_ind =
std::find_if(std::begin(_operands_with_shared_memory), std::end(_operands_with_shared_memory),
[&ind](const auto &op) { return op.second == ind; });

ir::OperandIndex release_ind;
if (source_operand_ind == std::end(_operands_with_shared_memory))
{
release_ind = ind;
}
else
{
release_ind = source_operand_ind->second;
}
if (!_as_constants[release_ind] && 0 == _source_operands_ref_counter[release_ind])
{
_nonconst_mgr->releasePlan(release_ind);
}
}

void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
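Note: the claim path shown in claimPlan can be modeled as follows: an operand that owns its memory claims a plan and bumps its own counter, while an operand that aliases a source only bumps the source's counter, so the source's buffer stays planned while aliasing users exist. This is a self-contained sketch of that counting behavior only; plain maps stand in for the MemoryManager and the operand-index maps, and the index values are invented.

// Illustration only: model of the reference counting done in claimPlan().
#include <cassert>
#include <cstdint>
#include <unordered_map>

int main()
{
  std::unordered_map<uint32_t, uint32_t> shared{{7, 3}};              // operand 7 aliases operand 3
  std::unordered_map<uint32_t, bool> as_constants{{3, false}, {7, false}};
  std::unordered_map<uint32_t, uint32_t> ref_counter;                 // per memory-source operand
  std::unordered_map<uint32_t, bool> plan_claimed;                    // stand-in for _nonconst_mgr->claimPlan()

  auto claim = [&](uint32_t ind) {
    const auto it = shared.find(ind);
    if (it == shared.end())
    {
      if (!as_constants[ind])
      {
        plan_claimed[ind] = true; // the source operand claims its own plan
        ++ref_counter[ind];
      }
    }
    else if (!as_constants[it->second])
    {
      ++ref_counter[it->second]; // aliasing operand only bumps the source's counter
    }
  };

  claim(3); // memory source
  claim(7); // aliasing operand
  assert(plan_claimed[3] && ref_counter[3] == 2);
  return 0;
}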