
Fix mse gradient
ragmani committed Sep 6, 2024
1 parent 93b5d13 commit cf23704
Showing 12 changed files with 149 additions and 32 deletions.
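In short, this commit makes the divisor of the MSE gradient depend on the loss reduction type instead of always dividing by the flattened tensor size. As implemented in MSEGrad below, the gradient is

\[
\frac{\partial\,\mathrm{MSE}}{\partial \hat{y}_i} = \frac{-2\,(y_i - \hat{y}_i)}{N},
\qquad
N =
\begin{cases}
\text{batch\_size} \times \text{flat\_size} & \text{for SUM\_OVER\_BATCH\_SIZE,} \\
\text{flat\_size} & \text{for SUM,}
\end{cases}
\]

where \(y_i\) is y_true and \(\hat{y}_i\) is y_pred.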
37 changes: 37 additions & 0 deletions compute/cker/include/cker/train/Types.h
@@ -0,0 +1,37 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef __NNFW_CKER_TRAIN_TYPES_H__
#define __NNFW_CKER_TRAIN_TYPES_H__

namespace nnfw
{
namespace cker
{
namespace train
{

enum class LossReductionType
{
SUM_OVER_BATCH_SIZE,
SUM,
};

} // namespace train
} // namespace cker
} // namespace nnfw

#endif // __NNFW_CKER_TRAIN_TYPES_H__
31 changes: 27 additions & 4 deletions compute/cker/include/cker/train/operation/Loss.h
@@ -25,6 +25,7 @@
#include "cker/eigen/Utils.h"
#include "cker/eigen/xent_op.h"
#include "cker/operation/Helper/BCast.h"
#include "cker/train/Types.h"

namespace nnfw
{
@@ -63,17 +64,39 @@ inline void MSE(const Shape &y_pred_shape, const T *y_pred_data, const Shape &y_

template <typename T>
inline void MSEGrad(const Shape &y_pred_shape, const T *y_pred_data, const Shape &y_true_shape,
const T *y_true_data, const Shape &grad_shape, T *grad_data)
const T *y_true_data, const Shape &grad_shape, T *grad_data,
LossReductionType reduction_type)
{
if (y_pred_shape != y_true_shape)
throw std::runtime_error("cker::MSEGrad: y_pred_shape != y_true_shape");
if (y_pred_shape != grad_shape)
throw std::runtime_error("cker::MSEGrad: y_pred_shape != grad_shape");

const int size = grad_shape.FlatSize();
for (int i = 0; i < size; ++i)
// TODO Optimize
const int batch_size = grad_shape.Dims(0);
const auto flat_size = FlatSizeSkipDim(grad_shape, 0);
auto reduction_size = 1;
switch (reduction_type)
{
grad_data[i] = static_cast<T>(-2 * (y_true_data[i] - y_pred_data[i]) / size);
case LossReductionType::SUM_OVER_BATCH_SIZE:
reduction_size = batch_size * flat_size;
break;
case LossReductionType::SUM:
reduction_size = flat_size;
break;
default:
throw std::runtime_error("Unsupported reduction type");
}

for (int b = 0; b < batch_size; ++b)
{
for (int i = 0; i < flat_size; ++i)
{
const int offset = b * flat_size + i;
assert(offset >= 0);
grad_data[offset] =
static_cast<T>(-2 * (y_true_data[offset] - y_pred_data[offset]) / reduction_size);
}
}
}
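As a concrete check of the numbers used in the {2, 3} test cases in Loss.test.cc, the following standalone sketch (hypothetical, not part of this commit) recomputes the expected gradients for both reduction types:

// Minimal standalone sketch: recomputes the {2, 3} expected gradients from
// Loss.test.cc for both reduction types (input values copied from the tests).
#include <cstddef>
#include <cstdio>
#include <initializer_list>
#include <vector>

int main()
{
  const std::vector<float> y_pred = {27.2f, 31.8f, 51.9f, 10.2f, 34.2f, 12.4f};
  const std::vector<float> y_true = {31.3f, 40.3f, 29.7f, 12.9f, 25.8f, 11.9f};
  const int batch_size = 2;
  const int flat_size = 3;

  // SUM_OVER_BATCH_SIZE divides by batch_size * flat_size (= 6);
  // SUM divides by flat_size (= 3), so its gradients are exactly twice as large.
  for (const int reduction_size : {batch_size * flat_size, flat_size})
  {
    for (std::size_t i = 0; i < y_pred.size(); ++i)
      std::printf("%.7f ", -2.0f * (y_true[i] - y_pred[i]) / reduction_size);
    std::printf("\n");
  }
  return 0;
}

The first row should reproduce the SUM_OVER_BATCH_SIZE expected vector and the second row the SUM expected vector from the tests, up to float rounding.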

43 changes: 32 additions & 11 deletions compute/cker/src/train/Loss.test.cc
@@ -264,7 +264,8 @@ TEST(CKer_Operation, LossMSEGrad)
std::vector<int> expected = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

nnfw::cker::train::MSEGrad(nnfw::cker::Shape{1, 10}, y_pred.data(), nnfw::cker::Shape{1, 10},
y_true.data(), nnfw::cker::Shape{1, 10}, deriv_y_pred.data());
y_true.data(), nnfw::cker::Shape{1, 10}, deriv_y_pred.data(),
nnfw::cker::train::LossReductionType::SUM_OVER_BATCH_SIZE);

for (size_t i = 0; i < deriv_y_pred.size(); ++i)
EXPECT_EQ(deriv_y_pred[i], expected[i]);
@@ -278,21 +279,38 @@ TEST(CKer_Operation, LossMSEGrad)
std::vector<float> expected = {0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2};

nnfw::cker::train::MSEGrad(nnfw::cker::Shape{1, 10}, y_pred.data(), nnfw::cker::Shape{1, 10},
y_true.data(), nnfw::cker::Shape{1, 10}, deriv_y_pred.data());
y_true.data(), nnfw::cker::Shape{1, 10}, deriv_y_pred.data(),
nnfw::cker::train::LossReductionType::SUM_OVER_BATCH_SIZE);

for (size_t i = 0; i < deriv_y_pred.size(); ++i)
EXPECT_FLOAT_EQ(deriv_y_pred[i], expected[i]);
}

{
// Shape: {2, 3} -> m_rows:3, m_cols:2
// Shape: {2, 3} -> m_rows:3, m_cols:2, LossReductionType::SUM_OVER_BATCH_SIZE
std::vector<float> y_pred = {27.2, 31.8, 51.9, 10.2, 34.2, 12.4};
std::vector<float> y_true = {31.3, 40.3, 29.7, 12.9, 25.8, 11.9};
std::vector<float> deriv_y_pred(6);
std::vector<float> expected = {-1.3666667, -2.8333333, 7.4, -0.9, 2.8, 0.1666667};

nnfw::cker::train::MSEGrad(nnfw::cker::Shape{2, 3}, y_pred.data(), nnfw::cker::Shape{2, 3},
y_true.data(), nnfw::cker::Shape{2, 3}, deriv_y_pred.data());
y_true.data(), nnfw::cker::Shape{2, 3}, deriv_y_pred.data(),
nnfw::cker::train::LossReductionType::SUM_OVER_BATCH_SIZE);

for (size_t i = 0; i < deriv_y_pred.size(); ++i)
EXPECT_FLOAT_EQ(deriv_y_pred[i], expected[i]);
}

{
// Shape: {2, 3} -> m_rows:3, m_cols:2, LossReductionType::SUM
std::vector<float> y_pred = {27.2, 31.8, 51.9, 10.2, 34.2, 12.4};
std::vector<float> y_true = {31.3, 40.3, 29.7, 12.9, 25.8, 11.9};
std::vector<float> deriv_y_pred(6);
std::vector<float> expected = {-2.7333324, -5.6666665, 14.8, -1.7999998, 5.6, 0.33333334};

nnfw::cker::train::MSEGrad(nnfw::cker::Shape{2, 3}, y_pred.data(), nnfw::cker::Shape{2, 3},
y_true.data(), nnfw::cker::Shape{2, 3}, deriv_y_pred.data(),
nnfw::cker::train::LossReductionType::SUM);

for (size_t i = 0; i < deriv_y_pred.size(); ++i)
EXPECT_FLOAT_EQ(deriv_y_pred[i], expected[i]);
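
For reference, this SUM case uses the same inputs as the SUM_OVER_BATCH_SIZE case above; only the divisor changes from batch_size * flat_size = 6 to flat_size = 3, so every expected gradient is exactly twice as large, e.g. -2 * (31.3 - 27.2) / 3 ≈ -2.733333 for the first element.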
@@ -309,7 +327,8 @@ TEST(CKer_Operation, neg_LossMSEGrad)
std::vector<float> expected = {1., 1., 1., 1., 1., 1.};

nnfw::cker::train::MSEGrad(nnfw::cker::Shape{2, 3}, y_pred.data(), nnfw::cker::Shape{2, 3},
y_true.data(), nnfw::cker::Shape{2, 3}, deriv_y_pred.data());
y_true.data(), nnfw::cker::Shape{2, 3}, deriv_y_pred.data(),
nnfw::cker::train::LossReductionType::SUM_OVER_BATCH_SIZE);

for (size_t i = 0; i < deriv_y_pred.size(); ++i)
EXPECT_NE(deriv_y_pred[i], expected[i]);
@@ -321,9 +340,10 @@
std::vector<float> y_true = {0., 1., 2., 3., 4., 5.};
std::vector<float> deriv_y_pred(10);

EXPECT_ANY_THROW(nnfw::cker::train::MSEGrad(nnfw::cker::Shape{1, 10}, y_pred.data(),
nnfw::cker::Shape{2, 3}, y_true.data(),
nnfw::cker::Shape{1, 10}, deriv_y_pred.data()));
EXPECT_ANY_THROW(
nnfw::cker::train::MSEGrad(nnfw::cker::Shape{1, 10}, y_pred.data(), nnfw::cker::Shape{2, 3},
y_true.data(), nnfw::cker::Shape{1, 10}, deriv_y_pred.data(),
nnfw::cker::train::LossReductionType::SUM_OVER_BATCH_SIZE));
}

{
@@ -332,9 +352,10 @@ TEST(CKer_Operation, neg_LossMSEGrad)
std::vector<float> y_true = {0., 1., 2., 3., 4., 5., 6., 7., 8., 9.};
std::vector<float> deriv_y_pred(6);

EXPECT_ANY_THROW(nnfw::cker::train::MSEGrad(nnfw::cker::Shape{1, 10}, y_pred.data(),
nnfw::cker::Shape{1, 10}, y_true.data(),
nnfw::cker::Shape{2, 3}, deriv_y_pred.data()));
EXPECT_ANY_THROW(
nnfw::cker::train::MSEGrad(nnfw::cker::Shape{1, 10}, y_pred.data(), nnfw::cker::Shape{1, 10},
y_true.data(), nnfw::cker::Shape{2, 3}, deriv_y_pred.data(),
nnfw::cker::train::LossReductionType::SUM_OVER_BATCH_SIZE));
}
}

7 changes: 5 additions & 2 deletions runtime/onert/backend/train/KernelGenerator.cc
@@ -405,13 +405,15 @@ void KernelGenerator::visit(const ir::train::operation::Loss &node)

auto loss_code = node.param().loss_code;
auto loss_param = node.param().loss_param;
const auto reduction_type = node.param().reduction_type;

switch (loss_code)
{
case ir::train::LossCode::MeanSquaredError:
{
auto fn = std::make_unique<ops::LossMeanSquaredErrorLayer>();
fn->configure(y_pred_tensor, y_true_tensor, output_tensor, back_prop_y_pred_tensor);
fn->configure(y_pred_tensor, y_true_tensor, output_tensor, back_prop_y_pred_tensor,
reduction_type);
_return_fn = std::move(fn);
break;
}
@@ -421,7 +423,8 @@ void KernelGenerator::visit(const ir::train::operation::Loss &node)
bool is_required_normalization = (last_node != ir::OpCode::Softmax);
auto fn = std::make_unique<ops::LossCategoricalCrossentropyLayer>();
fn->configure(y_pred_tensor, y_true_tensor, output_tensor, back_prop_y_pred_tensor,
loss_param.cce.axis, loss_param.cce.label_smoothing, is_required_normalization);
reduction_type, loss_param.cce.axis, loss_param.cce.label_smoothing,
is_required_normalization);
_return_fn = std::move(fn);
break;
}
(file header not shown; presumably runtime/onert/backend/train/ops/LossCategoricalCrossentropyLayer.cc)
@@ -28,14 +28,12 @@ namespace train
namespace ops
{

void LossCategoricalCrossentropyLayer::configure(const IPortableTensor *y_pred,
const IPortableTensor *y_true,
IPortableTensor *output,
IPortableTensor *back_prop_y_pred, int32_t axis,
float label_smoothing,
bool is_required_normalization)
void LossCategoricalCrossentropyLayer::configure(
const IPortableTensor *y_pred, const IPortableTensor *y_true, IPortableTensor *output,
IPortableTensor *back_prop_y_pred, ir::train::LossReductionType reduction_type, int32_t axis,
float label_smoothing, bool is_required_normalization)
{
LossLayer::configure(y_pred, y_true, output, back_prop_y_pred);
LossLayer::configure(y_pred, y_true, output, back_prop_y_pred, reduction_type);

_axis = axis;
_label_smoothing = label_smoothing;
(file header not shown; presumably runtime/onert/backend/train/ops/LossCategoricalCrossentropyLayer.h)
@@ -19,6 +19,7 @@

#include "LossLayer.h"
#include "../Tensor.h"
#include <ir/train/LossInfo.h>

namespace onert
{
@@ -35,8 +36,9 @@ class LossCategoricalCrossentropyLayer : public LossLayer
LossCategoricalCrossentropyLayer() = default;

void configure(const IPortableTensor *y_pred, const IPortableTensor *y_true,
IPortableTensor *output, IPortableTensor *back_prop_y_pred, int32_t axis,
float label_smoothing, bool is_required_normalization);
IPortableTensor *output, IPortableTensor *back_prop_y_pred,
ir::train::LossReductionType reduction_type, int32_t axis, float label_smoothing,
bool is_required_normalization);
void forward(bool training) override;
void backward() override;

4 changes: 3 additions & 1 deletion runtime/onert/backend/train/ops/LossLayer.cc
@@ -32,7 +32,8 @@ LossLayer::LossLayer()
}

void LossLayer::configure(const IPortableTensor *y_pred, const IPortableTensor *y_true,
IPortableTensor *output, IPortableTensor *back_prop_y_pred)
IPortableTensor *output, IPortableTensor *back_prop_y_pred,
ir::train::LossReductionType reduction_type)
{
assert(y_pred != nullptr);
assert(y_true != nullptr);
@@ -43,6 +44,7 @@ void LossLayer::configure(const IPortableTensor *y_pred, const IPortableTensor *
_y_true = y_true;
_output = output;
_back_prop_y_pred = back_prop_y_pred;
_reduction_type = reduction_type;
}

} // namespace ops
5 changes: 4 additions & 1 deletion runtime/onert/backend/train/ops/LossLayer.h
@@ -21,6 +21,7 @@
#include <ops/ElementwiseActivationLayer.h>

#include <exec/train/ITrainableFunction.h>
#include <ir/train/LossInfo.h>

namespace onert
{
@@ -42,13 +43,15 @@ class LossLayer : public ::onert::exec::train::ITrainableFunction
LossLayer();

void configure(const IPortableTensor *y_pred, const IPortableTensor *y_true,
IPortableTensor *output, IPortableTensor *back_prop_y_pred);
IPortableTensor *output, IPortableTensor *back_prop_y_pred,
ir::train::LossReductionType reduction_type);

protected:
const IPortableTensor *_y_pred;
const IPortableTensor *_y_true;
IPortableTensor *_output;
IPortableTensor *_back_prop_y_pred;
ir::train::LossReductionType _reduction_type;
};

} // namespace ops
8 changes: 5 additions & 3 deletions runtime/onert/backend/train/ops/LossMeanSquaredErrorLayer.cc
@@ -30,9 +30,10 @@ namespace ops

void LossMeanSquaredErrorLayer::configure(const IPortableTensor *y_pred,
const IPortableTensor *y_true, IPortableTensor *output,
IPortableTensor *back_prop_y_pred)
IPortableTensor *back_prop_y_pred,
ir::train::LossReductionType reduction_type)
{
LossLayer::configure(y_pred, y_true, output, back_prop_y_pred);
LossLayer::configure(y_pred, y_true, output, back_prop_y_pred, reduction_type);
}

void LossMeanSquaredErrorLayer::forward(bool)
@@ -53,11 +54,12 @@ void LossMeanSquaredErrorLayer::backward()
{
assert(_back_prop_y_pred != nullptr);

const auto reduction_type = convertLossReductionType(_reduction_type);
if (_y_pred->data_type() == OperandType::FLOAT32)
{
nnfw::cker::train::MSEGrad(getShape(_y_pred), getBuffer<float>(_y_pred), getShape(_y_true),
getBuffer<float>(_y_true), getShape(_back_prop_y_pred),
getBuffer<float>(_back_prop_y_pred));
getBuffer<float>(_back_prop_y_pred), reduction_type);
}
else
{
3 changes: 2 additions & 1 deletion runtime/onert/backend/train/ops/LossMeanSquaredErrorLayer.h
@@ -34,7 +34,8 @@ class LossMeanSquaredErrorLayer : public LossLayer
LossMeanSquaredErrorLayer() = default;

void configure(const IPortableTensor *y_pred, const IPortableTensor *y_true,
IPortableTensor *output, IPortableTensor *back_prop_y_pred);
IPortableTensor *output, IPortableTensor *back_prop_y_pred,
ir::train::LossReductionType reduction_type);
void forward(bool training) override;
void backward() override;
};
15 changes: 15 additions & 0 deletions runtime/onert/backend/train/ops/OperationUtils.cc
@@ -97,6 +97,21 @@ void biasGrad(const IPortableTensor *input_backprop, IPortableTensor *bias_grad)
bias_grad_buffer, bias_grad_shape);
}

nnfw::cker::train::LossReductionType convertLossReductionType(ir::train::LossReductionType type)
{
switch (type)
{
case ir::train::LossReductionType::SumOverBatchSize:
return nnfw::cker::train::LossReductionType::SUM_OVER_BATCH_SIZE;
break;
case ir::train::LossReductionType::Sum:
return nnfw::cker::train::LossReductionType::SUM;
break;
default:
throw std::runtime_error("Unsupported LossReductionType");
}
}

} // namespace ops
} // namespace train
} // namespace backend
10 changes: 10 additions & 0 deletions runtime/onert/backend/train/ops/OperationUtils.h
@@ -17,6 +17,8 @@
#ifndef __ONERT_BACKEND_TRAIN_OPS_OPERATION_UTILS_H__
#define __ONERT_BACKEND_TRAIN_OPS_OPERATION_UTILS_H__

#include <cker/train/Types.h>
#include <ir/train/LossInfo.h>
#include <ops/OperationUtils.h>

namespace onert
@@ -77,6 +79,14 @@ const IPortableTensor *backpropActivation(const ir::Activation &activation,
*/
void biasGrad(const IPortableTensor *input_backprop, IPortableTensor *bias_grad);

/**
* @brief convert loss reduction type
*
* @param type loss reduction type defined in ir::train::LossReductionType
* @return corresponding type defined in cker::train::LossReductionType
*/
nnfw::cker::train::LossReductionType convertLossReductionType(ir::train::LossReductionType type);

} // namespace ops
} // namespace train
} // namespace backend
