Skip to content

Commit

Permalink
[onert] Support block quantization operand size calculation (#13886)
Browse files Browse the repository at this point in the history
This commit updates the total_size() method so that it can calculate the operand size for block quantization types.

ONE-DCO-1.0-Signed-off-by: Hyeongseok Oh <hseok82.oh@samsung.com>
  • Loading branch information
hseok-oh authored Sep 3, 2024
1 parent 48fd493 commit 8593953
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 1 deletion.
2 changes: 1 addition & 1 deletion runtime/onert/core/include/ir/OperandInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ class OperandInfo
* @brief Return size of tensor (bytes)
* @return Tensor size
*/
size_t total_size() const { return _shape.num_elements() * sizeOfDataType(_typeInfo.type()); }
size_t total_size() const;

MemAllocType memAllocType() const { return _alloc_type; }
void setAsConstant() { _const = true; }
Expand Down
1 change: 1 addition & 0 deletions runtime/onert/core/src/ir/DataType.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ size_t sizeOfDataType(DataType data_type)
case DataType::QUANT_INT16_SYMM:
return sizeof(int16_t);
default:
// ggml block quantize type data size is not supported
throw std::runtime_error{"Unsupported type size"};
}
}
Expand Down
51 changes: 51 additions & 0 deletions runtime/onert/core/src/ir/OperandInfo.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "ir/OperandInfo.h"

namespace onert
{
namespace ir
{

/**
 * @brief  Return size of tensor (bytes)
 *
 * For data types accepted by sizeOfDataType() the size is the number of elements
 * multiplied by the per-element size.
 *
 * For the ggml block quantization types (QUANT_GGML_Q4_0, QUANT_GGML_Q8_0) the
 * elements are grouped into blocks of 32 and the size is the number of blocks
 * multiplied by the per-block size:
 *   - QUANT_GGML_Q4_0: 32 / 2 bytes + sizeof(uint16_t) = 18 bytes per block
 *   - QUANT_GGML_Q8_0: 32 bytes     + sizeof(uint16_t) = 34 bytes per block
 * NOTE(review): the extra uint16_t is presumably the per-block scale - confirm
 *               against the ggml block layout.
 *
 * @return Tensor size
 * @throw  std::runtime_error if the data type has no known size, or if a block
 *         quantized operand has no last dimension that is a multiple of 32.
 *         Any std::runtime_error raised while computing the plain size of a
 *         non-block type is propagated unchanged.
 */
size_t OperandInfo::total_size() const
{
  const auto data_type = _typeInfo.type();
  try
  {
    return _shape.num_elements() * sizeOfDataType(data_type);
  }
  catch (const std::runtime_error &)
  {
    // Calculate total size for ggml block quantization type on exception handling
    // because it is rare case and we should care about performance on non-block case.
    if (data_type != DataType::QUANT_GGML_Q4_0 && data_type != DataType::QUANT_GGML_Q8_0)
      throw; // rethrow the original exception object (no copy, no slicing of derived types)

    // Number of elements packed into one quantization block
    constexpr int block_elements = 32;

    // A rank-0 shape has no last dimension; reject it here so that index -1 is
    // never passed to dim().
    const auto rank = _shape.rank();
    if (rank == 0 || _shape.dim(rank - 1) % block_elements != 0)
      throw std::runtime_error{
        "Block quantization requires the last dimension to be a multiple of 32"};

    const auto num_blocks = _shape.num_elements() / block_elements;
    const auto block_size = data_type == DataType::QUANT_GGML_Q4_0
                              ? (sizeof(uint8_t) * block_elements / 2 + sizeof(uint16_t))
                              : (sizeof(uint8_t) * block_elements + sizeof(uint16_t));
    return num_blocks * block_size;
  }
}

} // namespace ir
} // namespace onert
55 changes: 55 additions & 0 deletions runtime/onert/core/src/ir/OperandInfo.test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "ir/OperandInfo.h"

#include <gtest/gtest.h>

using namespace onert::ir;

// Checks total_size() for element-wise types and for both ggml block quantization types
TEST(ir_OperandInfo, total_size)
{
  // 6 elements * sizeof(float)
  auto info = OperandInfo::createStaticInfo(Shape{1, 2, 3}, TypeInfo{DataType::FLOAT32});
  EXPECT_EQ(info.total_size(), 24);

  // 6 elements * sizeof(int8_t)
  info = OperandInfo::createStaticInfo(Shape{1, 2, 3}, TypeInfo{DataType::QUANT_INT8_SYMM});
  EXPECT_EQ(info.total_size(), 6);

  // Block quantization type operand: 128 elements -> 4 blocks of 18 bytes (32 / 2 + 2)
  info = OperandInfo::createStaticInfo(Shape{1, 4, 32}, TypeInfo{DataType::QUANT_GGML_Q4_0});
  EXPECT_EQ(info.total_size(), 18 * 4);

  // Block quantization type operand: 128 elements -> 4 blocks of 34 bytes (32 + 2)
  info = OperandInfo::createStaticInfo(Shape{1, 4, 32}, TypeInfo{DataType::QUANT_GGML_Q8_0});
  EXPECT_EQ(info.total_size(), 34 * 4);
}

// A data type value outside the known set must make total_size() throw
TEST(ir_OperandInfo, neg_total_size_type)
{
  const Shape shape{1, 2, 3};
  const TypeInfo invalid_type{DataType{-1}};

  const auto operand = OperandInfo::createStaticInfo(shape, invalid_type);
  EXPECT_THROW(operand.total_size(), std::runtime_error);
}

// Shapes for which total_size() is expected to throw
TEST(ir_OperandInfo, neg_total_size_dimension)
{
  // Shape containing an unspecified (-1) dimension
  const auto unspecified =
    OperandInfo::createStaticInfo(Shape{1, -1, 3}, TypeInfo{DataType::FLOAT32});
  EXPECT_THROW(unspecified.total_size(), std::runtime_error);

  // Block quantization operands whose last dimension is not a multiple of 32
  const auto q4_operand =
    OperandInfo::createStaticInfo(Shape{1, 2, 3}, TypeInfo{DataType::QUANT_GGML_Q4_0});
  EXPECT_THROW(q4_operand.total_size(), std::runtime_error);

  const auto q8_operand =
    OperandInfo::createStaticInfo(Shape{1, 2, 5}, TypeInfo{DataType::QUANT_GGML_Q8_0});
  EXPECT_THROW(q8_operand.total_size(), std::runtime_error);
}

0 comments on commit 8593953

Please sign in to comment.