Files
carbon e25f20f7a3 add cviruntime
commit 3f4938648950a7f3bf9a19c320ca9fae7c52de20
Author: sophgo-forum-service <forum_service@sophgo.com>
Date:   Mon May 13 13:44:23 2024 +0800

    [feat] cviruntime opensource for cv18xx soc.

    - a4b6a3, add cumsum and gatherelements_pt.
2024-05-31 11:51:34 +08:00

191 lines
6.3 KiB
C++

/*
* Copyright (C) Cvitek Co., Ltd. 2019-2020. All rights reserved.
*/
#ifndef CVI_QUANT_HELPER_H
#define CVI_QUANT_HELPER_H
#include <assert.h>
#include <stdint.h>
#include <cmath>
#include <iostream>
#include <limits>
#include <string>
/// Divide x by 2^exponent, rounding to the nearest integer with ties rounded
/// away from zero.
///
/// @param x         value to divide
/// @param exponent  power-of-two exponent of the divisor; must be >= 0
///                  (checked with assert)
/// @return the rounded quotient
static int RoundingDivideByPOT(int x, int exponent) {
  if (x == 0) {
    return 0;
  }
  if (exponent == 0) {
    return x;
  }
  assert(exponent > 0);
  // |x| <= 2^31, so for exponent >= 33 the quotient magnitude is <= 0.25 and
  // always rounds to 0. Returning early also keeps the shifts below in range.
  if (exponent > 32) {
    return 0;
  }
  // Work in 64 bits: x + nudge overflows a 32-bit int when x is close to
  // INT_MAX, and 32-bit shifts are not defined for exponent == 32.
  const int64_t wide_x = static_cast<int64_t>(x);
  // Negative inputs are lowered by one so that the arithmetic right shift
  // (which rounds toward -infinity) ends up rounding ties away from zero.
  const int64_t fixup = (x < 0) ? -1 : 0;
  const int64_t nudge = int64_t{1} << (exponent - 1);
  return static_cast<int>((wide_x + fixup + nudge) >> exponent);
}
/// Return the high 32 bits of 2 * a * b, rounded to nearest, i.e.
/// round(a * b / 2^31), saturating on overflow.
///
/// @param a  first 32-bit operand
/// @param b  second 32-bit operand
/// @return the rounded, doubled high half of the 64-bit product
static int SaturatingRoundingDoublingHighMul(int a, int b) {
  // The only input pair whose result (2^31) does not fit in 32 bits is
  // INT_MIN * INT_MIN; saturate it instead of letting the cast wrap around.
  if (a == b && a == std::numeric_limits<int>::min()) {
    return std::numeric_limits<int>::max();
  }
  const int64_t product = static_cast<int64_t>(a) * static_cast<int64_t>(b);
  // Add half of the divisor, signed like the product, so that the truncating
  // division below rounds to nearest.
  const int64_t nudge = (product >= 0) ? (1 << 30) : (1 - (1 << 30));
  return static_cast<int>((product + nudge) / (int64_t{1} << 31));
}
/// Round a float to an integer and saturate it to the range [-128, 127].
///
/// Rounding is floor(f + 0.5), the variant taken from caffe_int8. Earlier
/// revisions of this function also carried plain cast, truncate-toward-zero,
/// away-from-zero and std::round alternatives behind disabled #if branches;
/// std::round was noted as looking different from the HW behavior.
///
/// @param f  value to quantize
/// @return the rounded value clamped to [-128, 127]; NaN maps to 0
static int8_t saturateInt8(float f) {
  const double rounded = std::floor(static_cast<double>(f) + 0.5);
  // Clamp while still in floating point: converting a NaN or a value outside
  // the int range to int is undefined behavior (on x86 a huge positive input
  // becomes INT_MIN and would then saturate to -128 instead of 127).
  if (std::isnan(rounded)) {
    return 0;
  }
  if (rounded >= 127.0) {
    return 127;
  }
  if (rounded <= -128.0) {
    return -128;
  }
  return static_cast<int8_t>(static_cast<int>(rounded));
}
/// Simulate HW behavior after accumulation:
/// apply the multiplier, do the right shift, and then saturate to INT8.
/// Used in BM1880v2 per-channel mode (32bit bias).
///
/// qdm mode uses the GOOGLE GEMMLOWP QDM multiply and shift; during the
/// multiply a factor of (1 << 31) has already been divided out.
///
/// @param v           accumulated value; in qdm mode it is converted to
///                    int32_t, so it is expected to be within int32 range
/// @param rshift      number of bits to shift right after the multiply
/// @param multiplier  integer multiplier
/// @param qdm         true to use the gemmlowp doubling-high-mul path,
///                    false to compute v * multiplier / 2^rshift in float
/// @return the scaled value saturated to [-128, 127]
static int8_t applyMultiplierAndRShiftAndSaturateInt8(float v, uint32_t rshift,
                                                      uint32_t multiplier, bool qdm) {
  if (qdm) {
    const int32_t high = SaturatingRoundingDoublingHighMul(
        static_cast<int32_t>(v), static_cast<int32_t>(multiplier));
    const int32_t q = RoundingDivideByPOT(high, static_cast<int>(rshift));
    return saturateInt8(static_cast<float>(q));
  }
  // Scale by 2^-rshift with ldexp rather than dividing by (1 << rshift):
  // the int shift is negative for rshift == 31 and undefined beyond that.
  // For rshift < 31 the result is identical, since both forms scale by an
  // exact power of two.
  const float product = v * static_cast<float>(multiplier);
  return saturateInt8(std::ldexp(product, -static_cast<int>(rshift)));
}
// Reference: [arxiv 1712.05877]
// This implementation comes from tensorflow
// https://github.com/tensorflow/tensorflow/blob/98ff991500a0247f8f57c60db9a206204268bc42/tensorflow/lite/kernels/internal/quantization_util.cc#L52-L90
#define Tensorflow_QuantizeMultiplier QuantizeMultiplier
/// Decompose a real multiplier into a 32-bit fixed-point mantissa and a
/// power-of-two exponent, so that
///   double_multiplier ~= quantized_multiplier * 2^(shift - 31).
///
/// @param double_multiplier     real-valued scale to decompose
/// @param quantized_multiplier  out: mantissa scaled by 2^31 (0 for a zero or
///                              flushed-to-zero multiplier)
/// @param shift                 out: power-of-two exponent (left shift)
static void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier,
                               int *shift) {
  const int64_t kOneQ31 = 1ll << 31;

  if (double_multiplier == 0.) {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }

  // frexp yields a fraction with magnitude in [0.5, 1) and its exponent.
  int exponent = 0;
  const double fraction = std::frexp(double_multiplier, &exponent);
  int64_t mantissa = static_cast<int64_t>(std::round(fraction * kOneQ31));
  assert(mantissa <= kOneQ31);

  // Rounding may push the mantissa up to exactly 2^31; renormalize by halving
  // it and bumping the exponent.
  if (mantissa == kOneQ31) {
    mantissa /= 2;
    ++exponent;
  }
  assert(mantissa <= std::numeric_limits<int32_t>::max());

  // A shift amount smaller than -31 would shift out every bit, so all results
  // would be zero. That case is represented with a zero mantissa instead,
  // which avoids right-shift operations by more than 31 bits. It occurs
  // roughly when abs(double_multiplier) < 2^-31, so tiny multipliers are
  // effectively flushed to zero. Values in the range (roughly) [32, 63] could
  // conceivably be handled as 'denormals' (shift == 0, mantissa < 2^30); the
  // present handling is 'flush denormals to zero' and could be revisited if a
  // need arises.
  if (exponent < -31) {
    exponent = 0;
    mantissa = 0;
  }

  *shift = exponent;
  *quantized_multiplier = static_cast<int32_t>(mantissa);
}
/// Find RShift and Multiplier from QScale, where
///   QScale = Multiplier / (1 << RShift)
/// and Multiplier is an integer.
///
/// case 1: multiply by an int8/uint8 multiplier, then rshift
///   used in layers like element_wise, pooling, concat, etc
///   qdm is false
///   a max_multiplier (127 or 255 normally) has to be provided
/// case 2: qdm mode
///   used in BM1880v2 per-channel conv mode
///   qdm is true
///   reference to [arxiv 1712.05877]
///   choose the int32 value nearest to 2^31 * M0, M0 in [0.5, 1]
///   this value is always at least 2^30 and has at least 30 bits accuracy
///   the max_multiplier argument is ignored, fixed to (1 << 31)
///
/// @param qscale          real-valued scale to approximate
/// @param multiplier      optional out-parameter; when non-null it receives
///                        the multiplier alongside the returned shift
/// @param qdm             selects case 2 when true, case 1 when false
/// @param max_multiplier  upper bound for the multiplier in case 1
/// @return the right-shift amount; 0 (with multiplier 0) when case 1 finds
///         no suitable shift
static int8_t findRShiftAndMultiplierFromQScale(double qscale,
                                                uint32_t *multiplier = nullptr,
                                                bool qdm = false,
                                                uint32_t max_multiplier = 127) {
  if (!qdm) {
    assert(qscale < max_multiplier);
    const double limit = static_cast<double>(max_multiplier);
    // Pick the first shift whose successor would push the scaled value to or
    // past the limit.
    int8_t candidate = 0;
    while (candidate < 63) {
      const double next_scaled = qscale * (1ULL << (candidate + 1));
      if (next_scaled >= limit) {
        if (multiplier != nullptr) {
          *multiplier = static_cast<uint32_t>(qscale * (1ULL << candidate));
        }
        return candidate;
      }
      ++candidate;
    }
    std::cout << "WARNING: failed to find rshift, qscale = " << std::to_string(qscale)
              << "\n";
    // we are here because qscale is too small, return 0 for both shift and multiplier
    if (multiplier != nullptr) {
      *multiplier = 0;
    }
    return 0;
  }

  // qdm mode: zero-initialized so that a qscale of 0 yields 0 for both the
  // multiplier and the shift.
  int32_t quantized_multiplier = 0;
  int lshift = 0;
  Tensorflow_QuantizeMultiplier(qscale, &quantized_multiplier, &lshift);
  if (multiplier != nullptr) {
    *multiplier = quantized_multiplier;
  }
  // The helper reports a left shift; callers of this function want a right shift.
  const int rshift = -lshift;
  assert(rshift >= 0);
  if (rshift > 25) {
    std::cout << "WARNING: large rshift = " << rshift;
    std::cout << ", qscale = " << qscale << "\n";
  }
  return static_cast<int8_t>(rshift);
}
#endif