commit ce8705f49da5e5f59c2ddb3253ef88323a0cd9c4 Author: sophgo-forum-service <forum_service@sophgo.com> Date: Mon May 13 14:04:10 2024 +0800 [feat] cvimath opensource for cv18xx soc. - 9e8967
166 lines
5.1 KiB
C++
166 lines
5.1 KiB
C++
// \file Implements the sigmoid activation function with an interpolated lookup table.
// See https://en.wikipedia.org/wiki/Linear_interpolation for more details.
|
|
|
|
// header include
|
|
#include <assert.h>
|
|
#include <cvimath_internal.h> // math
|
|
#include <test_cvikernel_util.h> // kerenl
|
|
|
|
// ========== user config ============
// Largest absolute difference accepted between a device result and its CPU reference.
#define MAX_ERROR (0.004)
// For the current example the data is quantized to -8 ~ +8.
// The range depends on the activation function being approximated.
// Both bounds are fixed configuration values, hence const.
static const int range_start = -8;
static const int range_end = 8;
// ========== end of user config ============
|
|
|
|
// gen reference by cpu
|
|
static double _gen_sigmoid(float x) { return 1.0 / (1.0 + exp(-(x))); }
|
|
|
|
// gen reference
|
|
static void gen_ref(uint16_t *ofmap, uint16_t *ifmap, cvk_tl_shape_t ifmap_shape) {
|
|
for (uint64_t i = 0; i < tl_shape_size(&ifmap_shape); i++) {
|
|
ofmap[i] = convert_fp32_bf16(_gen_sigmoid(convert_bf16_fp32(ifmap[i])));
|
|
}
|
|
}
|
|
|
|
// verify cpu data with tpu
|
|
static bool verify(uint16_t *ofmap_data, uint16_t *ref_data, uint64_t ofmap_size) {
|
|
int count = 0;
|
|
uint64_t size = ofmap_size;
|
|
|
|
for (uint64_t i = 0; i < size; i++) {
|
|
float got = convert_bf16_fp32(ofmap_data[i]);
|
|
float exp = convert_bf16_fp32(ref_data[i]);
|
|
|
|
if (fabs(got - exp) > MAX_ERROR) {
|
|
fprintf(stderr,
|
|
"[%d] comparing failed at ofmap_data[%u], got %x, exp %x, "
|
|
"diff(%f - %f) is %f\n",
|
|
count, (uint32_t)i, ofmap_data[i], ref_data[i], got, exp, fabs(got - exp));
|
|
count++;
|
|
}
|
|
}
|
|
|
|
// exit if fail
|
|
if (count != 0) {
|
|
printf("error count is %d\n", count);
|
|
exit(-1);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// Fill ifmap with a deterministic test pattern, converted by convert_fp32_bf16().
//
// ifmap      - destination buffer, one uint16_t per element
// ifmap_size - number of elements to generate
static void gen_input(uint16_t *ifmap, uint64_t ifmap_size) {
  const int table_hw = 256;

  for (uint64_t idx = 0; idx < ifmap_size; idx++) {
    // Integer part: index modulo 7, positive for odd indices and negative for even ones.
    const int magnitude = (int)idx % 7;
    const int sign = ((int)idx % 2) ? 1 : -1;

    // input range is -8 ~ +8
    // A constant 0.03 plus a 0.002 step that repeats every table_hw elements is added on top.
    float input = magnitude * sign + 0.03 + (idx % table_hw) * 0.002;

    ifmap[idx] = convert_fp32_bf16(input);
  }
}
|
|
|
|
// main code for test sigmoid interpolate implement by lookup table
|
|
static void testbench(CVI_RT_HANDLE *ctx, cvk_context_t *bmk) {
|
|
// example for input tensor
|
|
cvk_tl_shape_t ifmap_shape = {1, 32, 16, 16};
|
|
cvk_fmt_t fmt = CVK_FMT_BF16;
|
|
|
|
// get table / input shape
|
|
cvk_tl_shape_t table_shape;
|
|
cvm_table_shape(bmk, &table_shape);
|
|
cvk_tl_shape_t ofmap_shape = ifmap_shape;
|
|
|
|
uint64_t ifmap_size = tl_shape_size(&ifmap_shape);
|
|
uint64_t table_size = tl_shape_size(&table_shape);
|
|
uint64_t ofmap_size = tl_shape_size(&ofmap_shape);
|
|
|
|
// get table/input size
|
|
int data_type_size = 1;
|
|
if (fmt == CVK_FMT_BF16) {
|
|
// bf16 takes 2 bytes
|
|
data_type_size = 2;
|
|
}
|
|
|
|
uint64_t ifmap_bytesize = ifmap_size * data_type_size;
|
|
uint64_t table_bytesize = table_size * data_type_size;
|
|
uint64_t ofmap_bytesize = ofmap_size * data_type_size;
|
|
|
|
// alloc host memory
|
|
uint16_t *ifmap = (uint16_t *)xmalloc(ifmap_bytesize);
|
|
uint16_t *table_data = (uint16_t *)xmalloc(table_bytesize);
|
|
uint16_t *table_data_slope = (uint16_t *)xmalloc(table_bytesize);
|
|
uint16_t *ref_data = (uint16_t *)xmalloc(ofmap_bytesize);
|
|
|
|
// gen input and assign data in host
|
|
gen_input(ifmap, ifmap_size);
|
|
|
|
// gen table, interpolation need 2 tables, one for lookup, another one is slope
|
|
cvm_sigmoid_tbl(table_data, table_data_slope, &table_shape, range_start, range_end);
|
|
|
|
// gen reference
|
|
gen_ref(ref_data, ifmap, ofmap_shape);
|
|
|
|
// alloc input / output / tmp / lookup table / slope table
|
|
cvk_tl_t *tl_ifmap = test_alloc_tl(bmk, ifmap_shape, fmt, /*align*/ 1);
|
|
cvk_tl_t *cvk_tl_table_answer = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);
|
|
cvk_tl_t *cvk_tl_table_answer_slope = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);
|
|
cvk_tl_t *tl_buf = test_alloc_tl(bmk, ofmap_shape, fmt, /*align*/ 1);
|
|
cvk_tl_t *tl_ofmap_bf16 = test_alloc_tl(bmk, ofmap_shape, fmt, /*align*/ 1);
|
|
|
|
// device memory load to local memory
|
|
test_put_tensor_g2l_comp(ctx, bmk, tl_ifmap, (uint8_t *)ifmap);
|
|
test_put_tensor_g2l_comp(ctx, bmk, cvk_tl_table_answer, (uint8_t *)table_data);
|
|
test_put_tensor_g2l_comp(ctx, bmk, cvk_tl_table_answer_slope, (uint8_t *)table_data_slope);
|
|
|
|
// get quantize(scale) value
|
|
float scale = cvm_sigmoid_scale(range_start, range_end);
|
|
|
|
// emit core function
|
|
cvm_emit_sigmoid(bmk, tl_ifmap, tl_buf, cvk_tl_table_answer, cvk_tl_table_answer_slope,
|
|
tl_ofmap_bf16, scale);
|
|
|
|
// get result from device to host
|
|
uint16_t *ofmap_data = (uint16_t *)test_get_tensor_l2g_comp(ctx, bmk, tl_ofmap_bf16);
|
|
|
|
// verify data with tolerance
|
|
verify(ofmap_data, ref_data, ofmap_size);
|
|
|
|
// release device memory in revert order
|
|
free_tl(bmk, tl_ofmap_bf16);
|
|
free_tl(bmk, tl_buf);
|
|
free_tl(bmk, cvk_tl_table_answer_slope);
|
|
free_tl(bmk, cvk_tl_table_answer);
|
|
free_tl(bmk, tl_ifmap);
|
|
|
|
// release host memory
|
|
free(ifmap);
|
|
free(table_data);
|
|
free(table_data_slope);
|
|
free(ref_data);
|
|
free(ofmap_data);
|
|
}
|
|
|
|
int main() {
|
|
CVI_RT_HANDLE ctx;
|
|
cvk_context_t *bmk;
|
|
int round_mode;
|
|
|
|
round_mode = set_store_feround();
|
|
|
|
// init runtime / kerenl structure
|
|
test_init(&ctx, &bmk);
|
|
|
|
// emit test case
|
|
testbench(&ctx, bmk);
|
|
|
|
// de-init runtime / kerenl structure
|
|
test_exit(&ctx, bmk);
|
|
|
|
// restore rounding
|
|
restore_feround(round_mode);
|
|
|
|
return 0;
|
|
}
|