add cvimath
commit ce8705f49da5e5f59c2ddb3253ef88323a0cd9c4 Author: sophgo-forum-service <forum_service@sophgo.com> Date: Mon May 13 14:04:10 2024 +0800 [feat] cvimath opensource for cv18xx soc. - 9e8967
This commit is contained in:
34
cvimath/tests/CMakeLists.txt
Normal file
34
cvimath/tests/CMakeLists.txt
Normal file
@ -0,0 +1,34 @@
|
||||
# Build script for the cvimath tests: every cvi1835/*.cpp source is built into
# its own executable together with the shared helper sources from common/,
# installed into bin, and registered with CTest.
project(cvimath)

include(CTest)

include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include")

# NOTE(review): the pattern is "*c" (no dot), so it matches every file in
# common/ whose name ends in 'c' -- confirm whether "*.c" was intended.
file(GLOB _TEST_UTILS "${CMAKE_CURRENT_SOURCE_DIR}/common/*c")

# cvi1835 test
include_directories(
  ${CMAKE_SOURCE_DIR}/include
  ${CMAKE_SOURCE_DIR}/src
  )
file(GLOB CVI1835_TESTS cvi1835/*.cpp)

# FIXME: repair test case
list(FILTER CVI1835_TESTS EXCLUDE REGEX ".*atan2.*")
list(FILTER CVI1835_TESTS EXCLUDE REGEX ".*depthwise_reshape_same.*")

foreach(TEST_SRC ${CVI1835_TESTS})
  # The executable / test name is the source file name without directory or extension.
  get_filename_component(TEST_NAME ${TEST_SRC} NAME_WE)

  add_executable(${TEST_NAME} ${_TEST_UTILS} ${TEST_SRC})
  target_link_libraries(${TEST_NAME} ${TPU_KERNEL_LIB} ${TEST_LIBS})
  set_target_properties(${TEST_NAME} PROPERTIES COMPILE_FLAGS "-Werror -Wall -Wextra")
  install(TARGETS ${TEST_NAME} DESTINATION bin)

  add_test(${TEST_NAME} ${TEST_NAME} ctest_test)
endforeach()

#add_library(${PROJECT_NAME} SHARED ${SRC})
#target_link_libraries(${PROJECT_NAME} ${TPU_KERNEL_LIB})
#install(TARGETS ${PROJECT_NAME} DESTINATION tests)
|
||||
|
||||
980
cvimath/tests/common/test_native_ref.c
Normal file
980
cvimath/tests/common/test_native_ref.c
Normal file
@ -0,0 +1,980 @@
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <test_native_ref.h>
|
||||
|
||||
// Two-argument min / max. Each argument can be evaluated twice, so do not
// pass expressions with side effects.
#define math_min(x, y) ((x) < (y) ? (x) : (y))
#define math_max(x, y) ((x) > (y) ? (x) : (y))

// The fixed-width integer types (uint8_t ... int32_t) are used directly;
// re-declaring them as typedefs of themselves is not valid before C11.
typedef int64_t s64;
typedef uint32_t bmerr_t;

#define BM_SUCCESS 0               // The operation was successful
#define BM_ERR_AGAIN 1             // Not ready yet
#define BM_ERR_FAILURE 2           // General failure
#define BM_ERR_TIMEOUT 3           // Timeout
#define BM_ERR_UNINITIALIZED 4     // Uninitialized
#define BM_ERR_INVALID_ARGUMENT 5  // Arguments invalid
#define BM_ERR_NOMEM 6             // Not enough memory
#define BM_ERR_DATA 7              // Data error
#define BM_ERR_BUSY 8              // Busy
#define BM_ERR_NOT_SUPPORTED 9     // Not supported yet

// Element-wise operation selector used by the native_md_scalar* helpers.
typedef uint32_t BLOB_OP;
#define BLOB_ADD 0
#define BLOB_SUB 1
#define BLOB_MUL 2
#define BLOB_DIV 3
#define BLOB_INVALID 4
|
||||
|
||||
// Flatten a 4-element coordinate into a linear row-major index.
//   shape:  extents of the four dimensions (shape[0] is not read).
//   offset: coordinate along each of the four dimensions.
// Returns ((o0 * s1 + o1) * s2 + o2) * s3 + o3.
static inline int calc_offset(const int *shape, const int *offset) {
  int linear = offset[0];
  for (int dim = 1; dim < 4; dim++) {
    linear = linear * shape[dim] + offset[dim];
  }
  return linear;
}
|
||||
|
||||
// Linear index of element (h, w2) in a row-major 2-D array whose rows are w1 elements wide.
static int index_get(int h, int w1, int w2) {
  return h * w1 + w2;
}
|
||||
|
||||
int array_cmp_float_rel(const char *const info, float *p_exp, float *p_got, int count,
|
||||
float delta) {
|
||||
int idx = 0;
|
||||
for (idx = 0; idx < count; idx++) {
|
||||
if (math_max(fabs(p_exp[idx]), fabs(p_got[idx])) > 1.0) {
|
||||
// compare rel
|
||||
if (math_min(fabs(p_exp[idx]), fabs(p_got[idx])) < 1e-20) {
|
||||
printf("%s rel error at index %d exp %.20f got %.20f\n", info, idx, p_exp[idx], p_got[idx]);
|
||||
if (isnan(p_exp[idx]) && isnan(p_got[idx])) {
|
||||
printf("both exp and got are NAN");
|
||||
return 0;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
if (fabs(p_exp[idx] - p_got[idx]) > delta * math_min(fabs(p_exp[idx]), fabs(p_got[idx]))) {
|
||||
printf("%s rel error at index %d exp %.20f got %.20f\n", info, idx, p_exp[idx], p_got[idx]);
|
||||
if (isnan(p_exp[idx]) && isnan(p_got[idx])) {
|
||||
printf("both exp and got are NAN");
|
||||
return 0;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
if (fabs(p_exp[idx] - p_got[idx]) > delta) {
|
||||
printf("%s abs error at index %d exp %.20f got %.20f\n", info, idx, p_exp[idx], p_got[idx]);
|
||||
if (isnan(p_exp[idx]) && isnan(p_got[idx])) {
|
||||
printf("both exp and got are NAN");
|
||||
return 0;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (isnan(p_got[idx]) && !isnan(p_exp[idx])) {
|
||||
printf("%s, found nans idx %d\n", info, idx);
|
||||
printf("floating from exp %.10f got %.10f\n", p_exp[idx], p_got[idx]);
|
||||
IF_VAL exp, got;
|
||||
exp.fval = p_exp[idx];
|
||||
got.fval = p_got[idx];
|
||||
printf("hex form exp %8.8x got %8.8x\n", exp.ival, got.ival);
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Compare two float arrays.
//
// With delta == 0 the comparison is exact: the first differing pair is printed
// and -1 is returned, except that when both values are NaN the function
// returns 0 immediately without examining the remaining elements.
// With any other delta the work is delegated to array_cmp_float_rel().
//
// Returns 0 when no mismatch is reported.
int array_cmp_float(const char *const info, float *p_exp, float *p_got, int count, float delta) {
  if (delta != 0.0f) {
    return array_cmp_float_rel(info, p_exp, p_got, count, delta);
  }

  for (int idx = 0; idx < count; idx++) {
    if (p_exp[idx] == p_got[idx]) {
      continue;
    }
    printf("%s error at index %d exp %.20f got %.20f\n", info, idx, p_exp[idx], p_got[idx]);
    if (isnan(p_exp[idx]) && isnan(p_got[idx])) {
      printf("both exp and got are NAN\n");
      return 0;
    }
    return -1;
  }
  return 0;
}
|
||||
|
||||
// Compare two int arrays of `count` elements.
// Returns 0 when they are identical; otherwise prints the first mismatch
// (prefixed with `info`) and returns -1.
int array_cmp_int(const char *const info, int *p_exp, int *p_got, int count) {
  for (int idx = 0; idx < count; idx++) {
    if (p_exp[idx] != p_got[idx]) {
      printf("%s error at index %d exp %d got %d\n", info, idx, p_exp[idx], p_got[idx]);
      return -1;
    }
  }
  return 0;
}
|
||||
|
||||
// Compare two int8_t arrays of `count` elements.
// Returns 0 when they are identical; otherwise prints the first mismatch
// (prefixed with `info`) and returns -1.
int array_cmp_int8(const char *const info, const int8_t *p_exp, const int8_t *p_got, int count) {
  for (int idx = 0; idx < count; idx++) {
    if (p_exp[idx] != p_got[idx]) {
      printf("%s error at index %d exp %d got %d\n", info, idx, p_exp[idx], p_got[idx]);
      return -1;
    }
  }
  return 0;
}
|
||||
|
||||
// Extent of one dimension after inserting elements and adding padding:
//   (h - 1) * (ins_h + 1) + ins_h_l + 1 + pad_h_t + pad_h_b
// where ins_h elements are inserted between neighbours, ins_h_l after the last
// element, and pad_h_b / pad_h_t are added at the two ends.
int calc_dilute_hw(int h, int ins_h, int ins_h_l, int pad_h_b, int pad_h_t) {
  const int diluted = (h - 1) * (ins_h + 1) + ins_h_l + 1;
  return diluted + pad_h_t + pad_h_b;
}
|
||||
|
||||
// Number of window positions along one dimension: (hw - khw) / stride + 1,
// using integer division. `stride` must be non-zero.
int calc_output_hw(int hw, int khw, int stride) {
  return (hw - khw) / stride + 1;
}
|
||||
|
||||
int fill_pad_fmap_int8(const int8_t *before, int8_t **pafter, int val, int pad_l, int pad_r,
|
||||
int pad_t, int pad_b, int ins_h, int ins_w, int ins_h_last, int ins_w_last,
|
||||
int h_before, int w_before) {
|
||||
int w_after = (w_before - 1) * (ins_w + 1) + ins_w_last + 1 + pad_l + pad_r;
|
||||
int h_after = (h_before - 1) * (ins_h + 1) + ins_h_last + 1 + pad_t + pad_b;
|
||||
int8_t *after = *pafter;
|
||||
if (!before || !pafter) return BM_ERR_INVALID_ARGUMENT;
|
||||
|
||||
if (!after) {
|
||||
after = malloc(sizeof(int8_t) * w_after * h_after);
|
||||
if (!after) return BM_ERR_NOMEM;
|
||||
}
|
||||
|
||||
memset(after, val, w_after * h_after);
|
||||
for (int h = 0; h < h_before; h++) {
|
||||
for (int w = 0; w < w_before; w++) {
|
||||
int i = (h * (ins_h + 1) + pad_t) * w_after + w * (ins_w + 1) + pad_l;
|
||||
after[i] = before[h * w_before + w];
|
||||
}
|
||||
}
|
||||
|
||||
*pafter = after;
|
||||
return BM_SUCCESS;
|
||||
}
|
||||
|
||||
int fill_pad_fmap_bf16(const uint16_t *before, uint16_t **pafter, int val, int pad_l, int pad_r,
|
||||
int pad_t, int pad_b, int ins_h, int ins_w, int ins_h_last, int ins_w_last,
|
||||
int h_before, int w_before) {
|
||||
int w_after = (w_before - 1) * (ins_w + 1) + ins_w_last + 1 + pad_l + pad_r;
|
||||
int h_after = (h_before - 1) * (ins_h + 1) + ins_h_last + 1 + pad_t + pad_b;
|
||||
uint16_t *after = *pafter;
|
||||
if (!before || !pafter) return BM_ERR_INVALID_ARGUMENT;
|
||||
if (!after) {
|
||||
after = malloc(sizeof(uint16_t) * w_after * h_after);
|
||||
if (!after) return BM_ERR_NOMEM;
|
||||
}
|
||||
for (int i = 0; i < w_after * h_after; i++) after[i] = val;
|
||||
|
||||
for (int h = 0; h < h_before; h++) {
|
||||
for (int w = 0; w < w_before; w++) {
|
||||
int i = (h * (ins_h + 1) + pad_t) * w_after + w * (ins_w + 1) + pad_l;
|
||||
after[i] = before[h * w_before + w];
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
printf("bf16 padding:\n");
|
||||
for(int i=0;i<h_after;i++) {
|
||||
printf("[\n");
|
||||
for(int j=0;j<w_after;j++)
|
||||
printf("%04x ", (after[i*w_after+j]));
|
||||
printf("\n");
|
||||
}
|
||||
printf("]\n");
|
||||
#endif
|
||||
*pafter = after;
|
||||
return BM_SUCCESS;
|
||||
}
|
||||
|
||||
// Widen `len` signed 8-bit values from psrc into the int array pdest (sign-extending).
void fill_int_with_int8(int *pdest, int8_t *psrc, int len) {
  for (int ii = 0; ii < len; ii++) {
    pdest[ii] = (int)psrc[ii];
  }
}
|
||||
|
||||
// Widen `len` unsigned 8-bit values from psrc into the int array pdest (zero-extending).
void fill_int_with_uint8(int *pdest, uint8_t *psrc, int len) {
  for (int ii = 0; ii < len; ii++) {
    pdest[ii] = psrc[ii];
  }
}
|
||||
|
||||
// Widen `len` signed 16-bit values from psrc into the int array pdest (sign-extending).
void fill_int_with_int16(int *pdest, int16_t *psrc, int len) {
  for (int ii = 0; ii < len; ii++) {
    pdest[ii] = psrc[ii];
  }
}
|
||||
|
||||
// Integer dot product: stores sum(a[i] * b[i]) for i in [0, len) into *c.
// *c is reset to 0 first, so len <= 0 yields 0.
void inner_product(const int *a, const int *b, int len, int *c) {
  *c = 0;
  for (int ii = 0; ii < len; ii++) {
    *c += a[ii] * b[ii];
  }
}
|
||||
|
||||
// Float dot product: stores sum(a[i] * b[i]) for i in [0, len) into *c,
// accumulating in index order. *c is reset to 0 first, so len <= 0 yields 0.
void inner_float_product(const float *a, const float *b, int len, float *c) {
  *c = 0;
  for (int ii = 0; ii < len; ii++) {
    *c += a[ii] * b[ii];
  }
}
|
||||
|
||||
int fill_pad_fmap_fp32(const float *before, float **after, float pad_value, int pad_h_t,
|
||||
int pad_h_b, int pad_w_l, int pad_w_r, int ins_h, int ins_w, int ins_h_l,
|
||||
int ins_w_l, int h, int w) {
|
||||
int h_after = calc_dilute_hw(h, ins_h, ins_h_l, pad_h_b, pad_h_t);
|
||||
int w_after = calc_dilute_hw(w, ins_w, ins_w_l, pad_w_l, pad_w_r);
|
||||
float *ofmap = NULL;
|
||||
|
||||
if (before == NULL || after == NULL) {
|
||||
return BM_ERR_INVALID_ARGUMENT;
|
||||
}
|
||||
if (*after == NULL && (*after = malloc(sizeof(float) * h_after * w_after)) == NULL) {
|
||||
printf("No enough memory: [h_after, w_after]=[%i, %i].\n", h_after, w_after);
|
||||
return BM_ERR_NOMEM;
|
||||
}
|
||||
|
||||
ofmap = *after;
|
||||
for (int i = 0; i < h_after * w_after; i++) {
|
||||
ofmap[i] = pad_value;
|
||||
}
|
||||
for (int i = 0; i < h; i++) {
|
||||
float *start_addr = ofmap + (pad_h_t + i * (ins_h + 1)) * w_after + pad_w_l;
|
||||
int ins_h_count = (i == h - 1) ? ins_h_l : ins_h;
|
||||
|
||||
for (int j = 0; j < ins_h_count + 1; j++) {
|
||||
memset(start_addr + j * w_after, 0, sizeof(float) * (w_after - pad_w_l - pad_w_r));
|
||||
}
|
||||
for (int j = 0; j < w; j++) {
|
||||
start_addr[j * (ins_w + 1)] = before[i * w + j];
|
||||
}
|
||||
}
|
||||
|
||||
return BM_SUCCESS;
|
||||
}
|
||||
|
||||
void native_md_scalar(float *a, float *b, float *r, int N, int C, int H, int W, int op,
|
||||
bool result_add) {
|
||||
int count = N * C * H * W;
|
||||
for (int i = 0; i < count; i++) {
|
||||
switch (op) {
|
||||
case BLOB_ADD:
|
||||
r[i] = a[i] + b[i];
|
||||
break;
|
||||
case BLOB_SUB:
|
||||
r[i] = a[i] - b[i];
|
||||
break;
|
||||
case BLOB_MUL:
|
||||
r[i] = result_add ? r[i] : 0;
|
||||
r[i] += a[i] * b[i];
|
||||
break;
|
||||
case BLOB_DIV:
|
||||
r[i] = a[i] / b[i];
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void native_md_scalar_int8(int8_t *a, int8_t *b, int8_t *r, int N, int C, int H, int W, int op,
|
||||
bool result_add) {
|
||||
int count = N * C * H * W;
|
||||
for (int i = 0; i < count; i++) {
|
||||
switch (op) {
|
||||
case BLOB_ADD:
|
||||
r[i] = a[i] + b[i];
|
||||
break;
|
||||
case BLOB_SUB:
|
||||
r[i] = a[i] - b[i];
|
||||
break;
|
||||
case BLOB_MUL:
|
||||
r[i] = result_add ? r[i] : 0;
|
||||
r[i] += a[i] * b[i];
|
||||
break;
|
||||
case BLOB_DIV:
|
||||
r[i] = a[i] / b[i];
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sum of element-wise products of two dim_n x dim_m int8 matrices stored row-major.
// When opd0_sign is zero the bytes of A are reinterpreted as unsigned (0..255);
// B is always treated as signed.
static int matrix_dot_mult(int8_t *A, int8_t *B, int dim_n, int dim_m, int opd0_sign) {
  int sum = 0;
  for (int i = 0; i < dim_n; i++) {
    for (int j = 0; j < dim_m; j++) {
      const int index = i * dim_m + j;
      const int lhs = opd0_sign ? (int)A[index] : (int)(uint8_t)A[index];
      sum += lhs * B[index];
    }
  }
  return sum;
}
|
||||
|
||||
int native_conv_int8(const int8_t *ifmap, const int8_t *weight, const int16_t *bias, int8_t *ofmap,
|
||||
int in, int ic, int ih, int iw, int oc, int kh, int kw, int dh, int dw,
|
||||
int pad_h_t, int pad_h_b, int pad_w_l, int pad_w_r, int stride_h, int stride_w,
|
||||
int ins_h, int ins_w, int ins_h_last, int ins_w_last, int input_sign,
|
||||
int r_shift_width, int do_relu) {
|
||||
int ih_ext = calc_dilute_hw(ih, ins_h, ins_h_last, pad_h_t, pad_h_b);
|
||||
int iw_ext = calc_dilute_hw(iw, ins_w, ins_w_last, pad_w_l, pad_w_r);
|
||||
int kh_ext = calc_dilute_hw(kh, dh - 1, 0, 0, 0);
|
||||
int kw_ext = calc_dilute_hw(kw, dw - 1, 0, 0, 0);
|
||||
|
||||
int oh = calc_output_hw(ih_ext, kh_ext, stride_h);
|
||||
int ow = calc_output_hw(iw_ext, kw_ext, stride_w);
|
||||
|
||||
int *result = (int *)malloc(sizeof(int) * in * oc * oh * ow);
|
||||
memset(result, 0, sizeof(int) * in * oc * oh * ow);
|
||||
int8_t *i_fmap_pad_ker = (int8_t *)malloc(kh_ext * kw_ext);
|
||||
|
||||
int ret = BM_SUCCESS;
|
||||
|
||||
int8_t *i_fmap_pad = NULL;
|
||||
int8_t *kernel_after = NULL;
|
||||
for (int n = 0; n < in; ++n) {
|
||||
for (int c = 0; c < oc; ++c) {
|
||||
for (int cc = 0; cc < ic; ++cc) {
|
||||
fill_pad_fmap_int8((int8_t *)ifmap + n * ic * ih * iw + cc * ih * iw, &i_fmap_pad, 0,
|
||||
pad_w_l, pad_w_r, pad_h_t, pad_h_b, ins_h, ins_w, ins_h_last, ins_w_last,
|
||||
ih, iw);
|
||||
|
||||
// kernel_dilation(
|
||||
fill_pad_fmap_int8((weight + c * ic * kh * kw + cc * kh * kw), &kernel_after, 0, 0, 0, 0,
|
||||
0, // no padding
|
||||
dh - 1, dw - 1, 0, 0, kh, kw);
|
||||
|
||||
for (int ph = 0; ph < oh; ++ph) {
|
||||
for (int pw = 0; pw < ow; ++pw) {
|
||||
for (int idxh = 0; idxh < kh_ext; ++idxh)
|
||||
for (int idxw = 0; idxw < kw_ext; ++idxw) {
|
||||
i_fmap_pad_ker[idxh * kw_ext + idxw] =
|
||||
i_fmap_pad[(idxh + ph * stride_h) * iw_ext + idxw + pw * stride_w];
|
||||
}
|
||||
result[n * oc * oh * ow + c * oh * ow + ph * ow + pw] +=
|
||||
matrix_dot_mult(i_fmap_pad_ker, kernel_after, kh_ext, kw_ext, input_sign);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (bias) {
|
||||
for (int ph = 0; ph < oh; ++ph) {
|
||||
for (int pw = 0; pw < ow; ++pw) {
|
||||
result[n * oc * oh * ow + c * oh * ow + ph * ow + pw] += bias[c]; // bias+c ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ret = satu_2_8bit(&result[n * oc * oh * ow + c * oh * ow], oh * ow,
|
||||
&ofmap[n * oc * oh * ow + c * oh * ow], r_shift_width, 1, !do_relu);
|
||||
|
||||
if (ret != BM_SUCCESS) goto error_release;
|
||||
} // end for (int c = 0; c < oc; ++c)
|
||||
} // end for (int n = 0; n < in; n++)
|
||||
|
||||
error_release:
|
||||
free(i_fmap_pad);
|
||||
free(kernel_after);
|
||||
free(i_fmap_pad_ker);
|
||||
free(result);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int native_depthwise_fp32(const float *ifmap, const float *weight, const float *bias, float *ofmap,
|
||||
int in, int ic, int ih, int iw, int kh, int kw, int dh, int dw,
|
||||
int pad_h_t, int pad_h_b, int pad_w_l, int pad_w_r, int stride_h,
|
||||
int stride_w, int ins_h, int ins_w, int ins_h_last, int ins_w_last) {
|
||||
int h_after = calc_dilute_hw(ih, ins_h, ins_h_last, pad_h_t, pad_h_b);
|
||||
int w_after = calc_dilute_hw(iw, ins_w, ins_w_last, pad_w_l, pad_w_r);
|
||||
int kh_dilation = (kh - 1) * dh + 1, kw_dilatoin = (kw - 1) * dw + 1;
|
||||
int oh = calc_output_hw(h_after, kh_dilation, stride_h);
|
||||
int ow = calc_output_hw(w_after, kw_dilatoin, stride_w);
|
||||
float *ifmap_after = malloc(sizeof(float) * h_after * w_after);
|
||||
float *weight_dilation = malloc(sizeof(float) * kh_dilation * kw_dilatoin);
|
||||
|
||||
if (ifmap_after == NULL || weight_dilation == NULL) {
|
||||
printf("No enough memory.\n");
|
||||
free(ifmap_after);
|
||||
free(weight_dilation);
|
||||
|
||||
return BM_ERR_NOMEM;
|
||||
}
|
||||
|
||||
for (int n = 0; n < in; n++) {
|
||||
for (int c = 0; c < ic; c++, ifmap += ih * iw, ofmap += oh * ow) {
|
||||
float init_value = bias ? bias[c] : 0;
|
||||
int ret_ifmap = fill_pad_fmap_fp32(ifmap, &ifmap_after, 0, pad_h_t, pad_h_b, pad_w_l, pad_w_r,
|
||||
ins_h, ins_w, ins_h_last, ins_w_last, ih, iw);
|
||||
int ret_weight = fill_pad_fmap_fp32(weight + c * kh * kw, &weight_dilation, 0, 0, 0, 0, 0,
|
||||
dh - 1, dw - 1, 0, 0, kh, kw);
|
||||
|
||||
if ((ret_ifmap != BM_SUCCESS) || (ret_weight != BM_SUCCESS)) {
|
||||
printf("failed to pad ifmap or weight.\n");
|
||||
return BM_ERR_FAILURE;
|
||||
}
|
||||
|
||||
for (int h = 0; h < oh; h++) {
|
||||
for (int w = 0; w < ow; w++) {
|
||||
int rf_h = h * stride_h, rf_w = w * stride_w;
|
||||
int kh_end = math_min(kh_dilation, h_after - rf_h);
|
||||
int kw_end = math_min(kw_dilatoin, w_after - rf_w);
|
||||
float *rf_addr = ifmap_after + rf_h * w_after + rf_w;
|
||||
float dot_product_even = 0.0, dot_product_odd = 0.0;
|
||||
|
||||
for (int i = 0; i < kh_end; i++) {
|
||||
for (int j = 0; j < kw_end; j++) {
|
||||
if ((i * kw_end + j) % 2) {
|
||||
dot_product_odd += rf_addr[i * w_after + j] * weight_dilation[i * kw_dilatoin + j];
|
||||
} else {
|
||||
dot_product_even += rf_addr[i * w_after + j] * weight_dilation[i * kw_dilatoin + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
ofmap[h * ow + w] = dot_product_even + dot_product_odd + init_value;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free(ifmap_after);
|
||||
free(weight_dilation);
|
||||
|
||||
return BM_SUCCESS;
|
||||
}
|
||||
|
||||
// Reference float convolution with groups, dilation, symmetric padding and
// optional kernel flip.
//
// ifmap / ofmap / weight / bias are accessed as float arrays laid out as
// [n][c][h][w], [n][output_c][output_h][output_w],
// [output_c][input_c / groups][kh][kw] and [output_c] respectively.
//
// When result_add is zero the output is cleared first; otherwise the value
// already stored in each output element is added to the freshly computed one.
// When using_bias is non-zero, bias[output channel] is added as well.
// output_h / output_w must match the sizes implied by the other parameters
// (checked with assert).
void native_conv_ref(const void *ifmap, void *ofmap, const void *weight, int input_n, int input_c,
                     int input_h, int input_w, int output_c, int output_h, int output_w, int groups,
                     int kh, int kw, int dilation_h, int dilation_w, int pad_h, int pad_w,
                     int stride_h, int stride_w, int flip, int using_bias, const void *bias,
                     int result_add) {
  int kh_extent = dilation_h * (kh - 1) + 1;
  int kw_extent = dilation_w * (kw - 1) + 1;
  int output_h_expect = (input_h + 2 * pad_h - kh_extent) / stride_h + 1;
  int output_w_expect = (input_w + 2 * pad_w - kw_extent) / stride_w + 1;
  // Silence unused-variable warnings when assert() is compiled out.
  (void)output_h_expect;
  (void)output_w_expect;
  assert(output_h == output_h_expect && "Expect same output_h");
  assert(output_w == output_w_expect && "Expect same output_w");

  if (!result_add) {
    memset(ofmap, 0, input_n * output_c * output_h * output_w * sizeof(float));
  }

  const float *ifmap_f = (const float *)ifmap;
  float *ofmap_f = (float *)ofmap;
  const float *weight_f = (const float *)weight;
  const float *bias_f = (const float *)bias;

  int i_shape[4] = {input_n, input_c, input_h, input_w};
  int o_shape[4] = {input_n, output_c, output_h, output_w};
  int k_shape[4] = {output_c, input_c / groups, kh, kw};

  int o_g = output_c / groups;  // output channels per group
  int k_g = input_c / groups;   // input channels per group
  int weight_offset[4];
  int in_offset[4];
  int out_offset[4];

  for (int n = 0; n < input_n; n++) {
    for (int g = 0; g < groups; g++) {
      int o_head = o_g * g;
      int k_head = k_g * g;
      for (int o = 0; o < o_g; o++) {
        for (int y = 0; y < output_h; y++) {
          for (int x = 0; x < output_w; x++) {
            out_offset[0] = n;
            out_offset[1] = o + o_head;
            out_offset[2] = y;
            out_offset[3] = x;
            const int out_idx = calc_offset(o_shape, out_offset);

            // Remember the previous output so it can be added back when result_add is set.
            float result_init = ofmap_f[out_idx];
            ofmap_f[out_idx] = 0.0f;

            for (int k = 0; k < k_g; k++) {
              for (int p = 0; p < kh; p++) {
                int in_y = y * stride_h - pad_h + p * dilation_h;
                for (int q = 0; q < kw; q++) {
                  int in_x = x * stride_w - pad_w + q * dilation_w;
                  // Taps that fall into the padding contribute nothing.
                  if (in_y < 0 || in_y >= input_h || in_x < 0 || in_x >= input_w) {
                    continue;
                  }

                  weight_offset[0] = o + o_head;
                  weight_offset[1] = k;
                  weight_offset[2] = flip ? (kh - 1 - p) : p;
                  weight_offset[3] = flip ? (kw - 1 - q) : q;

                  in_offset[0] = n;
                  in_offset[1] = k + k_head;
                  in_offset[2] = in_y;
                  in_offset[3] = in_x;

                  const int in_idx = calc_offset(i_shape, in_offset);
                  const int w_idx = calc_offset(k_shape, weight_offset);

                  ofmap_f[out_idx] += ifmap_f[in_idx] * weight_f[w_idx];
                  // With a single tap the product is stored directly rather than accumulated.
                  if (k_g == 1 && kh == 1 && kw == 1) {
                    ofmap_f[out_idx] = ifmap_f[in_idx] * weight_f[w_idx];
                  }
                }
              }
            }

            if (using_bias) {
              ofmap_f[out_idx] += bias_f[o + o_head];
            }
            if (result_add) {
              ofmap_f[out_idx] += result_init;
            }
          }
        }
      }
    }
  }
}
|
||||
|
||||
int native_fc_int8(const int8_t *L, const int8_t *R, const int16_t *B, const int16_t *Y, int *Y_ref,
|
||||
int L_row_num, int L_col_num, int R_col_num, int L_sign, int R_sign, int B_sign,
|
||||
int l_shift_width, int r_shift_width, int is_result_int8, int do_relu) {
|
||||
const uint8_t *uL = (const uint8_t *)L;
|
||||
const uint8_t *uR = (const uint8_t *)R;
|
||||
const uint16_t *uB = (const uint16_t *)B;
|
||||
|
||||
int opd0, opd1, opd2;
|
||||
int ret = BM_SUCCESS;
|
||||
|
||||
for (int hidx = 0; hidx < L_row_num; hidx++) {
|
||||
for (int widx = 0; widx < R_col_num; widx++) {
|
||||
int Y1 = 0;
|
||||
int Y2 = 0;
|
||||
int sum_idx = 0;
|
||||
for (sum_idx = 0; sum_idx < L_col_num; sum_idx++) {
|
||||
int idx_L = index_get(hidx, L_col_num, sum_idx);
|
||||
int idx_R = index_get(sum_idx, R_col_num, widx);
|
||||
opd0 = (L_sign) ? L[idx_L] : uL[idx_L];
|
||||
opd1 = (R_sign) ? R[idx_R] : uR[idx_R];
|
||||
if ((sum_idx % 2 == 0 && sum_idx != 2) || sum_idx == 1) {
|
||||
Y1 += opd0 * opd1;
|
||||
} else {
|
||||
Y2 += opd0 * opd1;
|
||||
}
|
||||
}
|
||||
sum_idx++;
|
||||
|
||||
if (B) {
|
||||
opd2 = (B_sign) ? (int)B[widx] : (int)uB[widx];
|
||||
if ((sum_idx % 2 == 0 && sum_idx != 2) || sum_idx == 1) {
|
||||
Y1 += opd2;
|
||||
} else {
|
||||
Y2 += opd2;
|
||||
}
|
||||
sum_idx++;
|
||||
}
|
||||
|
||||
int idx_Y = index_get(hidx, R_col_num, widx);
|
||||
if (Y) {
|
||||
if ((sum_idx % 2 == 0 && sum_idx != 2) || sum_idx == 1) {
|
||||
Y1 += (Y[idx_Y] << l_shift_width);
|
||||
} else {
|
||||
Y2 += (Y[idx_Y] << l_shift_width);
|
||||
}
|
||||
}
|
||||
|
||||
Y_ref[idx_Y] = Y1 + Y2;
|
||||
}
|
||||
}
|
||||
uint8_t *Yout_int8 = malloc(sizeof(int8_t) * L_row_num * R_col_num);
|
||||
uint16_t *Yout_int16 = malloc(sizeof(int16_t) * L_row_num * R_col_num);
|
||||
|
||||
if (is_result_int8) {
|
||||
ret =
|
||||
satu_2_8bit(Y_ref, L_row_num * R_col_num, (int8_t *)Yout_int8, r_shift_width, 1, !do_relu);
|
||||
if (ret != BM_SUCCESS) goto error_release;
|
||||
|
||||
fill_int_with_int8(Y_ref, (int8_t *)Yout_int8, L_row_num * R_col_num);
|
||||
} else {
|
||||
ret = satu_2_16bit(Y_ref, L_row_num * R_col_num, (int16_t *)Yout_int16, r_shift_width, 1,
|
||||
!do_relu);
|
||||
if (ret != BM_SUCCESS) goto error_release;
|
||||
|
||||
fill_int_with_int16(Y_ref, (int16_t *)Yout_int16, L_row_num * R_col_num);
|
||||
}
|
||||
|
||||
error_release:
|
||||
free(Yout_int8);
|
||||
free(Yout_int16);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int native_pooling_ave_int8(const int8_t *i_fmap, const void *weight, const int16_t *bias,
|
||||
int8_t *o_fmap, int input_n, int input_c, int input_h, int input_w,
|
||||
int kh, int kw, int pad_h_t, int pad_h_b, int pad_w_l, int pad_w_r,
|
||||
int stride_h, int stride_w, int ins_h, int ins_w, int ins_h_last,
|
||||
int ins_w_last, int input_sign, int satu_sign, int r_shift_width,
|
||||
int const_weight) {
|
||||
if (kh * kw <= 0) return BM_ERR_INVALID_ARGUMENT;
|
||||
|
||||
int *avg_pooling_mac_a = (int *)malloc(kh * kw * sizeof(int));
|
||||
int *avg_pooling_mac_b = (int *)malloc(kh * kw * sizeof(int));
|
||||
|
||||
uint8_t avg_const_weight = *(uint8_t *)weight;
|
||||
const int8_t *weight_arr = weight;
|
||||
|
||||
int h_after = calc_dilute_hw(input_h, ins_h, ins_h_last, pad_h_t, pad_h_b);
|
||||
int w_after = calc_dilute_hw(input_w, ins_w, ins_w_last, pad_w_l, pad_w_r);
|
||||
|
||||
int output_h = calc_output_hw(h_after, kh, stride_h);
|
||||
int output_w = calc_output_hw(w_after, kw, stride_w);
|
||||
|
||||
int8_t *i_fmap_pad = NULL;
|
||||
for (int n = 0; n < input_n; n++) {
|
||||
if (const_weight == 0) weight_arr = weight;
|
||||
|
||||
for (int c = 0; c < input_c; ++c) {
|
||||
fill_pad_fmap_int8(i_fmap, &i_fmap_pad, 0, pad_w_l, pad_w_r, pad_h_t, pad_h_b, ins_h, ins_w,
|
||||
ins_h_last, ins_w_last, input_h, input_w);
|
||||
for (int ph = 0; ph < output_h; ++ph) {
|
||||
for (int pw = 0; pw < output_w; ++pw) {
|
||||
int hstart = ph * stride_h;
|
||||
int wstart = pw * stride_w;
|
||||
int pool_index = index_get(ph, output_w, pw);
|
||||
int mac_index = 0;
|
||||
int avg_pool_result;
|
||||
|
||||
for (int h = 0; h < kh; h++) {
|
||||
for (int w = 0; w < kw; w++) {
|
||||
int index = index_get((hstart + h), w_after, (w + wstart));
|
||||
mac_index = index_get(h, kw, w);
|
||||
avg_pooling_mac_a[mac_index] =
|
||||
input_sign ? i_fmap_pad[index] : (uint8_t)(i_fmap_pad[index]);
|
||||
|
||||
avg_pooling_mac_b[mac_index] =
|
||||
const_weight ? avg_const_weight : weight_arr[mac_index];
|
||||
}
|
||||
}
|
||||
|
||||
inner_product(avg_pooling_mac_a, avg_pooling_mac_b, kh * kw, &avg_pool_result);
|
||||
|
||||
if (bias) {
|
||||
avg_pool_result += bias[c];
|
||||
}
|
||||
|
||||
int ret = satu_2_8bit(&avg_pool_result, sizeof(int8_t), o_fmap + pool_index,
|
||||
r_shift_width, 1, satu_sign);
|
||||
|
||||
if (ret != BM_SUCCESS) {
|
||||
free(i_fmap_pad);
|
||||
free(avg_pooling_mac_a);
|
||||
free(avg_pooling_mac_b);
|
||||
|
||||
return BM_ERR_INVALID_ARGUMENT;
|
||||
}
|
||||
}
|
||||
}
|
||||
i_fmap += input_w * input_h;
|
||||
if (const_weight == 0) weight_arr += kh * kw;
|
||||
|
||||
o_fmap += output_w * output_h;
|
||||
}
|
||||
}
|
||||
free(i_fmap_pad);
|
||||
|
||||
free(avg_pooling_mac_a);
|
||||
free(avg_pooling_mac_b);
|
||||
|
||||
return BM_SUCCESS;
|
||||
}
|
||||
|
||||
int native_pooling_max_int8(const int8_t *i_fmap, int8_t *o_fmap, int input_n, int input_c,
|
||||
int input_h, int input_w, int kh, int kw, int pad_h_t, int pad_h_b,
|
||||
int pad_w_l, int pad_w_r, int stride_h, int stride_w, int ins_h,
|
||||
int ins_w, int ins_h_last, int ins_w_last, int input_sign) {
|
||||
if (ins_h != 0 || ins_w != 0 || ins_h_last != 0 || ins_w_last != 0)
|
||||
return BM_ERR_INVALID_ARGUMENT;
|
||||
|
||||
int h_after = calc_dilute_hw(input_h, ins_h, ins_h_last, pad_h_t, pad_h_b);
|
||||
int w_after = calc_dilute_hw(input_w, ins_w, ins_w_last, pad_w_l, pad_w_r);
|
||||
|
||||
int output_h = calc_output_hw(h_after, kh, stride_h);
|
||||
int output_w = calc_output_hw(w_after, kw, stride_w);
|
||||
|
||||
const int max_init = input_sign ? -128 : 0;
|
||||
int8_t *i_fmap_pad = NULL;
|
||||
for (int nc = 0; nc < input_n * input_c; nc++) {
|
||||
fill_pad_fmap_int8(i_fmap, &i_fmap_pad, max_init, pad_w_l, pad_w_r, pad_h_t, pad_h_b, 0, 0, 0,
|
||||
0, input_h, input_w);
|
||||
|
||||
for (int ph = 0; ph < output_h; ++ph) {
|
||||
for (int pw = 0; pw < output_w; ++pw) {
|
||||
int hstart = ph * stride_h;
|
||||
int wstart = pw * stride_w;
|
||||
int pool_index = index_get(ph, output_w, pw);
|
||||
int max = max_init;
|
||||
for (int h = 0; h < kh; h++) {
|
||||
for (int w = 0; w < kw; w++) {
|
||||
int index = index_get((hstart + h), (input_w + pad_w_l + pad_w_r), (w + wstart));
|
||||
int val = input_sign ? i_fmap_pad[index] : (uint8_t)i_fmap_pad[index];
|
||||
max = (val > max) ? val : max;
|
||||
}
|
||||
}
|
||||
o_fmap[pool_index] = max;
|
||||
}
|
||||
}
|
||||
i_fmap += input_w * input_h;
|
||||
o_fmap += output_w * output_h;
|
||||
}
|
||||
free(i_fmap_pad);
|
||||
|
||||
return BM_SUCCESS;
|
||||
}
|
||||
|
||||
int native_pooling_max_fp32(const float *ifmap, float *ofmap, int input_n, int input_c, int input_h,
|
||||
int input_w, int kh, int kw, int pad_h_t, int pad_h_b, int pad_w_l,
|
||||
int pad_w_r, int stride_h, int stride_w, int ins_h, int ins_w,
|
||||
int ins_h_last, int ins_w_last) {
|
||||
int h_after = calc_dilute_hw(input_h, ins_h, ins_h_last, pad_h_t, pad_h_b);
|
||||
int w_after = calc_dilute_hw(input_w, ins_w, ins_w_last, pad_w_l, pad_w_r);
|
||||
int output_h = calc_output_hw(h_after, kh, stride_h);
|
||||
int output_w = calc_output_hw(w_after, kw, stride_w);
|
||||
float *ifmap_after = malloc(sizeof(float) * h_after * w_after);
|
||||
|
||||
if (ifmap_after == NULL) {
|
||||
printf("No enough memory[h_after, w_after]: [%u, %u].\n", h_after, w_after);
|
||||
return BM_ERR_NOMEM;
|
||||
}
|
||||
|
||||
for (int n = 0; n < input_n; n++) {
|
||||
for (int c = 0; c < input_c; c++) {
|
||||
int ret = fill_pad_fmap_fp32(ifmap, &ifmap_after, -FLT_MAX, pad_h_t, pad_h_b, pad_w_l,
|
||||
pad_w_r, ins_h, ins_w, ins_h_last, ins_w_last, input_h, input_w);
|
||||
|
||||
if (ret != BM_SUCCESS) {
|
||||
printf("Failed to pad input fmap.\n");
|
||||
free(ifmap_after);
|
||||
return BM_ERR_FAILURE;
|
||||
}
|
||||
|
||||
for (int h = 0; h < output_h; h++) {
|
||||
for (int w = 0; w < output_w; w++) {
|
||||
int rf_h = h * stride_h, rf_w = w * stride_w;
|
||||
int kh_end = math_min(kh, h_after - rf_h);
|
||||
int kw_end = math_min(kw, w_after - rf_w);
|
||||
float *rf_addr = ifmap_after + rf_h * w_after + rf_w;
|
||||
float max_val = -FLT_MAX;
|
||||
|
||||
for (int i = 0; i < kh_end; i++) {
|
||||
for (int j = 0; j < kw_end; j++) {
|
||||
max_val = math_max(rf_addr[i * w_after + j], max_val);
|
||||
}
|
||||
}
|
||||
ofmap[h * output_w + w] = max_val;
|
||||
}
|
||||
}
|
||||
|
||||
ifmap += input_h * input_w;
|
||||
ofmap += output_h * output_w;
|
||||
}
|
||||
}
|
||||
|
||||
free(ifmap_after);
|
||||
return BM_SUCCESS;
|
||||
}
|
||||
|
||||
int native_pooling_avg_fp32(const float *ifmap, float *ofmap, int input_n, int input_c, int input_h,
|
||||
int input_w, int kh, int kw, int pad_h_t, int pad_h_b, int pad_w_l,
|
||||
int pad_w_r, int stride_h, int stride_w, int ins_h, int ins_w,
|
||||
int ins_h_last, int ins_w_last, float avg_pooling_const) {
|
||||
int h_after = calc_dilute_hw(input_h, ins_h, ins_h_last, pad_h_t, pad_h_b);
|
||||
int w_after = calc_dilute_hw(input_w, ins_w, ins_w_last, pad_w_l, pad_w_r);
|
||||
int output_h = calc_output_hw(h_after, kh, stride_h);
|
||||
int output_w = calc_output_hw(w_after, kw, stride_w);
|
||||
float *ifmap_after = malloc(sizeof(float) * h_after * w_after);
|
||||
|
||||
if (ifmap_after == NULL) {
|
||||
printf("No enough memory[h_after, w_after]: [%u, %u].\n", h_after, w_after);
|
||||
return BM_ERR_NOMEM;
|
||||
}
|
||||
|
||||
for (int n = 0; n < input_n; n++) {
|
||||
for (int c = 0; c < input_c; c++) {
|
||||
int ret = fill_pad_fmap_fp32(ifmap, &ifmap_after, 0, pad_h_t, pad_h_b, pad_w_l, pad_w_r,
|
||||
ins_h, ins_w, ins_h_last, ins_w_last, input_h, input_w);
|
||||
|
||||
if (ret != BM_SUCCESS) {
|
||||
printf("Failed to pad input fmap.\n");
|
||||
free(ifmap_after);
|
||||
return BM_ERR_FAILURE;
|
||||
}
|
||||
|
||||
for (int h = 0; h < output_h; h++) {
|
||||
for (int w = 0; w < output_w; w++) {
|
||||
int rf_h = h * stride_h, rf_w = w * stride_w;
|
||||
int kh_end = math_min(kh, h_after - rf_h);
|
||||
int kw_end = math_min(kw, w_after - rf_w);
|
||||
float *rf_addr = ifmap_after + rf_h * w_after + rf_w;
|
||||
float dot_product_even = 0.0, dot_product_odd = 0.0;
|
||||
|
||||
for (int i = 0; i < kh_end; i++) {
|
||||
for (int j = 0; j < kw_end; j++) {
|
||||
if ((i * kw_end + j) % 2) {
|
||||
dot_product_odd += rf_addr[i * w_after + j] * avg_pooling_const;
|
||||
} else {
|
||||
dot_product_even += rf_addr[i * w_after + j] * avg_pooling_const;
|
||||
}
|
||||
}
|
||||
}
|
||||
ofmap[h * output_w + w] = dot_product_even + dot_product_odd;
|
||||
}
|
||||
}
|
||||
|
||||
ifmap += input_h * input_w;
|
||||
ofmap += output_h * output_w;
|
||||
}
|
||||
}
|
||||
|
||||
free(ifmap_after);
|
||||
return BM_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * Reference max pooling over a flat list of `count` output elements.
 *
 * Each output index is decoded as (image, channel, pooled row, pooled column)
 * with the pooled column varying fastest.  The kernel window starts at
 * (row * stride_h - pad_h, col * stride_w - pad_w) and is clipped to the input
 * plane, so padded positions never take part in the comparison.
 *
 * @param bottom_data  input planes of height * width floats each
 * @param top_data     receives the window maximum for every output index;
 *                     -FLT_MAX when the clipped window is empty
 * @param mask_data    receives the in-plane offset (h * width + w) of the
 *                     maximum; -1 when the clipped window is empty
 * @param count        number of output elements to produce
 * @param num          unused
 */
void native_pooling_forward_max(const float *bottom_data, float *top_data, int *mask_data,
                                const int count, const int num, const int channels,
                                const int height, const int width, const int pooled_height,
                                const int pooled_width, const int kernel_h, const int kernel_w,
                                const int stride_h, const int stride_w, const int pad_h,
                                const int pad_w) {
  (void)num;

  for (int out = 0; out < count; ++out) {
    /* Decode the flat output index. */
    const int out_col = out % pooled_width;
    const int out_row = (out / pooled_width) % pooled_height;
    const int plane = out / pooled_width / pooled_height;
    const int chan = plane % channels;
    const int image = plane / channels;

    /* Window in input coordinates, clipped to the plane. */
    int top = out_row * stride_h - pad_h;
    int left = out_col * stride_w - pad_w;
    int bottom = top + kernel_h;
    int right = left + kernel_w;
    if (bottom > height) {
      bottom = height;
    }
    if (right > width) {
      right = width;
    }
    if (top < 0) {
      top = 0;
    }
    if (left < 0) {
      left = 0;
    }

    const float *const src = bottom_data + (image * channels + chan) * height * width;
    float best = -FLT_MAX;
    int best_at = -1;

    for (int y = top; y < bottom; ++y) {
      for (int x = left; x < right; ++x) {
        const int offset = y * width + x;
        /* Strict comparison: the first occurrence of the maximum wins. */
        if (src[offset] > best) {
          best_at = offset;
          best = src[best_at];
        }
      }
    }

    top_data[out] = best;
    mask_data[out] = best_at;
  }
}
|
||||
|
||||
/**
 * Reference average pooling over a flat list of `count` output elements.
 *
 * Each output index is decoded as (image, channel, pooled row, pooled column)
 * with the pooled column varying fastest.  The divisor is the area of the
 * kernel window clipped to the padded plane (height + pad_h by width + pad_w),
 * computed before the window is clipped to the real plane; only elements
 * inside the real plane are summed, so padded positions count toward the
 * divisor but add nothing to the sum.
 *
 * @param bottom_data  input planes of height * width floats each
 * @param top_data     receives sum / window-area for every output index
 * @param count        number of output elements to produce
 * @param num          unused
 */
void native_pooling_forward_ave(const float *bottom_data, float *top_data, const int count,
                                const int num, const int channels, const int height,
                                const int width, const int pooled_height, const int pooled_width,
                                const int kernel_h, const int kernel_w, const int stride_h,
                                const int stride_w, const int pad_h, const int pad_w) {
  (void)num;

  for (int out = 0; out < count; ++out) {
    /* Decode the flat output index. */
    const int out_col = out % pooled_width;
    const int out_row = (out / pooled_width) % pooled_height;
    const int plane = out / pooled_width / pooled_height;
    const int chan = plane % channels;
    const int image = plane / channels;

    /* Window clipped to the padded plane; this fixes the divisor. */
    int top = out_row * stride_h - pad_h;
    int left = out_col * stride_w - pad_w;
    int bottom = top + kernel_h;
    int right = left + kernel_w;
    if (bottom > height + pad_h) {
      bottom = height + pad_h;
    }
    if (right > width + pad_w) {
      right = width + pad_w;
    }
    const int window_area = (bottom - top) * (right - left);

    /* Now clip to the real plane for the actual summation. */
    if (top < 0) {
      top = 0;
    }
    if (left < 0) {
      left = 0;
    }
    if (bottom > height) {
      bottom = height;
    }
    if (right > width) {
      right = width;
    }

    const float *const src = bottom_data + (image * channels + chan) * height * width;
    float sum = 0;

    for (int y = top; y < bottom; ++y) {
      for (int x = left; x < right; ++x) {
        sum += src[y * width + x];
      }
    }

    top_data[out] = sum / window_area;
  }
}
|
||||
|
||||
int satu_2_8bit(const int *pBuff, int len, int8_t *pByteOut, int rshiftbits, int round_floor,
|
||||
int sign_unsign) {
|
||||
if (rshiftbits < 0) return BM_ERR_INVALID_ARGUMENT;
|
||||
|
||||
int temp;
|
||||
int satu_max = sign_unsign ? 127 : 255;
|
||||
int satu_min = sign_unsign ? -128 : 0;
|
||||
if (rshiftbits == 0) {
|
||||
for (int ii = 0; ii < len; ii++) {
|
||||
temp = (pBuff[ii] > satu_max) ? satu_max : ((pBuff[ii] < satu_min) ? satu_min : pBuff[ii]);
|
||||
memcpy(pByteOut + ii, &temp, 1);
|
||||
}
|
||||
} else { // rshiftbits>0
|
||||
for (int ii = 0; ii < len; ii++) {
|
||||
if (round_floor == 1)
|
||||
temp = ((pBuff[ii] >> (rshiftbits - 1)) + 1) >> 1;
|
||||
else
|
||||
temp = pBuff[ii] >> rshiftbits;
|
||||
temp = (temp > satu_max) ? satu_max : ((temp < satu_min) ? satu_min : temp);
|
||||
memcpy(pByteOut + ii, &temp, 1);
|
||||
}
|
||||
}
|
||||
|
||||
return BM_SUCCESS;
|
||||
}
|
||||
|
||||
int satu_2_16bit(const int *pBuff, int len, short *pByteOut, int rshiftbits, int round_floor,
|
||||
int sign_unsign) {
|
||||
if (rshiftbits < 0) return BM_ERR_INVALID_ARGUMENT;
|
||||
|
||||
int ii;
|
||||
int temp;
|
||||
int satu_max = sign_unsign ? 32767 : 65535;
|
||||
int satu_min = sign_unsign ? -32768 : 0;
|
||||
if (rshiftbits == 0) {
|
||||
for (ii = 0; ii < len; ii++) {
|
||||
temp = (pBuff[ii] > satu_max) ? satu_max : ((pBuff[ii] < satu_min) ? satu_min : pBuff[ii]);
|
||||
memcpy(pByteOut + ii, &temp, 2);
|
||||
}
|
||||
} else { // rshiftbits>0
|
||||
for (ii = 0; ii < len; ii++) {
|
||||
if (round_floor == 1)
|
||||
temp = ((pBuff[ii] >> (rshiftbits - 1)) + 1) >> 1;
|
||||
else
|
||||
temp = pBuff[ii] >> rshiftbits;
|
||||
temp = (temp > satu_max) ? satu_max : ((temp < satu_min) ? satu_min : temp);
|
||||
memcpy(pByteOut + ii, &temp, 2);
|
||||
}
|
||||
}
|
||||
|
||||
return BM_SUCCESS;
|
||||
}
|
||||
477
cvimath/tests/cvi1835/atan.cpp
Normal file
477
cvimath/tests/cvi1835/atan.cpp
Normal file
@ -0,0 +1,477 @@
|
||||
/**
|
||||
* Please refer to [git](https://github.com/xiezhq-hermann/atan_lookup)
|
||||
* input range is `all real numbers` and output range is -pi/2 < x < pi/2,
|
||||
* you can refer [here](https://www.mathopenref.com/arctan.html) for more
|
||||
* details
|
||||
*/
|
||||
//
|
||||
// xiezhq@shanghaitech.edu.cn && wanghe@shanghaitech.edu.cn
|
||||
/* Reference:
|
||||
[1] Abhisek Ukil, Vishal H Shah, Bernhard Deck,
|
||||
"Fast Computation of arctangent Functions for Embedded Applications: A
|
||||
Comparative Analysis" IEEE International Symposium on Industrial Electronics,
|
||||
Pages: 1206 - 1211, DOI: 10.1109/ISIE.2011.5984330, 2011
|
||||
[2] Sreeraman Rajan, Sichun Wang, Robert Inkol, and Alain Joyal
|
||||
"Efficient Approximations for the Arctangent Function"
|
||||
IEEE SIGNAL PROCESSING MAGAZINE [108] MAY 2006
|
||||
*/
|
||||
|
||||
#include <cvimath_internal.h>
|
||||
#include <test_cvikernel_util.h>
|
||||
|
||||
#define OUT
|
||||
#define IN
|
||||
#include <cfloat>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <random>
|
||||
#include <string>
|
||||
//#define DBG
|
||||
|
||||
using namespace std;
|
||||
|
||||
#if 0
|
||||
double atan_double(double x) {
|
||||
/*
|
||||
More precise look-up table is used for higher accuracy
|
||||
*/
|
||||
if (x >= 0) {
|
||||
if (x <= 1) {
|
||||
int index = round(x * 100);
|
||||
return (LUT_d[index] + (x * 100 - index) * (LUT_d[index + 1] - LUT_d[index]));
|
||||
} else {
|
||||
double re_x = 1 / x;
|
||||
int index = round(re_x * 100);
|
||||
return (M_PI_2 - (LUT_d[index] + (re_x * 100 - index) * (LUT_d[index + 1] - LUT_d[index])));
|
||||
// No recursive is better here
|
||||
}
|
||||
} else {
|
||||
if (x >= -1) {
|
||||
double abs_x = -x;
|
||||
int index = round(abs_x * 100);
|
||||
return -(LUT_d[index] + (abs_x * 100 - index) * (LUT_d[index + 1] - LUT_d[index]));
|
||||
} else {
|
||||
double re_x = 1 / (-x);
|
||||
int index = round(re_x * 100);
|
||||
return (LUT_d[index] + (re_x * 100 - index) * (LUT_d[index+1] - LUT_d[index])) - M_PI_2;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* pre_data means we test a fixed pattern; it should be the same as the LUT
|
||||
*/
|
||||
// Selects how the input is generated and how the result is compared.
enum TEST_MODE {
  // Fixed input (test_pattern); the reference is golden_bf16 and verify()
  // requires an exact bf16 match.
  PRE_DATA_COMPARE_FIX = 0,  // pre-data + fix compare
  // Generated input; verify() accepts results whose difference from the
  // reference is below epsilon.
  DATA_COMPARE_ACCURACY,     // generate \range_start to \range_end value that
                             // check epsilon
  // Like DATA_COMPARE_ACCURACY, but tl_lut_ref() additionally passes the
  // reference through convert_bf16_s8() and keeps it as a uint8_t value.
  DATA_COMPARE_U8,           // generate \range_start to \range_end value that check
                             // epsilon, result bf16->uint8_t
  TEST_MODE_MAX,             // number of modes, not a mode itself
};

// Mode of the current run; read by tl_lut_ref(), verify() and testbench().
static TEST_MODE mode;
|
||||
|
||||
static uint16_t test_pattern[] = {
|
||||
0x0000, 0x38D2, 0x3952, 0x399D, 0x39D2, 0x3A03, 0x3A1D, 0x3A38, 0x3A52, 0x3A6C, 0x3A83, 0x3A90,
|
||||
0x3A9D, 0x3AAA, 0x3AB8, 0x3AC5, 0x3AD2, 0x3ADF, 0x3AEC, 0x3AF9, 0x3B03, 0x3B0A, 0x3B10, 0x3B17,
|
||||
0x3B1D, 0x3B24, 0x3B2A, 0x3B31, 0x3B38, 0x3B3E, 0x3B45, 0x3B4B, 0x3B52, 0x3B58, 0x3B5F, 0x3B65,
|
||||
0x3B6C, 0x3B72, 0x3B79, 0x3B80, 0x3B83, 0x3B86, 0x3B8A, 0x3B8D, 0x3B90, 0x3B93, 0x3B97, 0x3B9A,
|
||||
0x3B9D, 0x3BA1, 0x3BA4, 0x3BA7, 0x3BAA, 0x3BAE, 0x3BB1, 0x3BB4, 0x3BB8, 0x3BBB, 0x3BBE, 0x3BC1,
|
||||
0x3BC5, 0x3BC8, 0x3BCB, 0x3BCE, 0x3BD2, 0x3BD5, 0x3BD8, 0x3BDC, 0x3BDF, 0x3BE2, 0x3BE5, 0x3BE9,
|
||||
0x3BEC, 0x3BEF, 0x3BF2, 0x3BF6, 0x3BF9, 0x3BFC, 0x3C00, 0x3C01, 0x3C03, 0x3C05, 0x3C06, 0x3C08,
|
||||
0x3C0A, 0x3C0B, 0x3C0D, 0x3C0F, 0x3C10, 0x3C12, 0x3C13, 0x3C15, 0x3C17, 0x3C18, 0x3C1A, 0x3C1C,
|
||||
0x3C1D, 0x3C1F, 0x3C21, 0x3C22, 0x3C24, 0x3C25, 0x3C27, 0x3C29, 0x3C2A, 0x3C2C, 0x3C2E, 0x3C2F,
|
||||
0x3C31, 0x3C33, 0x3C34, 0x3C36, 0x3C38, 0x3C39, 0x3C3B, 0x3C3C, 0x3C3E, 0x3C40, 0x3C41, 0x3C43,
|
||||
0x3C45, 0x3C46, 0x3C48, 0x3C4A, 0x3C4B, 0x3C4D, 0x3C4E, 0x3C50, 0x3C52, 0x3C53, 0x3C55, 0x3C57,
|
||||
0x3C58, 0x3C5A, 0x3C5C, 0x3C5D, 0x3C5F, 0x3C60, 0x3C62, 0x3C64, 0x3C65, 0x3C67, 0x3C69, 0x3C6A,
|
||||
0x3C6C, 0x3C6E, 0x3C6F, 0x3C71, 0x3C72, 0x3C74, 0x3C76, 0x3C77, 0x3C79, 0x3C7B, 0x3C7C, 0x3C7E,
|
||||
0x3C80, 0x3C81, 0x3C81, 0x3C82, 0x3C83, 0x3C84, 0x3C85, 0x3C86, 0x3C86, 0x3C87, 0x3C88, 0x3C89,
|
||||
0x3C8A, 0x3C8A, 0x3C8B, 0x3C8C, 0x3C8D, 0x3C8E, 0x3C8F, 0x3C8F, 0x3C90, 0x3C91, 0x3C92, 0x3C93,
|
||||
0x3C93, 0x3C94, 0x3C95, 0x3C96, 0x3C97, 0x3C98, 0x3C98, 0x3C99, 0x3C9A, 0x3C9B, 0x3C9C, 0x3C9C,
|
||||
0x3C9D, 0x3C9E, 0x3C9F, 0x3CA0, 0x3CA1, 0x3CA1, 0x3CA2, 0x3CA3, 0x3CA4, 0x3CA5, 0x3CA5, 0x3CA6,
|
||||
0x3CA7, 0x3CA8, 0x3CA9, 0x3CAA, 0x3CAA, 0x3CAB, 0x3CAC, 0x3CAD, 0x3CAE, 0x3CAE, 0x3CAF, 0x3CB0,
|
||||
0x3CB1, 0x3CB2, 0x3CB3, 0x3CB3, 0x3CB4, 0x3CB5, 0x3CB6, 0x3CB7, 0x3CB8, 0x3CB8, 0x3CB9, 0x3CBA,
|
||||
0x3CBB, 0x3CBC, 0x3CBC, 0x3CBD, 0x3CBE, 0x3CBF, 0x3CC0, 0x3CC1, 0x3CC1, 0x3CC2, 0x3CC3, 0x3CC4,
|
||||
0x3CC5, 0x3CC5, 0x3CC6, 0x3CC7, 0x3CC8, 0x3CC9, 0x3CCA, 0x3CCA, 0x3CCB, 0x3CCC, 0x3CCD, 0x3CCE,
|
||||
0x3CCE, 0x3CCF, 0x3CD0, 0x3CD1, 0x3CD2, 0x3CD3, 0x3CD3, 0x3CD4, 0x3CD5, 0x3CD6, 0x3CD7, 0x3CD7,
|
||||
0x3CD8, 0x3CD9, 0x3CDA, 0x3CDB, 0x3CDC, 0x3CDC, 0x3CDD, 0x3CDE, 0x3CDF, 0x3CE0, 0x3CE0, 0x3CE1,
|
||||
0x3CE2, 0x3CE3, 0x3CE4, 0x3CE5, 0x3CE5, 0x3CE6, 0x3CE7, 0x3CE8, 0x3CE9, 0x3CE9, 0x3CEA, 0x3CEB,
|
||||
0x3CEC, 0x3CED, 0x3CEE, 0x3CEE, 0x3CEF, 0x3CF0, 0x3CF1, 0x3CF2, 0x3CF2, 0x3CF3, 0x3CF4, 0x3CF5,
|
||||
0x3CF6, 0x3CF7, 0x3CF7, 0x3CF8, 0x3CF9, 0x3CFA, 0x3CFB, 0x3CFB, 0x3CFC, 0x3CFD, 0x3CFE, 0x3CFF,
|
||||
0x3D00, 0x3D00, 0x3D01, 0x3D01, 0x3D01, 0x3D02, 0x3D02, 0x3D03, 0x3D03, 0x3D03, 0x3D04, 0x3D04,
|
||||
0x3D05, 0x3D05, 0x3D06, 0x3D06, 0x3D06, 0x3D07, 0x3D07, 0x3D08, 0x3D08, 0x3D08, 0x3D09, 0x3D09,
|
||||
0x3D0A, 0x3D0A, 0x3D0A, 0x3D0B, 0x3D0B, 0x3D0C, 0x3D0C, 0x3D0C, 0x3D0D, 0x3D0D, 0x3D0E, 0x3D0E,
|
||||
0x3D0F, 0x3D0F, 0x3D0F, 0x3D10, 0x3D10, 0x3D11, 0x3D11, 0x3D11, 0x3D12, 0x3D12, 0x3D13, 0x3D13,
|
||||
0x3D13, 0x3D14, 0x3D14, 0x3D15, 0x3D15, 0x3D16, 0x3D16, 0x3D16, 0x3D17, 0x3D17, 0x3D18, 0x3D18,
|
||||
0x3D18, 0x3D19, 0x3D19, 0x3D1A, 0x3D1A, 0x3D1A, 0x3D1B, 0x3D1B, 0x3D1C, 0x3D1C, 0x3D1C, 0x3D1D,
|
||||
0x3D1D, 0x3D1E, 0x3D1E, 0x3D1F, 0x3D1F, 0x3D1F, 0x3D20, 0x3D20, 0x3D21, 0x3D21, 0x3D21, 0x3D22,
|
||||
0x3D22, 0x3D23, 0x3D23, 0x3D23, 0x3D24, 0x3D24, 0x3D25, 0x3D25, 0x3D25, 0x3D26, 0x3D26, 0x3D27,
|
||||
0x3D27, 0x3D28, 0x3D28, 0x3D28, 0x3D29, 0x3D29, 0x3D2A, 0x3D2A, 0x3D2A, 0x3D2B, 0x3D2B, 0x3D2C,
|
||||
0x3D2C, 0x3D2C, 0x3D2D, 0x3D2D, 0x3D2E, 0x3D2E, 0x3D2E, 0x3D2F, 0x3D2F, 0x3D30, 0x3D30, 0x3D31,
|
||||
0x3D31, 0x3D31, 0x3D32, 0x3D32, 0x3D33, 0x3D33, 0x3D33, 0x3D34, 0x3D34, 0x3D35, 0x3D35, 0x3D35,
|
||||
0x3D36, 0x3D36, 0x3D37, 0x3D37, 0x3D38, 0x3D38, 0x3D38, 0x3D39, 0x3D39, 0x3D3A, 0x3D3A, 0x3D3A,
|
||||
0x3D3B, 0x3D3B, 0x3D3C, 0x3D3C, 0x3D3C, 0x3D3D, 0x3D3D, 0x3D3E, 0x3D3E, 0x3D3E, 0x3D3F, 0x3D3F,
|
||||
0x3D40, 0x3D40, 0x3D41, 0x3D41, 0x3D41, 0x3D42, 0x3D42, 0x3D43, 0x3D43, 0x3D43, 0x3D44, 0x3D44,
|
||||
0x3D45, 0x3D45, 0x3D45, 0x3D46, 0x3D46, 0x3D47, 0x3D47, 0x3D47, 0x3D48, 0x3D48, 0x3D49, 0x3D49,
|
||||
0x3D4A, 0x3D4A, 0x3D4A, 0x3D4B, 0x3D4B, 0x3D4C, 0x3D4C, 0x3D4C, 0x3D4D, 0x3D4D, 0x3D4E, 0x3D4E,
|
||||
0x3D4E, 0x3D4F, 0x3D4F, 0x3D50, 0x3D50, 0x3D50, 0x3D51, 0x3D51, 0x3D52, 0x3D52, 0x3D53, 0x3D53,
|
||||
0x3D53, 0x3D54, 0x3D54, 0x3D55, 0x3D55, 0x3D55, 0x3D56, 0x3D56, 0x3D57, 0x3D57, 0x3D57, 0x3D58,
|
||||
0x3D58, 0x3D59, 0x3D59, 0x3D59, 0x3D5A, 0x3D5A, 0x3D5B, 0x3D5B, 0x3D5C, 0x3D5C, 0x3D5C, 0x3D5D,
|
||||
0x3D5D, 0x3D5E, 0x3D5E, 0x3D5E, 0x3D5F, 0x3D5F, 0x3D60, 0x3D60, 0x3D60, 0x3D61, 0x3D61, 0x3D62,
|
||||
0x3D62, 0x3D63, 0x3D63, 0x3D63, 0x3D64, 0x3D64, 0x3D65, 0x3D65, 0x3D65, 0x3D66, 0x3D66, 0x3D67,
|
||||
0x3D67, 0x3D67, 0x3D68, 0x3D68, 0x3D69, 0x3D69, 0x3D69, 0x3D6A, 0x3D6A, 0x3D6B, 0x3D6B, 0x3D6C,
|
||||
0x3D6C, 0x3D6C, 0x3D6D, 0x3D6D, 0x3D6E, 0x3D6E, 0x3D6E, 0x3D6F, 0x3D6F, 0x3D70, 0x3D70, 0x3D70,
|
||||
0x3D71, 0x3D71, 0x3D72, 0x3D72, 0x3D72, 0x3D73, 0x3D73, 0x3D74, 0x3D74, 0x3D75, 0x3D75, 0x3D75,
|
||||
0x3D76, 0x3D76, 0x3D77, 0x3D77, 0x3D77, 0x3D78, 0x3D78, 0x3D79, 0x3D79, 0x3D79, 0x3D7A, 0x3D7A,
|
||||
0x3D7B, 0x3D7B, 0x3D7B, 0x3D7C, 0x3D7C, 0x3D7D, 0x3D7D, 0x3D7E, 0x3D7E, 0x3D7E, 0x3D7F, 0x3D7F,
|
||||
0x3D80, 0x3D80, 0x3D80, 0x3D80, 0x3D81, 0x3D81, 0x3D81, 0x3D81, 0x3D81, 0x3D82, 0x3D82, 0x3D82,
|
||||
0x3D82, 0x3D82, 0x3D83, 0x3D83, 0x3D83, 0x3D83, 0x3D83, 0x3D84, 0x3D84, 0x3D84, 0x3D84, 0x3D85,
|
||||
0x3D85, 0x3D85, 0x3D85, 0x3D85, 0x3D86, 0x3D86, 0x3D86, 0x3D86, 0x3D86, 0x3D87, 0x3D87, 0x3D87,
|
||||
0x3D87, 0x3D87, 0x3D88, 0x3D88, 0x3D88, 0x3D88, 0x3D88, 0x3D89, 0x3D89, 0x3D89, 0x3D89, 0x3D89,
|
||||
0x3D8A, 0x3D8A, 0x3D8A, 0x3D8A, 0x3D8A, 0x3D8B, 0x3D8B, 0x3D8B, 0x3D8B, 0x3D8B, 0x3D8C, 0x3D8C,
|
||||
0x3D8C, 0x3D8C, 0x3D8C, 0x3D8D, 0x3D8D, 0x3D8D, 0x3D8D, 0x3D8E, 0x3D8E, 0x3D8E, 0x3D8E, 0x3D8E,
|
||||
0x3D8F, 0x3D8F, 0x3D8F, 0x3D8F, 0x3D8F, 0x3D90, 0x3D90, 0x3D90, 0x3D90, 0x3D90, 0x3D91, 0x3D91,
|
||||
0x3D91, 0x3D91, 0x3D91, 0x3D92, 0x3D92, 0x3D92, 0x3D92, 0x3D92, 0x3D93, 0x3D93, 0x3D93, 0x3D93,
|
||||
0x3D93, 0x3D94, 0x3D94, 0x3D94, 0x3D94, 0x3D94, 0x3D95, 0x3D95, 0x3D95, 0x3D95, 0x3D96, 0x3D96,
|
||||
0x3D96, 0x3D96, 0x3D96, 0x3D97, 0x3D97, 0x3D97, 0x3D97, 0x3D97, 0x3D98, 0x3D98, 0x3D98, 0x3D98,
|
||||
0x3D98, 0x3D99, 0x3D99, 0x3D99, 0x3D99, 0x3D99, 0x3D9A, 0x3D9A, 0x3D9A, 0x3D9A, 0x3D9A, 0x3D9B,
|
||||
0x3D9B, 0x3D9B, 0x3D9B, 0x3D9B, 0x3D9C, 0x3D9C, 0x3D9C, 0x3D9C, 0x3D9C, 0x3D9D, 0x3D9D, 0x3D9D,
|
||||
0x3D9D, 0x3D9D, 0x3D9E, 0x3D9E, 0x3D9E, 0x3D9E, 0x3D9F, 0x3D9F, 0x3D9F, 0x3D9F, 0x3D9F, 0x3DA0,
|
||||
0x3DA0, 0x3DA0, 0x3DA0, 0x3DA0, 0x3DA1, 0x3DA1, 0x3DA1, 0x3DA1, 0x3DA1, 0x3DA2, 0x3DA2, 0x3DA2,
|
||||
0x3DA2, 0x3DA2, 0x3DA3, 0x3DA3, 0x3DA3, 0x3DA3, 0x3DA3, 0x3DA4, 0x3DA4, 0x3DA4, 0x3DA4, 0x3DA4,
|
||||
0x3DA5, 0x3DA5, 0x3DA5, 0x3DA5, 0x3DA5, 0x3DA6, 0x3DA6, 0x3DA6, 0x3DA6, 0x3DA7, 0x3DA7, 0x3DA7,
|
||||
0x3DA7, 0x3DA7, 0x3DA8, 0x3DA8, 0x3DA8, 0x3DA8, 0x3DA8, 0x3DA9, 0x3DA9, 0x3DA9, 0x3DA9, 0x3DA9,
|
||||
0x3DAA, 0x3DAA, 0x3DAA, 0x3DAA, 0x3DAA, 0x3DAB, 0x3DAB, 0x3DAB, 0x3DAB, 0x3DAB, 0x3DAC, 0x3DAC,
|
||||
0x3DAC, 0x3DAC, 0x3DAC, 0x3DAD, 0x3DAD, 0x3DAD, 0x3DAD, 0x3DAD, 0x3DAE, 0x3DAE, 0x3DAE, 0x3DAE,
|
||||
0x3DAE, 0x3DAF, 0x3DAF, 0x3DAF, 0x3DAF, 0x3DB0, 0x3DB0, 0x3DB0, 0x3DB0, 0x3DB0, 0x3DB1, 0x3DB1,
|
||||
0x3DB1, 0x3DB1, 0x3DB1, 0x3DB2, 0x3DB2, 0x3DB2, 0x3DB2, 0x3DB2, 0x3DB3, 0x3DB3, 0x3DB3, 0x3DB3,
|
||||
0x3DB3, 0x3DB4, 0x3DB4, 0x3DB4, 0x3DB4, 0x3DB4, 0x3DB5, 0x3DB5, 0x3DB5, 0x3DB5, 0x3DB5, 0x3DB6,
|
||||
0x3DB6, 0x3DB6, 0x3DB6, 0x3DB6, 0x3DB7, 0x3DB7, 0x3DB7, 0x3DB7, 0x3DB8, 0x3DB8, 0x3DB8, 0x3DB8,
|
||||
0x3DB8, 0x3DB9, 0x3DB9, 0x3DB9, 0x3DB9, 0x3DB9, 0x3DBA, 0x3DBA, 0x3DBA, 0x3DBA, 0x3DBA, 0x3DBB,
|
||||
0x3DBB, 0x3DBB, 0x3DBB, 0x3DBB, 0x3DBC, 0x3DBC, 0x3DBC, 0x3DBC, 0x3DBC, 0x3DBD, 0x3DBD, 0x3DBD,
|
||||
0x3DBD, 0x3DBD, 0x3DBE, 0x3DBE, 0x3DBE, 0x3DBE, 0x3DBE, 0x3DBF, 0x3DBF, 0x3DBF, 0x3DBF, 0x3DBF,
|
||||
0x3DC0, 0x3DC0, 0x3DC0, 0x3DC0, 0x3DC1, 0x3DC1, 0x3DC1, 0x3DC1, 0x3DC1, 0x3DC2, 0x3DC2, 0x3DC2,
|
||||
0x3DC2, 0x3DC2, 0x3DC3, 0x3DC3, 0x3DC3, 0x3DC3, 0x3DC3, 0x3DC4, 0x3DC4, 0x3DC4, 0x3DC4, 0x3DC4,
|
||||
0x3DC5, 0x3DC5, 0x3DC5, 0x3DC5, 0x3DC5, 0x3DC6, 0x3DC6, 0x3DC6, 0x3DC6, 0x3DC6, 0x3DC7, 0x3DC7,
|
||||
0x3DC7, 0x3DC7, 0x3DC7, 0x3DC8, 0x3DC8, 0x3DC8, 0x3DC8, 0x3DC8, 0x3DC9, 0x3DC9, 0x3DC9, 0x3DC9,
|
||||
0x3DCA, 0x3DCA, 0x3DCA, 0x3DCA, 0x3DCA, 0x3DCB, 0x3DCB, 0x3DCB, 0x3DCB, 0x3DCB, 0x3DCC, 0x3DCC,
|
||||
0x3DCC, 0x3DCC, 0x3DCC, 0x3DCD, 0x3DCE, 0x3DCF, 0x3DD0, 0x3DD1, 0x3DD2, 0x3DD3, 0x3DD4, 0x3DD5,
|
||||
0x3DD6, 0x3DD7, 0x3DD8, 0x3DD9, 0x3DDA, 0x3DDB, 0x3DDC, 0x3DDD, 0x3DDE, 0x3DDF, 0x3DE0, 0x3DE1,
|
||||
0x3DE2, 0x3DE3, 0x3DE4, 0x3DE5,
|
||||
};
|
||||
|
||||
static uint16_t golden_bf16[] = {
|
||||
0x0, 0x38d2, 0x3952, 0x399d, 0x39d2, 0x3a03, 0x3a1d, 0x3a38, 0x3a52, 0x3a6c, 0x3a83, 0x3a90,
|
||||
0x3a9d, 0x3aaa, 0x3ab8, 0x3ac5, 0x3ad2, 0x3adf, 0x3aec, 0x3afa, 0x3b03, 0x3b0a, 0x3b10, 0x3b17,
|
||||
0x3b1d, 0x3b24, 0x3b2a, 0x3b31, 0x3b38, 0x3b3e, 0x3b45, 0x3b4c, 0x3b52, 0x3b59, 0x3b5f, 0x3b65,
|
||||
0x3b6c, 0x3b72, 0x3b7a, 0x3b80, 0x3b83, 0x3b86, 0x3b8a, 0x3b8d, 0x3b90, 0x3b93, 0x3b97, 0x3b9a,
|
||||
0x3b9d, 0x3ba1, 0x3ba4, 0x3ba7, 0x3baa, 0x3bae, 0x3bb1, 0x3bb4, 0x3bb8, 0x3bbb, 0x3bbe, 0x3bc1,
|
||||
0x3bc5, 0x3bc8, 0x3bcb, 0x3bce, 0x3bd2, 0x3bd6, 0x3bd8, 0x3bdc, 0x3bdf, 0x3be2, 0x3be6, 0x3be9,
|
||||
0x3bec, 0x3bef, 0x3bf2, 0x3bf6, 0x3bf9, 0x3bfc, 0x3c00, 0x3c01, 0x3c03, 0x3c05, 0x3c06, 0x3c08,
|
||||
0x3c0a, 0x3c0b, 0x3c0d, 0x3c0f, 0x3c10, 0x3c12, 0x3c13, 0x3c15, 0x3c17, 0x3c18, 0x3c1a, 0x3c1c,
|
||||
0x3c1d, 0x3c1f, 0x3c21, 0x3c22, 0x3c24, 0x3c25, 0x3c27, 0x3c29, 0x3c2a, 0x3c2c, 0x3c2e, 0x3c2f,
|
||||
0x3c31, 0x3c33, 0x3c34, 0x3c36, 0x3c38, 0x3c39, 0x3c3b, 0x3c3c, 0x3c3e, 0x3c40, 0x3c41, 0x3c43,
|
||||
0x3c45, 0x3c46, 0x3c48, 0x3c4a, 0x3c4b, 0x3c4d, 0x3c4e, 0x3c50, 0x3c52, 0x3c53, 0x3c55, 0x3c57,
|
||||
0x3c58, 0x3c5a, 0x3c5c, 0x3c5d, 0x3c5f, 0x3c60, 0x3c62, 0x3c64, 0x3c66, 0x3c68, 0x3c69, 0x3c6a,
|
||||
0x3c6c, 0x3c6e, 0x3c70, 0x3c71, 0x3c72, 0x3c74, 0x3c76, 0x3c78, 0x3c79, 0x3c7b, 0x3c7c, 0x3c7e,
|
||||
0x3c80, 0x3c81, 0x3c81, 0x3c82, 0x3c83, 0x3c84, 0x3c85, 0x3c86, 0x3c86, 0x3c87, 0x3c88, 0x3c89,
|
||||
0x3c8a, 0x3c8a, 0x3c8b, 0x3c8c, 0x3c8d, 0x3c8e, 0x3c8f, 0x3c8f, 0x3c90, 0x3c91, 0x3c92, 0x3c93,
|
||||
0x3c93, 0x3c94, 0x3c95, 0x3c96, 0x3c97, 0x3c98, 0x3c98, 0x3c99, 0x3c9a, 0x3c9b, 0x3c9c, 0x3c9c,
|
||||
0x3c9d, 0x3c9e, 0x3c9f, 0x3ca0, 0x3ca1, 0x3ca1, 0x3ca2, 0x3ca3, 0x3ca4, 0x3ca5, 0x3ca5, 0x3ca6,
|
||||
0x3ca7, 0x3ca8, 0x3ca9, 0x3caa, 0x3caa, 0x3cab, 0x3cac, 0x3cad, 0x3cae, 0x3cae, 0x3caf, 0x3cb0,
|
||||
0x3cb1, 0x3cb2, 0x3cb3, 0x3cb3, 0x3cb4, 0x3cb5, 0x3cb6, 0x3cb7, 0x3cb8, 0x3cb8, 0x3cb9, 0x3cba,
|
||||
0x3cbb, 0x3cbc, 0x3cbc, 0x3cbd, 0x3cbe, 0x3cbf, 0x3cc0, 0x3cc1, 0x3cc1, 0x3cc2, 0x3cc3, 0x3cc4,
|
||||
0x3cc5, 0x3cc5, 0x3cc6, 0x3cc7, 0x3cc8, 0x3cc9, 0x3cca, 0x3cca, 0x3ccb, 0x3ccc, 0x3ccd, 0x3cce,
|
||||
0x3cce, 0x3ccf, 0x3cd0, 0x3cd1, 0x3cd2, 0x3cd3, 0x3cd3, 0x3cd4, 0x3cd5, 0x3cd6, 0x3cd7, 0x3cd7,
|
||||
0x3cd8, 0x3cd9, 0x3cda, 0x3cdb, 0x3cdc, 0x3cdc, 0x3cdd, 0x3cde, 0x3cdf, 0x3ce0, 0x3ce0, 0x3ce1,
|
||||
0x3ce2, 0x3ce3, 0x3ce4, 0x3ce5, 0x3ce5, 0x3ce6, 0x3ce7, 0x3ce8, 0x3ce9, 0x3ce9, 0x3cea, 0x3ceb,
|
||||
0x3cec, 0x3ced, 0x3cee, 0x3cee, 0x3cef, 0x3cf0, 0x3cf1, 0x3cf2, 0x3cf2, 0x3cf3, 0x3cf4, 0x3cf5,
|
||||
0x3cf6, 0x3cf7, 0x3cf7, 0x3cf8, 0x3cf9, 0x3cfa, 0x3cfb, 0x3cfb, 0x3cfc, 0x3cfd, 0x3cfe, 0x3cff,
|
||||
0x3d00, 0x3d00, 0x3d01, 0x3d01, 0x3d01, 0x3d02, 0x3d02, 0x3d03, 0x3d03, 0x3d03, 0x3d04, 0x3d04,
|
||||
0x3d05, 0x3d05, 0x3d06, 0x3d06, 0x3d06, 0x3d07, 0x3d07, 0x3d08, 0x3d08, 0x3d08, 0x3d09, 0x3d09,
|
||||
0x3d0a, 0x3d0a, 0x3d0a, 0x3d0b, 0x3d0b, 0x3d0c, 0x3d0c, 0x3d0c, 0x3d0d, 0x3d0d, 0x3d0e, 0x3d0e,
|
||||
0x3d0f, 0x3d0f, 0x3d0f, 0x3d10, 0x3d10, 0x3d11, 0x3d11, 0x3d11, 0x3d12, 0x3d12, 0x3d13, 0x3d13,
|
||||
0x3d13, 0x3d14, 0x3d14, 0x3d15, 0x3d15, 0x3d16, 0x3d16, 0x3d16, 0x3d17, 0x3d17, 0x3d18, 0x3d18,
|
||||
0x3d18, 0x3d19, 0x3d19, 0x3d1a, 0x3d1a, 0x3d1a, 0x3d1b, 0x3d1b, 0x3d1c, 0x3d1c, 0x3d1c, 0x3d1d,
|
||||
0x3d1d, 0x3d1e, 0x3d1e, 0x3d1f, 0x3d1f, 0x3d1f, 0x3d20, 0x3d20, 0x3d21, 0x3d21, 0x3d21, 0x3d22,
|
||||
0x3d22, 0x3d23, 0x3d23, 0x3d23, 0x3d24, 0x3d24, 0x3d25, 0x3d25, 0x3d25, 0x3d26, 0x3d26, 0x3d27,
|
||||
0x3d27, 0x3d28, 0x3d28, 0x3d28, 0x3d29, 0x3d29, 0x3d2a, 0x3d2a, 0x3d2a, 0x3d2b, 0x3d2b, 0x3d2c,
|
||||
0x3d2c, 0x3d2c, 0x3d2d, 0x3d2d, 0x3d2e, 0x3d2e, 0x3d2e, 0x3d2f, 0x3d2f, 0x3d30, 0x3d30, 0x3d31,
|
||||
0x3d31, 0x3d31, 0x3d32, 0x3d32, 0x3d33, 0x3d33, 0x3d33, 0x3d34, 0x3d34, 0x3d35, 0x3d35, 0x3d35,
|
||||
0x3d36, 0x3d36, 0x3d37, 0x3d37, 0x3d38, 0x3d38, 0x3d38, 0x3d39, 0x3d39, 0x3d3a, 0x3d3a, 0x3d3a,
|
||||
0x3d3b, 0x3d3b, 0x3d3c, 0x3d3c, 0x3d3c, 0x3d3d, 0x3d3d, 0x3d3e, 0x3d3e, 0x3d3e, 0x3d3f, 0x3d3f,
|
||||
0x3d40, 0x3d40, 0x3d41, 0x3d41, 0x3d41, 0x3d42, 0x3d42, 0x3d43, 0x3d43, 0x3d43, 0x3d44, 0x3d44,
|
||||
0x3d45, 0x3d45, 0x3d45, 0x3d46, 0x3d46, 0x3d47, 0x3d47, 0x3d47, 0x3d48, 0x3d48, 0x3d49, 0x3d49,
|
||||
0x3d4a, 0x3d4a, 0x3d4a, 0x3d4b, 0x3d4b, 0x3d4c, 0x3d4c, 0x3d4c, 0x3d4d, 0x3d4d, 0x3d4e, 0x3d4e,
|
||||
0x3d4e, 0x3d4f, 0x3d4f, 0x3d50, 0x3d50, 0x3d50, 0x3d51, 0x3d51, 0x3d52, 0x3d52, 0x3d53, 0x3d53,
|
||||
0x3d53, 0x3d54, 0x3d54, 0x3d55, 0x3d55, 0x3d55, 0x3d56, 0x3d56, 0x3d57, 0x3d57, 0x3d57, 0x3d58,
|
||||
0x3d58, 0x3d59, 0x3d59, 0x3d59, 0x3d5a, 0x3d5a, 0x3d5b, 0x3d5b, 0x3d5c, 0x3d5c, 0x3d5c, 0x3d5d,
|
||||
0x3d5d, 0x3d5e, 0x3d5e, 0x3d5e, 0x3d5f, 0x3d5f, 0x3d60, 0x3d60, 0x3d60, 0x3d60, 0x3d60, 0x3d61,
|
||||
0x3d61, 0x3d62, 0x3d62, 0x3d62, 0x3d63, 0x3d63, 0x3d64, 0x3d64, 0x3d64, 0x3d65, 0x3d65, 0x3d66,
|
||||
0x3d66, 0x3d66, 0x3d67, 0x3d67, 0x3d68, 0x3d68, 0x3d68, 0x3d69, 0x3d69, 0x3d6a, 0x3d6a, 0x3d6b,
|
||||
0x3d6b, 0x3d6b, 0x3d6c, 0x3d6c, 0x3d6d, 0x3d6d, 0x3d6d, 0x3d6e, 0x3d6e, 0x3d6f, 0x3d6f, 0x3d6f,
|
||||
0x3d70, 0x3d70, 0x3d71, 0x3d71, 0x3d71, 0x3d72, 0x3d72, 0x3d73, 0x3d73, 0x3d74, 0x3d74, 0x3d74,
|
||||
0x3d75, 0x3d75, 0x3d76, 0x3d76, 0x3d76, 0x3d77, 0x3d77, 0x3d78, 0x3d78, 0x3d78, 0x3d79, 0x3d79,
|
||||
0x3d7a, 0x3d7a, 0x3d7a, 0x3d7b, 0x3d7b, 0x3d7c, 0x3d7c, 0x3d7d, 0x3d7d, 0x3d7d, 0x3d7e, 0x3d7e,
|
||||
0x3d7f, 0x3d7f, 0x3d7f, 0x3d7f, 0x3d81, 0x3d81, 0x3d81, 0x3d81, 0x3d81, 0x3d82, 0x3d82, 0x3d82,
|
||||
0x3d82, 0x3d82, 0x3d83, 0x3d83, 0x3d83, 0x3d83, 0x3d83, 0x3d84, 0x3d84, 0x3d84, 0x3d84, 0x3d85,
|
||||
0x3d85, 0x3d85, 0x3d85, 0x3d85, 0x3d86, 0x3d86, 0x3d86, 0x3d86, 0x3d86, 0x3d87, 0x3d87, 0x3d87,
|
||||
0x3d87, 0x3d87, 0x3d88, 0x3d88, 0x3d88, 0x3d88, 0x3d88, 0x3d89, 0x3d89, 0x3d89, 0x3d89, 0x3d89,
|
||||
0x3d8a, 0x3d8a, 0x3d8a, 0x3d8a, 0x3d8a, 0x3d8b, 0x3d8b, 0x3d8b, 0x3d8b, 0x3d8b, 0x3d8c, 0x3d8c,
|
||||
0x3d8c, 0x3d8c, 0x3d8c, 0x3d8d, 0x3d8d, 0x3d8d, 0x3d8d, 0x3d8e, 0x3d8e, 0x3d8e, 0x3d8e, 0x3d8e,
|
||||
0x3d8f, 0x3d8f, 0x3d8f, 0x3d8f, 0x3d8f, 0x3d90, 0x3d90, 0x3d90, 0x3d90, 0x3d90, 0x3d91, 0x3d91,
|
||||
0x3d91, 0x3d91, 0x3d91, 0x3d92, 0x3d92, 0x3d92, 0x3d92, 0x3d92, 0x3d93, 0x3d93, 0x3d93, 0x3d93,
|
||||
0x3d93, 0x3d94, 0x3d94, 0x3d94, 0x3d94, 0x3d94, 0x3d95, 0x3d95, 0x3d95, 0x3d95, 0x3d96, 0x3d96,
|
||||
0x3d96, 0x3d96, 0x3d96, 0x3d97, 0x3d97, 0x3d97, 0x3d97, 0x3d97, 0x3d98, 0x3d98, 0x3d98, 0x3d98,
|
||||
0x3d98, 0x3d99, 0x3d99, 0x3d99, 0x3d99, 0x3d99, 0x3d99, 0x3d99, 0x3d99, 0x3d99, 0x3d99, 0x3d9a,
|
||||
0x3d9a, 0x3d9a, 0x3d9a, 0x3d9a, 0x3d9b, 0x3d9b, 0x3d9b, 0x3d9b, 0x3d9b, 0x3d9c, 0x3d9c, 0x3d9c,
|
||||
0x3d9c, 0x3d9c, 0x3d9d, 0x3d9d, 0x3d9d, 0x3d9d, 0x3d9e, 0x3d9e, 0x3d9e, 0x3d9e, 0x3d9e, 0x3d9f,
|
||||
0x3d9f, 0x3d9f, 0x3d9f, 0x3d9f, 0x3da0, 0x3da0, 0x3da0, 0x3da0, 0x3da0, 0x3da1, 0x3da1, 0x3da1,
|
||||
0x3da1, 0x3da1, 0x3da2, 0x3da2, 0x3da2, 0x3da2, 0x3da2, 0x3da3, 0x3da3, 0x3da3, 0x3da3, 0x3da3,
|
||||
0x3da4, 0x3da4, 0x3da4, 0x3da4, 0x3da4, 0x3da5, 0x3da5, 0x3da5, 0x3da5, 0x3da6, 0x3da6, 0x3da6,
|
||||
0x3da6, 0x3da6, 0x3da7, 0x3da7, 0x3da7, 0x3da7, 0x3da7, 0x3da8, 0x3da8, 0x3da8, 0x3da8, 0x3da8,
|
||||
0x3da9, 0x3da9, 0x3da9, 0x3da9, 0x3da9, 0x3daa, 0x3daa, 0x3daa, 0x3daa, 0x3daa, 0x3dab, 0x3dab,
|
||||
0x3dab, 0x3dab, 0x3dab, 0x3dac, 0x3dac, 0x3dac, 0x3dac, 0x3dac, 0x3dad, 0x3dad, 0x3dad, 0x3dad,
|
||||
0x3dad, 0x3daf, 0x3daf, 0x3daf, 0x3daf, 0x3db0, 0x3db0, 0x3db0, 0x3db0, 0x3db0, 0x3db1, 0x3db1,
|
||||
0x3db1, 0x3db1, 0x3db1, 0x3db2, 0x3db2, 0x3db2, 0x3db2, 0x3db2, 0x3db3, 0x3db3, 0x3db3, 0x3db3,
|
||||
0x3db3, 0x3db4, 0x3db4, 0x3db4, 0x3db4, 0x3db4, 0x3db5, 0x3db5, 0x3db5, 0x3db5, 0x3db5, 0x3db6,
|
||||
0x3db6, 0x3db6, 0x3db6, 0x3db6, 0x3db7, 0x3db7, 0x3db7, 0x3db7, 0x3db8, 0x3db8, 0x3db8, 0x3db8,
|
||||
0x3db8, 0x3db9, 0x3db9, 0x3db9, 0x3db9, 0x3db9, 0x3dba, 0x3dba, 0x3dba, 0x3dba, 0x3dba, 0x3dbb,
|
||||
0x3dbb, 0x3dbb, 0x3dbb, 0x3dbb, 0x3dbc, 0x3dbc, 0x3dbc, 0x3dbc, 0x3dbc, 0x3dbd, 0x3dbd, 0x3dbd,
|
||||
0x3dbd, 0x3dbd, 0x3dbe, 0x3dbe, 0x3dbe, 0x3dbe, 0x3dbe, 0x3dbf, 0x3dbf, 0x3dbf, 0x3dbf, 0x3dbf,
|
||||
0x3dc0, 0x3dc0, 0x3dc0, 0x3dc0, 0x3dc1, 0x3dc1, 0x3dc1, 0x3dc1, 0x3dc1, 0x3dc1, 0x3dc1, 0x3dc1,
|
||||
0x3dc1, 0x3dc1, 0x3dc2, 0x3dc2, 0x3dc2, 0x3dc2, 0x3dc2, 0x3dc3, 0x3dc3, 0x3dc3, 0x3dc3, 0x3dc3,
|
||||
0x3dc4, 0x3dc4, 0x3dc4, 0x3dc4, 0x3dc4, 0x3dc5, 0x3dc5, 0x3dc5, 0x3dc5, 0x3dc5, 0x3dc6, 0x3dc6,
|
||||
0x3dc6, 0x3dc6, 0x3dc6, 0x3dc7, 0x3dc7, 0x3dc7, 0x3dc7, 0x3dc7, 0x3dc8, 0x3dc8, 0x3dc8, 0x3dc8,
|
||||
0x3dc9, 0x3dc9, 0x3dc9, 0x3dc9, 0x3dc9, 0x3dca, 0x3dca, 0x3dca, 0x3dca, 0x3dca, 0x3dcb, 0x3dcb,
|
||||
0x3dcb, 0x3dcb, 0x3dcb, 0x3dcc, 0x3dcd, 0x3dce, 0x3dcf, 0x3dd0, 0x3dd1, 0x3dd2, 0x3dd3, 0x3dd4,
|
||||
0x3dd5, 0x3dd6, 0x3dd7, 0x3dd8, 0x3dd9, 0x3dda, 0x3ddb, 0x3ddc, 0x3ddd, 0x3dde, 0x3ddf, 0x3de0,
|
||||
0x3de1, 0x3de2, 0x3de3, 0x3de4,
|
||||
};
|
||||
|
||||
// Golden model for the test: f(x) = atan(x), evaluated by the math library.
static double _gen_atan(float i) {
  const double angle = atan(i);
  return angle;
}
|
||||
|
||||
static void tl_lut_ref(uint16_t *ofmap, uint16_t *ifmap, cvk_tl_shape_t ifmap_shape) {
|
||||
assert(ofmap);
|
||||
|
||||
for (uint32_t i = 0; i < tl_shape_size(&ifmap_shape); i++) {
|
||||
float f = convert_bf16_fp32(ifmap[i]);
|
||||
double v = _gen_atan(f);
|
||||
ofmap[i] = convert_fp32_bf16(v);
|
||||
|
||||
if (mode == PRE_DATA_COMPARE_FIX) {
|
||||
ofmap[i] = golden_bf16[i];
|
||||
} else if (mode == DATA_COMPARE_U8) {
|
||||
ofmap[i] = (uint8_t)convert_bf16_s8(ofmap[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool verify(uint16_t *ofmap_data, uint16_t *ref_data, uint16_t *ifmap, uint64_t ifmap_size,
|
||||
float epsilon) {
|
||||
uint64_t size = ifmap_size;
|
||||
|
||||
for (uint64_t i = 0; i < size; i++) {
|
||||
bool is_close;
|
||||
uint16_t ref = ref_data[i];
|
||||
uint16_t ofmap_data_bf16;
|
||||
float ref_f;
|
||||
float ofmap_data_f;
|
||||
|
||||
ref_f = convert_bf16_fp32(ref);
|
||||
ofmap_data_f = convert_bf16_fp32(ofmap_data[i]);
|
||||
ofmap_data_bf16 = ofmap_data[i];
|
||||
|
||||
if (mode == PRE_DATA_COMPARE_FIX) {
|
||||
is_close = ofmap_data[i] == ref;
|
||||
} else {
|
||||
is_close = fabs(ref_f - ofmap_data_f) < epsilon;
|
||||
}
|
||||
|
||||
if (!is_close) {
|
||||
float input = convert_bf16_fp32(ifmap[i]);
|
||||
fprintf(stderr,
|
||||
"comparing failed at ofmap_data[%lu](input:%f)\n"
|
||||
"\tgot %x, exp %x, fp32: got %f exp %f, atan(%f) = %f\n",
|
||||
i, input, ofmap_data_bf16, ref, ofmap_data_f, ref_f, input, _gen_atan(input));
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void gen_input(uint16_t *input_data, uint64_t ifmap_size, TEST_MODE mode, int range_start,
|
||||
int range_end) {
|
||||
if (mode == PRE_DATA_COMPARE_FIX) {
|
||||
memcpy(input_data, &test_pattern, sizeof(test_pattern));
|
||||
} else {
|
||||
std::random_device rd;
|
||||
std::mt19937 e2(rd());
|
||||
std::uniform_real_distribution<> dist(range_start, range_end);
|
||||
int table_hw = 256;
|
||||
for (uint64_t i = 0; i < ifmap_size; i++) {
|
||||
// input range is -8 ~ +8
|
||||
float input =
|
||||
((int)i % (range_end - 2)) * (((int)i % 2) ? 1 : -1) + 0.03 + (i % table_hw) * 0.002;
|
||||
// float input = ((int)i % 10) * (((int)i % 2) ? 1 : -1) + 0.03 + (i %
|
||||
// table_hw) * 0.002; float input = dist(e2); input = ((int)i %
|
||||
// (range_end-2)) * (((int)i % 2) ? 1 : 1) + 0.03 + (i % table_hw) *
|
||||
// 0.002; if (input < 1 && input > 0) {
|
||||
// input = 111.9;
|
||||
//}
|
||||
input_data[i] = convert_fp32_bf16(input);
|
||||
}
|
||||
input_data[0] = convert_fp32_bf16(0);
|
||||
input_data[1] = convert_fp32_bf16(1);
|
||||
input_data[2] = convert_fp32_bf16(-1);
|
||||
}
|
||||
|
||||
#ifdef DBG
|
||||
for (uint64_t i = 0; i < ifmap_size; i++) {
|
||||
printf("source if[%lu] bf16 %f 0x%x, log2f is %f\n", i, convert_bf16_fp32(input_data[i]),
|
||||
input_data[i], floor(log2((convert_bf16_fp32(input_data[i])))));
|
||||
}
|
||||
#endif /* ifdef DBG */
|
||||
}
|
||||
|
||||
static void testbench(CVI_RT_HANDLE *ctx, cvk_context_t *bmk) {
|
||||
// TODO: check more shape / align
|
||||
cvk_chip_info_t chip_info = bmk->info;
|
||||
|
||||
uint32_t input_n = 1;
|
||||
uint32_t input_c = chip_info.npu_num;
|
||||
uint32_t input_h = 16;
|
||||
uint32_t input_w = 16;
|
||||
float epsilon = 0.01;
|
||||
int range_start = -8;
|
||||
int range_end = 8;
|
||||
cvk_fmt_t fmt = CVK_FMT_BF16;
|
||||
|
||||
if (mode == PRE_DATA_COMPARE_FIX) {
|
||||
input_h = 4;
|
||||
input_w = 8;
|
||||
}
|
||||
|
||||
cvk_tl_shape_t ifmap_shape = {input_n, input_c, input_h, input_w};
|
||||
cvk_tl_shape_t ofmap_shape = ifmap_shape;
|
||||
uint64_t ifmap_size = tl_shape_size(&ifmap_shape);
|
||||
uint64_t ofmap_size = tl_shape_size(&ofmap_shape);
|
||||
|
||||
int data_type_size = bytesize_of_fmt(fmt);
|
||||
uint64_t ifmap_bytesize = ifmap_size * data_type_size;
|
||||
uint64_t ofmap_bytesize = ofmap_size * data_type_size;
|
||||
|
||||
// get lut table shape and size
|
||||
cvk_tl_shape_t table_shape;
|
||||
uint64_t table_bytesize = cvm_lut_tbl_bytesize(bmk, &table_shape, fmt);
|
||||
|
||||
cvk_tl_t *tl_ifmap = test_alloc_tl(bmk, ifmap_shape, fmt, /*align*/ 1);
|
||||
cvk_tl_t *tl_ofmap_bf16 = test_alloc_tl(bmk, ofmap_shape, fmt, /*align*/ 1);
|
||||
cvk_tl_t *out = tl_ofmap_bf16;
|
||||
|
||||
// atan buf
|
||||
cvk_tl_t *tl_y0_buf = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);
|
||||
cvk_tl_t *tl_slope_buf = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);
|
||||
cvk_tl_t *tl_invert_buf = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);
|
||||
cvk_tl_t *tl_pos_neg_buf = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);
|
||||
|
||||
// reciprocal buf
|
||||
cvk_tl_t *tl_reciprocal_table_answer = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);
|
||||
cvk_tl_t *tl_reciprocal_table_answer_mantissa = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);
|
||||
|
||||
// temp buf
|
||||
cvk_tl_t *tl_buf = test_alloc_tl(bmk, ofmap_shape, fmt, /*align*/ 1);
|
||||
cvk_tl_t *tl_buf2 = test_alloc_tl(bmk, tl_ofmap_bf16->shape, fmt, /*align*/ 1);
|
||||
cvk_tl_t *tl_buf4 = test_alloc_tl(bmk, tl_ofmap_bf16->shape, fmt, /*align*/ 1);
|
||||
|
||||
uint16_t *input_data = (uint16_t *)xmalloc(ifmap_bytesize);
|
||||
uint16_t *ref_data = (uint16_t *)xmalloc(ofmap_bytesize);
|
||||
|
||||
// for reciprocal
|
||||
uint16_t *table_reciprocal_data = (uint16_t *)xmalloc(table_bytesize);
|
||||
uint16_t *table_reciprocal_data_mantissa = (uint16_t *)xmalloc(table_bytesize);
|
||||
|
||||
// for atan
|
||||
uint16_t *table_data_atan_y0 = (uint16_t *)xmalloc(table_bytesize);
|
||||
uint16_t *table_data_atan_slope = (uint16_t *)xmalloc(table_bytesize);
|
||||
uint16_t *table_data_atan_invert = (uint16_t *)xmalloc(table_bytesize);
|
||||
uint16_t *table_data_atan_pos_neg = (uint16_t *)xmalloc(table_bytesize);
|
||||
|
||||
gen_input(input_data, ifmap_size, mode, range_start, range_end);
|
||||
tl_lut_ref(ref_data, input_data, ifmap_shape);
|
||||
|
||||
cvm_reciprocal_tbl(table_reciprocal_data, table_reciprocal_data_mantissa, &table_shape);
|
||||
cvm_atan_tbl(table_data_atan_y0, table_data_atan_slope, table_data_atan_invert,
|
||||
table_data_atan_pos_neg, &table_shape);
|
||||
|
||||
test_put_tensor_g2l_comp(ctx, bmk, tl_ifmap, (uint8_t *)input_data);
|
||||
test_put_tensor_g2l_comp(ctx, bmk, tl_reciprocal_table_answer, (uint8_t *)table_reciprocal_data);
|
||||
test_put_tensor_g2l_comp(ctx, bmk, tl_reciprocal_table_answer_mantissa,
|
||||
(uint8_t *)table_reciprocal_data_mantissa);
|
||||
|
||||
// prepare atan
|
||||
test_put_tensor_g2l_comp(ctx, bmk, tl_y0_buf, (uint8_t *)table_data_atan_y0);
|
||||
test_put_tensor_g2l_comp(ctx, bmk, tl_slope_buf, (uint8_t *)table_data_atan_slope);
|
||||
test_put_tensor_g2l_comp(ctx, bmk, tl_invert_buf, (uint8_t *)table_data_atan_invert);
|
||||
test_put_tensor_g2l_comp(ctx, bmk, tl_pos_neg_buf, (uint8_t *)table_data_atan_pos_neg);
|
||||
|
||||
cvm_atan_emit(bmk, tl_ifmap, tl_buf, tl_buf2, tl_buf4, tl_y0_buf, tl_slope_buf, tl_invert_buf,
|
||||
tl_pos_neg_buf, tl_reciprocal_table_answer, tl_reciprocal_table_answer_mantissa,
|
||||
tl_ofmap_bf16, fmt);
|
||||
|
||||
test_submit_comp(ctx, bmk);
|
||||
|
||||
uint16_t *ofmap_data = (uint16_t *)test_get_tensor_l2g_comp(ctx, bmk, out);
|
||||
verify(ofmap_data, ref_data, input_data, ifmap_size, epsilon);
|
||||
|
||||
free_tl(bmk, tl_buf4);
|
||||
free_tl(bmk, tl_buf2);
|
||||
free_tl(bmk, tl_buf);
|
||||
free_tl(bmk, tl_reciprocal_table_answer_mantissa);
|
||||
free_tl(bmk, tl_reciprocal_table_answer);
|
||||
free_tl(bmk, tl_pos_neg_buf);
|
||||
free_tl(bmk, tl_invert_buf);
|
||||
free_tl(bmk, tl_slope_buf);
|
||||
free_tl(bmk, tl_y0_buf);
|
||||
free_tl(bmk, tl_ofmap_bf16);
|
||||
free_tl(bmk, tl_ifmap);
|
||||
|
||||
free(table_data_atan_y0);
|
||||
free(table_data_atan_slope);
|
||||
free(table_data_atan_invert);
|
||||
free(table_data_atan_pos_neg);
|
||||
free(table_reciprocal_data);
|
||||
free(table_reciprocal_data_mantissa);
|
||||
free(input_data);
|
||||
free(ref_data);
|
||||
free(ofmap_data);
|
||||
}
|
||||
|
||||
int main() {
|
||||
cvk_context_t *bmk = NULL;
|
||||
int round_mode;
|
||||
round_mode = set_store_feround();
|
||||
|
||||
CVI_RT_HANDLE ctx;
|
||||
test_init(&ctx, &bmk);
|
||||
|
||||
// for (int i = PRE_DATA_COMPARE_FIX; i < TEST_MODE_MAX; i++)
|
||||
// for (int i = PRE_DATA_COMPARE_FIX; i < DATA_COMPARE_ACCURACY; i++)
|
||||
for (int i = PRE_DATA_COMPARE_FIX; i < DATA_COMPARE_U8; i++)
|
||||
// for (int i = DATA_COMPARE_ACCURACY; i < DATA_COMPARE_U8; i++)
|
||||
{
|
||||
mode = static_cast<TEST_MODE>(i);
|
||||
printf("test mode %d...\n", mode);
|
||||
testbench(&ctx, bmk);
|
||||
}
|
||||
printf("pass\n");
|
||||
|
||||
test_exit(&ctx, bmk);
|
||||
restore_feround(round_mode);
|
||||
return 0;
|
||||
}
|
||||
667
cvimath/tests/cvi1835/atan2_degree.cpp
Normal file
667
cvimath/tests/cvi1835/atan2_degree.cpp
Normal file
@ -0,0 +1,667 @@
|
||||
/**
|
||||
* \brief atan2 is implemented by atan, you can refer to
|
||||
* [wiki](https://en.wikipedia.org/wiki/Atan2) for more details
|
||||
*/
|
||||
|
||||
#include <cvimath_internal.h>
|
||||
#include <test_cvikernel_util.h>
|
||||
|
||||
#define OUT
|
||||
#define IN
|
||||
#include <cfloat>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <random>
|
||||
#include <string>
|
||||
//#define DBG
|
||||
|
||||
/**
|
||||
* pre_data means we test a fixed pattern; the result should be the same as the LUT
|
||||
*/
|
||||
/**
 * Selects how inputs are generated and how results are compared.
 * Enumerator values are consecutive starting at 0, so a mode can be iterated
 * as an integer and cast back to TEST_MODE.
 */
enum TEST_MODE {
  // Fixed input pattern, output compared bit-exactly against golden data.
  PRE_DATA_COMPARE_FIX = 0,

  // Generated inputs, output compared against the reference within epsilon.
  // Default quadrant: x > 0, y > 0.
  DATA_COMPARE_ACCURACY,

  // atan(y/x): x > 0, with part of y forced negative and part forced to 0.
  DATA_COMPARE_ACCURACY_X_GT_0,

  // atan(y/x) + PI: x < 0 and y >= 0.
  DATA_COMPARE_ACCURACY_X_LT_0_Y_GE_0,

  // atan(y/x) - PI: x < 0 and y < 0.
  DATA_COMPARE_ACCURACY_X_LT_0_Y_LT_0,

  // pi / 2: x = 0 and y > 0.
  DATA_COMPARE_ACCURACY_X_0_Y_GT_0,

  // -pi / 2: x = 0 and y < 0.
  DATA_COMPARE_ACCURACY_X_0_Y_LT_0,

  // Generated inputs checked within epsilon, result narrowed bf16 -> uint8_t.
  DATA_COMPARE_U8,

  // Number of modes; not a mode itself.
  TEST_MODE_MAX,
};

// Currently active test mode; read by the reference and compare helpers.
static enum TEST_MODE mode;
|
||||
|
||||
static uint16_t test_pattern[] = {
|
||||
0x0000, 0x38D2, 0x3952, 0x399D, 0x39D2, 0x3A03, 0x3A1D, 0x3A38, 0x3A52, 0x3A6C, 0x3A83, 0x3A90,
|
||||
0x3A9D, 0x3AAA, 0x3AB8, 0x3AC5, 0x3AD2, 0x3ADF, 0x3AEC, 0x3AF9, 0x3B03, 0x3B0A, 0x3B10, 0x3B17,
|
||||
0x3B1D, 0x3B24, 0x3B2A, 0x3B31, 0x3B38, 0x3B3E, 0x3B45, 0x3B4B, 0x3B52, 0x3B58, 0x3B5F, 0x3B65,
|
||||
0x3B6C, 0x3B72, 0x3B79, 0x3B80, 0x3B83, 0x3B86, 0x3B8A, 0x3B8D, 0x3B90, 0x3B93, 0x3B97, 0x3B9A,
|
||||
0x3B9D, 0x3BA1, 0x3BA4, 0x3BA7, 0x3BAA, 0x3BAE, 0x3BB1, 0x3BB4, 0x3BB8, 0x3BBB, 0x3BBE, 0x3BC1,
|
||||
0x3BC5, 0x3BC8, 0x3BCB, 0x3BCE, 0x3BD2, 0x3BD5, 0x3BD8, 0x3BDC, 0x3BDF, 0x3BE2, 0x3BE5, 0x3BE9,
|
||||
0x3BEC, 0x3BEF, 0x3BF2, 0x3BF6, 0x3BF9, 0x3BFC, 0x3C00, 0x3C01, 0x3C03, 0x3C05, 0x3C06, 0x3C08,
|
||||
0x3C0A, 0x3C0B, 0x3C0D, 0x3C0F, 0x3C10, 0x3C12, 0x3C13, 0x3C15, 0x3C17, 0x3C18, 0x3C1A, 0x3C1C,
|
||||
0x3C1D, 0x3C1F, 0x3C21, 0x3C22, 0x3C24, 0x3C25, 0x3C27, 0x3C29, 0x3C2A, 0x3C2C, 0x3C2E, 0x3C2F,
|
||||
0x3C31, 0x3C33, 0x3C34, 0x3C36, 0x3C38, 0x3C39, 0x3C3B, 0x3C3C, 0x3C3E, 0x3C40, 0x3C41, 0x3C43,
|
||||
0x3C45, 0x3C46, 0x3C48, 0x3C4A, 0x3C4B, 0x3C4D, 0x3C4E, 0x3C50, 0x3C52, 0x3C53, 0x3C55, 0x3C57,
|
||||
0x3C58, 0x3C5A, 0x3C5C, 0x3C5D, 0x3C5F, 0x3C60, 0x3C62, 0x3C64, 0x3C65, 0x3C67, 0x3C69, 0x3C6A,
|
||||
0x3C6C, 0x3C6E, 0x3C6F, 0x3C71, 0x3C72, 0x3C74, 0x3C76, 0x3C77, 0x3C79, 0x3C7B, 0x3C7C, 0x3C7E,
|
||||
0x3C80, 0x3C81, 0x3C81, 0x3C82, 0x3C83, 0x3C84, 0x3C85, 0x3C86, 0x3C86, 0x3C87, 0x3C88, 0x3C89,
|
||||
0x3C8A, 0x3C8A, 0x3C8B, 0x3C8C, 0x3C8D, 0x3C8E, 0x3C8F, 0x3C8F, 0x3C90, 0x3C91, 0x3C92, 0x3C93,
|
||||
0x3C93, 0x3C94, 0x3C95, 0x3C96, 0x3C97, 0x3C98, 0x3C98, 0x3C99, 0x3C9A, 0x3C9B, 0x3C9C, 0x3C9C,
|
||||
0x3C9D, 0x3C9E, 0x3C9F, 0x3CA0, 0x3CA1, 0x3CA1, 0x3CA2, 0x3CA3, 0x3CA4, 0x3CA5, 0x3CA5, 0x3CA6,
|
||||
0x3CA7, 0x3CA8, 0x3CA9, 0x3CAA, 0x3CAA, 0x3CAB, 0x3CAC, 0x3CAD, 0x3CAE, 0x3CAE, 0x3CAF, 0x3CB0,
|
||||
0x3CB1, 0x3CB2, 0x3CB3, 0x3CB3, 0x3CB4, 0x3CB5, 0x3CB6, 0x3CB7, 0x3CB8, 0x3CB8, 0x3CB9, 0x3CBA,
|
||||
0x3CBB, 0x3CBC, 0x3CBC, 0x3CBD, 0x3CBE, 0x3CBF, 0x3CC0, 0x3CC1, 0x3CC1, 0x3CC2, 0x3CC3, 0x3CC4,
|
||||
0x3CC5, 0x3CC5, 0x3CC6, 0x3CC7, 0x3CC8, 0x3CC9, 0x3CCA, 0x3CCA, 0x3CCB, 0x3CCC, 0x3CCD, 0x3CCE,
|
||||
0x3CCE, 0x3CCF, 0x3CD0, 0x3CD1, 0x3CD2, 0x3CD3, 0x3CD3, 0x3CD4, 0x3CD5, 0x3CD6, 0x3CD7, 0x3CD7,
|
||||
0x3CD8, 0x3CD9, 0x3CDA, 0x3CDB, 0x3CDC, 0x3CDC, 0x3CDD, 0x3CDE, 0x3CDF, 0x3CE0, 0x3CE0, 0x3CE1,
|
||||
0x3CE2, 0x3CE3, 0x3CE4, 0x3CE5, 0x3CE5, 0x3CE6, 0x3CE7, 0x3CE8, 0x3CE9, 0x3CE9, 0x3CEA, 0x3CEB,
|
||||
0x3CEC, 0x3CED, 0x3CEE, 0x3CEE, 0x3CEF, 0x3CF0, 0x3CF1, 0x3CF2, 0x3CF2, 0x3CF3, 0x3CF4, 0x3CF5,
|
||||
0x3CF6, 0x3CF7, 0x3CF7, 0x3CF8, 0x3CF9, 0x3CFA, 0x3CFB, 0x3CFB, 0x3CFC, 0x3CFD, 0x3CFE, 0x3CFF,
|
||||
0x3D00, 0x3D00, 0x3D01, 0x3D01, 0x3D01, 0x3D02, 0x3D02, 0x3D03, 0x3D03, 0x3D03, 0x3D04, 0x3D04,
|
||||
0x3D05, 0x3D05, 0x3D06, 0x3D06, 0x3D06, 0x3D07, 0x3D07, 0x3D08, 0x3D08, 0x3D08, 0x3D09, 0x3D09,
|
||||
0x3D0A, 0x3D0A, 0x3D0A, 0x3D0B, 0x3D0B, 0x3D0C, 0x3D0C, 0x3D0C, 0x3D0D, 0x3D0D, 0x3D0E, 0x3D0E,
|
||||
0x3D0F, 0x3D0F, 0x3D0F, 0x3D10, 0x3D10, 0x3D11, 0x3D11, 0x3D11, 0x3D12, 0x3D12, 0x3D13, 0x3D13,
|
||||
0x3D13, 0x3D14, 0x3D14, 0x3D15, 0x3D15, 0x3D16, 0x3D16, 0x3D16, 0x3D17, 0x3D17, 0x3D18, 0x3D18,
|
||||
0x3D18, 0x3D19, 0x3D19, 0x3D1A, 0x3D1A, 0x3D1A, 0x3D1B, 0x3D1B, 0x3D1C, 0x3D1C, 0x3D1C, 0x3D1D,
|
||||
0x3D1D, 0x3D1E, 0x3D1E, 0x3D1F, 0x3D1F, 0x3D1F, 0x3D20, 0x3D20, 0x3D21, 0x3D21, 0x3D21, 0x3D22,
|
||||
0x3D22, 0x3D23, 0x3D23, 0x3D23, 0x3D24, 0x3D24, 0x3D25, 0x3D25, 0x3D25, 0x3D26, 0x3D26, 0x3D27,
|
||||
0x3D27, 0x3D28, 0x3D28, 0x3D28, 0x3D29, 0x3D29, 0x3D2A, 0x3D2A, 0x3D2A, 0x3D2B, 0x3D2B, 0x3D2C,
|
||||
0x3D2C, 0x3D2C, 0x3D2D, 0x3D2D, 0x3D2E, 0x3D2E, 0x3D2E, 0x3D2F, 0x3D2F, 0x3D30, 0x3D30, 0x3D31,
|
||||
0x3D31, 0x3D31, 0x3D32, 0x3D32, 0x3D33, 0x3D33, 0x3D33, 0x3D34, 0x3D34, 0x3D35, 0x3D35, 0x3D35,
|
||||
0x3D36, 0x3D36, 0x3D37, 0x3D37, 0x3D38, 0x3D38, 0x3D38, 0x3D39, 0x3D39, 0x3D3A, 0x3D3A, 0x3D3A,
|
||||
0x3D3B, 0x3D3B, 0x3D3C, 0x3D3C, 0x3D3C, 0x3D3D, 0x3D3D, 0x3D3E, 0x3D3E, 0x3D3E, 0x3D3F, 0x3D3F,
|
||||
0x3D40, 0x3D40, 0x3D41, 0x3D41, 0x3D41, 0x3D42, 0x3D42, 0x3D43, 0x3D43, 0x3D43, 0x3D44, 0x3D44,
|
||||
0x3D45, 0x3D45, 0x3D45, 0x3D46, 0x3D46, 0x3D47, 0x3D47, 0x3D47, 0x3D48, 0x3D48, 0x3D49, 0x3D49,
|
||||
0x3D4A, 0x3D4A, 0x3D4A, 0x3D4B, 0x3D4B, 0x3D4C, 0x3D4C, 0x3D4C, 0x3D4D, 0x3D4D, 0x3D4E, 0x3D4E,
|
||||
0x3D4E, 0x3D4F, 0x3D4F, 0x3D50, 0x3D50, 0x3D50, 0x3D51, 0x3D51, 0x3D52, 0x3D52, 0x3D53, 0x3D53,
|
||||
0x3D53, 0x3D54, 0x3D54, 0x3D55, 0x3D55, 0x3D55, 0x3D56, 0x3D56, 0x3D57, 0x3D57, 0x3D57, 0x3D58,
|
||||
0x3D58, 0x3D59, 0x3D59, 0x3D59, 0x3D5A, 0x3D5A, 0x3D5B, 0x3D5B, 0x3D5C, 0x3D5C, 0x3D5C, 0x3D5D,
|
||||
0x3D5D, 0x3D5E, 0x3D5E, 0x3D5E, 0x3D5F, 0x3D5F, 0x3D60, 0x3D60, 0x3D60, 0x3D61, 0x3D61, 0x3D62,
|
||||
0x3D62, 0x3D63, 0x3D63, 0x3D63, 0x3D64, 0x3D64, 0x3D65, 0x3D65, 0x3D65, 0x3D66, 0x3D66, 0x3D67,
|
||||
0x3D67, 0x3D67, 0x3D68, 0x3D68, 0x3D69, 0x3D69, 0x3D69, 0x3D6A, 0x3D6A, 0x3D6B, 0x3D6B, 0x3D6C,
|
||||
0x3D6C, 0x3D6C, 0x3D6D, 0x3D6D, 0x3D6E, 0x3D6E, 0x3D6E, 0x3D6F, 0x3D6F, 0x3D70, 0x3D70, 0x3D70,
|
||||
0x3D71, 0x3D71, 0x3D72, 0x3D72, 0x3D72, 0x3D73, 0x3D73, 0x3D74, 0x3D74, 0x3D75, 0x3D75, 0x3D75,
|
||||
0x3D76, 0x3D76, 0x3D77, 0x3D77, 0x3D77, 0x3D78, 0x3D78, 0x3D79, 0x3D79, 0x3D79, 0x3D7A, 0x3D7A,
|
||||
0x3D7B, 0x3D7B, 0x3D7B, 0x3D7C, 0x3D7C, 0x3D7D, 0x3D7D, 0x3D7E, 0x3D7E, 0x3D7E, 0x3D7F, 0x3D7F,
|
||||
0x3D80, 0x3D80, 0x3D80, 0x3D80, 0x3D81, 0x3D81, 0x3D81, 0x3D81, 0x3D81, 0x3D82, 0x3D82, 0x3D82,
|
||||
0x3D82, 0x3D82, 0x3D83, 0x3D83, 0x3D83, 0x3D83, 0x3D83, 0x3D84, 0x3D84, 0x3D84, 0x3D84, 0x3D85,
|
||||
0x3D85, 0x3D85, 0x3D85, 0x3D85, 0x3D86, 0x3D86, 0x3D86, 0x3D86, 0x3D86, 0x3D87, 0x3D87, 0x3D87,
|
||||
0x3D87, 0x3D87, 0x3D88, 0x3D88, 0x3D88, 0x3D88, 0x3D88, 0x3D89, 0x3D89, 0x3D89, 0x3D89, 0x3D89,
|
||||
0x3D8A, 0x3D8A, 0x3D8A, 0x3D8A, 0x3D8A, 0x3D8B, 0x3D8B, 0x3D8B, 0x3D8B, 0x3D8B, 0x3D8C, 0x3D8C,
|
||||
0x3D8C, 0x3D8C, 0x3D8C, 0x3D8D, 0x3D8D, 0x3D8D, 0x3D8D, 0x3D8E, 0x3D8E, 0x3D8E, 0x3D8E, 0x3D8E,
|
||||
0x3D8F, 0x3D8F, 0x3D8F, 0x3D8F, 0x3D8F, 0x3D90, 0x3D90, 0x3D90, 0x3D90, 0x3D90, 0x3D91, 0x3D91,
|
||||
0x3D91, 0x3D91, 0x3D91, 0x3D92, 0x3D92, 0x3D92, 0x3D92, 0x3D92, 0x3D93, 0x3D93, 0x3D93, 0x3D93,
|
||||
0x3D93, 0x3D94, 0x3D94, 0x3D94, 0x3D94, 0x3D94, 0x3D95, 0x3D95, 0x3D95, 0x3D95, 0x3D96, 0x3D96,
|
||||
0x3D96, 0x3D96, 0x3D96, 0x3D97, 0x3D97, 0x3D97, 0x3D97, 0x3D97, 0x3D98, 0x3D98, 0x3D98, 0x3D98,
|
||||
0x3D98, 0x3D99, 0x3D99, 0x3D99, 0x3D99, 0x3D99, 0x3D9A, 0x3D9A, 0x3D9A, 0x3D9A, 0x3D9A, 0x3D9B,
|
||||
0x3D9B, 0x3D9B, 0x3D9B, 0x3D9B, 0x3D9C, 0x3D9C, 0x3D9C, 0x3D9C, 0x3D9C, 0x3D9D, 0x3D9D, 0x3D9D,
|
||||
0x3D9D, 0x3D9D, 0x3D9E, 0x3D9E, 0x3D9E, 0x3D9E, 0x3D9F, 0x3D9F, 0x3D9F, 0x3D9F, 0x3D9F, 0x3DA0,
|
||||
0x3DA0, 0x3DA0, 0x3DA0, 0x3DA0, 0x3DA1, 0x3DA1, 0x3DA1, 0x3DA1, 0x3DA1, 0x3DA2, 0x3DA2, 0x3DA2,
|
||||
0x3DA2, 0x3DA2, 0x3DA3, 0x3DA3, 0x3DA3, 0x3DA3, 0x3DA3, 0x3DA4, 0x3DA4, 0x3DA4, 0x3DA4, 0x3DA4,
|
||||
0x3DA5, 0x3DA5, 0x3DA5, 0x3DA5, 0x3DA5, 0x3DA6, 0x3DA6, 0x3DA6, 0x3DA6, 0x3DA7, 0x3DA7, 0x3DA7,
|
||||
0x3DA7, 0x3DA7, 0x3DA8, 0x3DA8, 0x3DA8, 0x3DA8, 0x3DA8, 0x3DA9, 0x3DA9, 0x3DA9, 0x3DA9, 0x3DA9,
|
||||
0x3DAA, 0x3DAA, 0x3DAA, 0x3DAA, 0x3DAA, 0x3DAB, 0x3DAB, 0x3DAB, 0x3DAB, 0x3DAB, 0x3DAC, 0x3DAC,
|
||||
0x3DAC, 0x3DAC, 0x3DAC, 0x3DAD, 0x3DAD, 0x3DAD, 0x3DAD, 0x3DAD, 0x3DAE, 0x3DAE, 0x3DAE, 0x3DAE,
|
||||
0x3DAE, 0x3DAF, 0x3DAF, 0x3DAF, 0x3DAF, 0x3DB0, 0x3DB0, 0x3DB0, 0x3DB0, 0x3DB0, 0x3DB1, 0x3DB1,
|
||||
0x3DB1, 0x3DB1, 0x3DB1, 0x3DB2, 0x3DB2, 0x3DB2, 0x3DB2, 0x3DB2, 0x3DB3, 0x3DB3, 0x3DB3, 0x3DB3,
|
||||
0x3DB3, 0x3DB4, 0x3DB4, 0x3DB4, 0x3DB4, 0x3DB4, 0x3DB5, 0x3DB5, 0x3DB5, 0x3DB5, 0x3DB5, 0x3DB6,
|
||||
0x3DB6, 0x3DB6, 0x3DB6, 0x3DB6, 0x3DB7, 0x3DB7, 0x3DB7, 0x3DB7, 0x3DB8, 0x3DB8, 0x3DB8, 0x3DB8,
|
||||
0x3DB8, 0x3DB9, 0x3DB9, 0x3DB9, 0x3DB9, 0x3DB9, 0x3DBA, 0x3DBA, 0x3DBA, 0x3DBA, 0x3DBA, 0x3DBB,
|
||||
0x3DBB, 0x3DBB, 0x3DBB, 0x3DBB, 0x3DBC, 0x3DBC, 0x3DBC, 0x3DBC, 0x3DBC, 0x3DBD, 0x3DBD, 0x3DBD,
|
||||
0x3DBD, 0x3DBD, 0x3DBE, 0x3DBE, 0x3DBE, 0x3DBE, 0x3DBE, 0x3DBF, 0x3DBF, 0x3DBF, 0x3DBF, 0x3DBF,
|
||||
0x3DC0, 0x3DC0, 0x3DC0, 0x3DC0, 0x3DC1, 0x3DC1, 0x3DC1, 0x3DC1, 0x3DC1, 0x3DC2, 0x3DC2, 0x3DC2,
|
||||
0x3DC2, 0x3DC2, 0x3DC3, 0x3DC3, 0x3DC3, 0x3DC3, 0x3DC3, 0x3DC4, 0x3DC4, 0x3DC4, 0x3DC4, 0x3DC4,
|
||||
0x3DC5, 0x3DC5, 0x3DC5, 0x3DC5, 0x3DC5, 0x3DC6, 0x3DC6, 0x3DC6, 0x3DC6, 0x3DC6, 0x3DC7, 0x3DC7,
|
||||
0x3DC7, 0x3DC7, 0x3DC7, 0x3DC8, 0x3DC8, 0x3DC8, 0x3DC8, 0x3DC8, 0x3DC9, 0x3DC9, 0x3DC9, 0x3DC9,
|
||||
0x3DCA, 0x3DCA, 0x3DCA, 0x3DCA, 0x3DCA, 0x3DCB, 0x3DCB, 0x3DCB, 0x3DCB, 0x3DCB, 0x3DCC, 0x3DCC,
|
||||
0x3DCC, 0x3DCC, 0x3DCC, 0x3DCD, 0x3DCE, 0x3DCF, 0x3DD0, 0x3DD1, 0x3DD2, 0x3DD3, 0x3DD4, 0x3DD5,
|
||||
0x3DD6, 0x3DD7, 0x3DD8, 0x3DD9, 0x3DDA, 0x3DDB, 0x3DDC, 0x3DDD, 0x3DDE, 0x3DDF, 0x3DE0, 0x3DE1,
|
||||
0x3DE2, 0x3DE3, 0x3DE4, 0x3DE5,
|
||||
};
|
||||
|
||||
static uint16_t golden_bf16[] = {
|
||||
0x42b4, 0x42b4, 0x42b4, 0x42b4, 0x42b4, 0x42b4, 0x42b3, 0x42b3, 0x42b3, 0x42b3, 0x42b3, 0x42b3,
|
||||
0x42b3, 0x42b3, 0x42b3, 0x42b3, 0x42b2, 0x42b2, 0x42b2, 0x42b2, 0x42b2, 0x42b2, 0x42b2, 0x42b2,
|
||||
0x42b2, 0x42b1, 0x42b1, 0x42b1, 0x42b1, 0x42b1, 0x42b1, 0x42b1, 0x42b1, 0x42b1, 0x42b1, 0x42af,
|
||||
0x42af, 0x42af, 0x42af, 0x42af, 0x42af, 0x42af, 0x42af, 0x42af, 0x42af, 0x42ae, 0x42ae, 0x42ae,
|
||||
0x42ae, 0x42ae, 0x42ae, 0x42ae, 0x42ae, 0x42ae, 0x42ad, 0x42ad, 0x42ad, 0x42ad, 0x42ad, 0x42ad,
|
||||
0x42ad, 0x42ad, 0x42ad, 0x42ac, 0x42ac, 0x42ac, 0x42ac, 0x42ac, 0x42ac, 0x42ac, 0x42ac, 0x42ac,
|
||||
0x42ab, 0x42ab, 0x42ab, 0x42ab, 0x42ab, 0x42ab, 0x42ab, 0x42ab, 0x42ab, 0x42aa, 0x42aa, 0x42aa,
|
||||
0x42aa, 0x42aa, 0x42aa, 0x42aa, 0x42aa, 0x42a9, 0x42a9, 0x42a9, 0x42a9, 0x42a9, 0x42a9, 0x42a9,
|
||||
0x42a9, 0x42a9, 0x42a7, 0x42a7, 0x42a7, 0x42a7, 0x42a7, 0x42a7, 0x42a7, 0x42a7, 0x42a6, 0x42a6,
|
||||
0x42a6, 0x42a6, 0x42a6, 0x42a6, 0x42a6, 0x42a6, 0x42a5, 0x42a5, 0x42a5, 0x42a5, 0x42a5, 0x42a5,
|
||||
0x42a5, 0x42a4, 0x42a4, 0x42a4, 0x42a4, 0x42a4, 0x42a4, 0x42a4, 0x42a4, 0x42a4, 0x42a3, 0x42a3,
|
||||
0x42a3, 0x42a3, 0x42a3, 0x42a3, 0x42a3, 0x42a2, 0x42a2, 0x42a2, 0x42a2, 0x42a2, 0x42a2, 0x42a2,
|
||||
0x42a2, 0x42a2, 0x42a1, 0x42a1, 0x42a1, 0x42a1, 0x42a1, 0x42a1, 0x42a1, 0x42a0, 0x42a0, 0x42a0,
|
||||
0x42a0, 0x42a0, 0x42a0, 0x42a0, 0x42a0, 0x429e, 0x429e, 0x429e, 0x429e, 0x429e, 0x429e, 0x429d,
|
||||
0x429d, 0x429d, 0x429d, 0x429d, 0x429d, 0x429d, 0x429d, 0x429d, 0x429c, 0x429c, 0x429c, 0x429c,
|
||||
0x429c, 0x429b, 0x429b, 0x429b, 0x429b, 0x429b, 0x429b, 0x429b, 0x429b, 0x429a, 0x429a, 0x429a,
|
||||
0x429a, 0x429a, 0x429a, 0x4299, 0x4299, 0x4299, 0x4299, 0x4299, 0x4299, 0x4299, 0x4299, 0x4298,
|
||||
0x4298, 0x4298, 0x4298, 0x4298, 0x4298, 0x4297, 0x4297, 0x4297, 0x4297, 0x4297, 0x4297, 0x4296,
|
||||
0x4296, 0x4296, 0x4296, 0x4296, 0x4295, 0x4295, 0x4295, 0x4295, 0x4295, 0x4295, 0x4295, 0x4295,
|
||||
0x4294, 0x4294, 0x4294, 0x4294, 0x4294, 0x4294, 0x4293, 0x4293, 0x4293, 0x4293, 0x4293, 0x4293,
|
||||
0x4292, 0x4292, 0x4292, 0x4292, 0x4292, 0x4291, 0x4291, 0x4291, 0x4291, 0x4291, 0x4291, 0x4291,
|
||||
0x4291, 0x428f, 0x428f, 0x428f, 0x428e, 0x428e, 0x428e, 0x428e, 0x428e, 0x428e, 0x428e, 0x428e,
|
||||
0x428d, 0x428d, 0x428d, 0x428d, 0x428c, 0x428c, 0x428c, 0x428c, 0x428c, 0x428c, 0x428c, 0x428c,
|
||||
0x428b, 0x428b, 0x428b, 0x428a, 0x428a, 0x428a, 0x428a, 0x428a, 0x428a, 0x428a, 0x4289, 0x4289,
|
||||
0x4289, 0x4288, 0x4288, 0x4288, 0x4288, 0x4288, 0x4288, 0x4287, 0x4287, 0x4287, 0x4287, 0x4287,
|
||||
0x4286, 0x4286, 0x4286, 0x4286, 0x4286, 0x4286, 0x4286, 0x4286, 0x4285, 0x4285, 0x4285, 0x4285,
|
||||
0x4285, 0x4285, 0x4285, 0x4285, 0x4285, 0x4284, 0x4284, 0x4284, 0x4284, 0x4284, 0x4283, 0x4283,
|
||||
0x4282, 0x4282, 0x4282, 0x4282, 0x4282, 0x4281, 0x4281, 0x4281, 0x4281, 0x4281, 0x4281, 0x4281,
|
||||
0x4280, 0x4280, 0x4280, 0x427e, 0x427e, 0x427e, 0x427e, 0x427e, 0x427c, 0x427c, 0x427c, 0x427a,
|
||||
0x427a, 0x427a, 0x427a, 0x427a, 0x427a, 0x4278, 0x4278, 0x4278, 0x4277, 0x4277, 0x4277, 0x4277,
|
||||
0x4277, 0x4277, 0x4275, 0x4275, 0x4275, 0x4273, 0x4273, 0x4273, 0x4273, 0x4273, 0x4271, 0x4271,
|
||||
0x4271, 0x4270, 0x4270, 0x4270, 0x4270, 0x4270, 0x4270, 0x4270, 0x426e, 0x426c, 0x426c, 0x426c,
|
||||
0x426c, 0x426c, 0x426a, 0x426a, 0x426a, 0x426a, 0x4269, 0x4269, 0x4269, 0x4269, 0x4269, 0x4267,
|
||||
0x4267, 0x4266, 0x4266, 0x4266, 0x4266, 0x4266, 0x4264, 0x4264, 0x4264, 0x4262, 0x4262, 0x4262,
|
||||
0x4262, 0x4261, 0x4261, 0x4261, 0x425f, 0x425f, 0x425f, 0x425f, 0x425f, 0x425e, 0x425e, 0x425c,
|
||||
0x425c, 0x425c, 0x425c, 0x425c, 0x425b, 0x425b, 0x425b, 0x4259, 0x4259, 0x4259, 0x4259, 0x4257,
|
||||
0x4257, 0x4257, 0x4256, 0x4256, 0x4256, 0x4256, 0x4256, 0x4253, 0x4253, 0x4253, 0x4253, 0x4253,
|
||||
0x4253, 0x4253, 0x4250, 0x4250, 0x4250, 0x4250, 0x4250, 0x424f, 0x424f, 0x424d, 0x424d, 0x424d,
|
||||
0x424d, 0x424d, 0x424b, 0x424b, 0x424b, 0x424b, 0x424b, 0x4249, 0x4249, 0x4249, 0x4248, 0x4248,
|
||||
0x4248, 0x4248, 0x4247, 0x4247, 0x4247, 0x4245, 0x4245, 0x4244, 0x4244, 0x4244, 0x4243, 0x4243,
|
||||
0x4241, 0x4241, 0x4241, 0x4240, 0x4240, 0x4240, 0x4240, 0x4240, 0x423e, 0x423e, 0x423e, 0x423e,
|
||||
0x423b, 0x423b, 0x423b, 0x423b, 0x423b, 0x423a, 0x423a, 0x423a, 0x4239, 0x4239, 0x4237, 0x4237,
|
||||
0x4237, 0x4236, 0x4236, 0x4236, 0x4236, 0x4236, 0x4235, 0x4235, 0x4234, 0x4234, 0x4232, 0x4232,
|
||||
0x4232, 0x4232, 0x4232, 0x4231, 0x4231, 0x4231, 0x422f, 0x422f, 0x422d, 0x422d, 0x422d, 0x422d,
|
||||
0x422d, 0x422c, 0x422c, 0x422c, 0x422a, 0x422a, 0x422a, 0x422a, 0x4228, 0x4228, 0x4228, 0x4228,
|
||||
0x4228, 0x4227, 0x4227, 0x4227, 0x4225, 0x4225, 0x4223, 0x4223, 0x4223, 0x4223, 0x4223, 0x4223,
|
||||
0x4223, 0x4221, 0x4220, 0x4220, 0x4220, 0x4220, 0x421f, 0x421f, 0x421f, 0x421d, 0x421d, 0x421d,
|
||||
0x421d, 0x421d, 0x421b, 0x421b, 0x421b, 0x421b, 0x421b, 0x4219, 0x4219, 0x4218, 0x4218, 0x4218,
|
||||
0x4218, 0x4218, 0x4215, 0x4215, 0x4215, 0x4215, 0x4215, 0x4215, 0x4215, 0x4213, 0x4213, 0x4213,
|
||||
0x4212, 0x4212, 0x4211, 0x4211, 0x4211, 0x420f, 0x420f, 0x420f, 0x420f, 0x420d, 0x420d, 0x420d,
|
||||
0x420c, 0x420c, 0x420c, 0x420c, 0x420c, 0x420a, 0x420a, 0x4209, 0x4209, 0x4209, 0x4209, 0x4209,
|
||||
0x4207, 0x4207, 0x4207, 0x4206, 0x4206, 0x4206, 0x4206, 0x4204, 0x4204, 0x4204, 0x4202, 0x4202,
|
||||
0x4202, 0x4202, 0x4202, 0x4201, 0x4201, 0x41fe, 0x41fe, 0x41fe, 0x41fe, 0x41fe, 0x41fb, 0x41fb,
|
||||
0x41fb, 0x41fb, 0x41f8, 0x41f8, 0x41f8, 0x41f8, 0x41f8, 0x41f4, 0x41f1, 0x41f1, 0x41f1, 0x41f1,
|
||||
0x41f1, 0x41f1, 0x41f1, 0x41ed, 0x41ed, 0x41ed, 0x41ed, 0x41ed, 0x41ea, 0x41ea, 0x41ea, 0x41e6,
|
||||
0x41e6, 0x41e6, 0x41e3, 0x41e3, 0x41e3, 0x41e3, 0x41e3, 0x41df, 0x41df, 0x41df, 0x41df, 0x41dc,
|
||||
0x41dc, 0x41dc, 0x41dc, 0x41dc, 0x41d8, 0x41d8, 0x41d8, 0x41d8, 0x41d5, 0x41d5, 0x41d5, 0x41d5,
|
||||
0x41d5, 0x41d1, 0x41d1, 0x41d1, 0x41cd, 0x41cd, 0x41cd, 0x41cd, 0x41cd, 0x41cd, 0x41cd, 0x41c9,
|
||||
0x41c9, 0x41c9, 0x41c6, 0x41c6, 0x41c6, 0x41c6, 0x41c6, 0x41c6, 0x41c6, 0x41c2, 0x41c2, 0x41be,
|
||||
0x41be, 0x41be, 0x41be, 0x41be, 0x41be, 0x41ba, 0x41ba, 0x41ba, 0x41ba, 0x41ba, 0x41b6, 0x41b6,
|
||||
0x41b6, 0x41b6, 0x41b6, 0x41b6, 0x41b6, 0x41b2, 0x41b2, 0x41ae, 0x41ae, 0x41ae, 0x41ae, 0x41ae,
|
||||
0x41ae, 0x41ae, 0x41ae, 0x41aa, 0x41aa, 0x41aa, 0x41aa, 0x41aa, 0x41a6, 0x41a6, 0x41a6, 0x41a6,
|
||||
0x41a6, 0x41a2, 0x41a2, 0x41a2, 0x41a2, 0x419e, 0x419e, 0x419e, 0x419e, 0x419e, 0x419e, 0x419a,
|
||||
0x419a, 0x419a, 0x419a, 0x419a, 0x4196, 0x4196, 0x4196, 0x4196, 0x4196, 0x4196, 0x4196, 0x4196,
|
||||
0x4196, 0x4192, 0x418e, 0x418e, 0x418e, 0x418e, 0x418e, 0x418e, 0x418e, 0x418e, 0x418e, 0x418a,
|
||||
0x418a, 0x418a, 0x418a, 0x418a, 0x4186, 0x4186, 0x4186, 0x4186, 0x4186, 0x4186, 0x4186, 0x4181,
|
||||
0x4181, 0x4181, 0x4181, 0x4181, 0x417a, 0x417a, 0x417a, 0x417a, 0x417a, 0x417a, 0x417a, 0x417a,
|
||||
0x4172, 0x4172, 0x4172, 0x4172, 0x4172, 0x4169, 0x4169, 0x4169, 0x4169, 0x4169, 0x4169, 0x4161,
|
||||
0x4161, 0x4161, 0x4161, 0x4161, 0x4161, 0x4158, 0x4158, 0x4158, 0x4158, 0x4158, 0x4158, 0x4158,
|
||||
0x4158, 0x4158, 0x414f, 0x414f, 0x414f, 0x414f, 0x414f, 0x4147, 0x4147, 0x4147, 0x4147, 0x4147,
|
||||
0x4147, 0x4147, 0x4147, 0x413e, 0x413e, 0x413e, 0x413e, 0x413e, 0x4135, 0x4135, 0x4135, 0x4135,
|
||||
0x4135, 0x4135, 0x4135, 0x4135, 0x4135, 0x412c, 0x412c, 0x412c, 0x412c, 0x412c, 0x412c, 0x4123,
|
||||
0x4123, 0x4123, 0x4123, 0x4123, 0x4123, 0x4123, 0x4123, 0x4123, 0x411a, 0x411a, 0x411a, 0x411a,
|
||||
0x411a, 0x411a, 0x4111, 0x4111, 0x4111, 0x4111, 0x4111, 0x4111, 0x4111, 0x4111, 0x4108, 0x4108,
|
||||
0x4108, 0x4108, 0x4108, 0x4108, 0x4108, 0x4108, 0x40ff, 0x40ff, 0x40ff, 0x40ff, 0x40ff, 0x40ff,
|
||||
0x40ff, 0x40ff, 0x40ed, 0x40ed, 0x40ed, 0x40ed, 0x40ed, 0x40ed, 0x40ed, 0x40ed, 0x40db, 0x40db,
|
||||
0x40db, 0x40db, 0x40db, 0x40db, 0x40db, 0x40db, 0x40c9, 0x40c9, 0x40c9, 0x40c9, 0x40c9, 0x40c9,
|
||||
0x40c9, 0x40c9, 0x40b7, 0x40b7, 0x40b7, 0x40b7, 0x40b7, 0x40b7, 0x40b7, 0x40b7, 0x40b7, 0x40b7,
|
||||
0x40a5, 0x40a5, 0x40a5, 0x40a5, 0x40a5, 0x40a5, 0x40a5, 0x4092, 0x4092, 0x4092, 0x4092, 0x4092,
|
||||
0x4092, 0x4092, 0x4092, 0x4092, 0x4080, 0x4080, 0x4080, 0x4080, 0x4080, 0x4080, 0x4080, 0x4080,
|
||||
0x4080, 0x405c, 0x405c, 0x405c, 0x405c, 0x405c, 0x405c, 0x405c, 0x405c, 0x405c, 0x4037, 0x4037,
|
||||
0x4037, 0x4037, 0x4037, 0x4037, 0x4037, 0x4037, 0x4037, 0x4013, 0x4013, 0x4013, 0x4013, 0x4013,
|
||||
0x4013, 0x4013, 0x4013, 0x4013, 0x4013, 0x3fdc, 0x3fdc, 0x3fdc, 0x3fdc, 0x3fdc, 0x3fdc, 0x3fdc,
|
||||
0x3fdc, 0x3fdc, 0x3fdc, 0x3f93, 0x3f93, 0x3f93, 0x3f93, 0x3f93, 0x3f93, 0x3f93, 0x3f93, 0x3f93,
|
||||
0x3f13, 0x3f13, 0x3f13, 0x3f13, 0x3f13, 0x3f13, 0x3f13, 0x3f13, 0x3f13, 0x3f13, 0x0, 0x0,
|
||||
0x0, 0x0, 0x0, 0x0,
|
||||
};
|
||||
|
||||
/**
 * Host-side reference for atan2 expressed in degrees.
 *
 * \param y ordinate (numerator of y/x)
 * \param x abscissa (denominator of y/x)
 * \return atan2(y, x) converted from radians to degrees
 */
static double _gen_atan2_degree(float y, float x) {
  const double degrees = atan2(y, x) * 180 / M_PI;
  return degrees;
}
|
||||
|
||||
static void tl_lut_ref(uint16_t *ofmap, uint16_t *ifmap, uint16_t *ifmap2,
|
||||
cvk_tl_shape_t ifmap_shape) {
|
||||
assert(ofmap);
|
||||
|
||||
for (uint32_t i = 0; i < tl_shape_size(&ifmap_shape); i++) {
|
||||
float y = convert_bf16_fp32(ifmap2[i]);
|
||||
float x = convert_bf16_fp32(ifmap[i]);
|
||||
double v = _gen_atan2_degree(y, x);
|
||||
ofmap[i] = convert_fp32_bf16(v);
|
||||
|
||||
if (mode == PRE_DATA_COMPARE_FIX) {
|
||||
ofmap[i] = golden_bf16[i];
|
||||
} else if (mode == DATA_COMPARE_U8) {
|
||||
ofmap[i] = (uint8_t)convert_bf16_s8(ofmap[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool verify(uint16_t *ofmap_data, uint16_t *ref_data, uint16_t *ifmap, uint16_t *ifmap2,
|
||||
uint64_t ifmap_size, float epsilon) {
|
||||
uint64_t size = ifmap_size;
|
||||
|
||||
for (uint64_t i = 0; i < size; i++) {
|
||||
bool is_close;
|
||||
uint16_t ref = ref_data[i];
|
||||
uint16_t ofmap_data_bf16;
|
||||
float ref_f;
|
||||
float ofmap_data_f;
|
||||
|
||||
ref_f = convert_bf16_fp32(ref);
|
||||
ofmap_data_f = convert_bf16_fp32(ofmap_data[i]);
|
||||
ofmap_data_bf16 = ofmap_data[i];
|
||||
|
||||
if (mode == PRE_DATA_COMPARE_FIX) {
|
||||
is_close = ofmap_data[i] == ref;
|
||||
} else {
|
||||
is_close = fabs(ref_f - ofmap_data_f) < epsilon;
|
||||
if (abs(ofmap_data_f) * epsilon == 0) {
|
||||
// https://stackoverflow.com/questions/19837576/comparing-floating-point-number-to-zero
|
||||
is_close = abs(ref_f) < epsilon;
|
||||
} else {
|
||||
is_close = fabs(ref_f - ofmap_data_f) / fabs(std::max(ref_f, ofmap_data_f)) < epsilon;
|
||||
}
|
||||
}
|
||||
|
||||
if (!is_close) {
|
||||
float y = convert_bf16_fp32(ifmap2[i]);
|
||||
float x = convert_bf16_fp32(ifmap[i]);
|
||||
fprintf(stderr,
|
||||
"comparing failed at ofmap_data[%lu]\n"
|
||||
"\tgot %x, exp %x, fp32: got %f exp %f, atan2(%f, %f) = %f"
|
||||
"\ty %f(0x%x), x %f(0x%x)\n",
|
||||
i, ofmap_data_bf16, ref, ofmap_data_f, ref_f, y, x, _gen_atan2_degree(y, x), y,
|
||||
ifmap2[i], x, ifmap[i]);
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void _gen_input(uint16_t *input_data, uint64_t ifmap_size, int range_start, int range_end) {
|
||||
std::random_device rd;
|
||||
std::mt19937 e2(rd());
|
||||
std::uniform_real_distribution<> dist(range_start, range_end);
|
||||
|
||||
float LO = pow(2, range_start);
|
||||
float HI = pow(2, range_end);
|
||||
for (uint64_t i = 0; i < ifmap_size; i++) {
|
||||
// input range is -8 ~ +8
|
||||
int table_hw = 256;
|
||||
float input =
|
||||
((int)i % (range_end - 2)) * (((int)i % 2) ? 1 : -1) + 0.03 + (i % table_hw) * 0.002;
|
||||
input = ((int)i % (range_end - 2)) * (((int)i % 2) ? 1 : 1) + 0.03 + (i % table_hw) * 0.002;
|
||||
input_data[i] = convert_fp32_bf16(input);
|
||||
input = dist(e2);
|
||||
input = LO + static_cast<float>(rand()) / (static_cast<float>(RAND_MAX / (HI - LO)));
|
||||
}
|
||||
}
|
||||
|
||||
static void gen_input(uint16_t *x, uint16_t *y, uint64_t ifmap_size, TEST_MODE mode,
|
||||
int range_start, int range_end) {
|
||||
if (mode == PRE_DATA_COMPARE_FIX) {
|
||||
memcpy(x, &test_pattern, sizeof(test_pattern));
|
||||
} else {
|
||||
range_start = abs(range_start);
|
||||
range_end = abs(range_end);
|
||||
_gen_input(x, ifmap_size, range_start, range_end);
|
||||
}
|
||||
|
||||
// invert for test
|
||||
for (uint64_t i = 0; i < ifmap_size; i++) {
|
||||
y[i] = x[(ifmap_size - 1) - i];
|
||||
}
|
||||
|
||||
if (mode == DATA_COMPARE_ACCURACY_X_GT_0) {
|
||||
// y = any
|
||||
uint32_t i = 0;
|
||||
for (; i < ifmap_size / 4; i++) {
|
||||
// y < 0
|
||||
y[i] = convert_fp32_bf16(-1 * convert_fp32_bf16(y[i]));
|
||||
y[i + ifmap_size / 4] = convert_fp32_bf16(0);
|
||||
}
|
||||
} else if (mode == DATA_COMPARE_ACCURACY_X_LT_0_Y_GE_0) {
|
||||
// x < 0 and y >= 0
|
||||
for (uint32_t i = 0; i < ifmap_size; i++) {
|
||||
x[i] = convert_fp32_bf16(-1 * convert_fp32_bf16(x[i]));
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < ifmap_size / 4; i++) {
|
||||
y[i + ifmap_size / 4] = convert_fp32_bf16(0);
|
||||
}
|
||||
} else if (mode == DATA_COMPARE_ACCURACY_X_LT_0_Y_LT_0) {
|
||||
// x < 0 and y < 0
|
||||
for (uint32_t i = 0; i < ifmap_size; i++) {
|
||||
x[i] = convert_fp32_bf16(-1 * convert_fp32_bf16(x[i]));
|
||||
y[i] = convert_fp32_bf16(-1 * convert_fp32_bf16(y[i]));
|
||||
}
|
||||
} else if (mode == DATA_COMPARE_ACCURACY_X_0_Y_GT_0) {
|
||||
// pi / 2, x = 0 and y > 0
|
||||
for (uint32_t i = 0; i < ifmap_size; i++) {
|
||||
x[i] = convert_fp32_bf16(0);
|
||||
}
|
||||
} else if (mode == DATA_COMPARE_ACCURACY_X_0_Y_LT_0) {
|
||||
// -pi / 2, x = 0 and y < 0
|
||||
for (uint32_t i = 0; i < ifmap_size; i++) {
|
||||
x[i] = convert_fp32_bf16(0);
|
||||
y[i] = convert_fp32_bf16(-1 * convert_fp32_bf16(y[i]));
|
||||
}
|
||||
}
|
||||
|
||||
if (mode != PRE_DATA_COMPARE_FIX) {
|
||||
int i = 0;
|
||||
x[i] = convert_fp32_bf16(-10.0);
|
||||
y[i++] = convert_fp32_bf16(6.0);
|
||||
y[i] = convert_fp32_bf16(1.000000);
|
||||
x[i++] = convert_fp32_bf16(19.000000);
|
||||
y[i] = convert_fp32_bf16(5.000000);
|
||||
x[i++] = convert_fp32_bf16(-125.000000);
|
||||
y[i] = convert_fp32_bf16(1.070312);
|
||||
x[i++] = convert_fp32_bf16(0.498046);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-8.000000);
|
||||
x[i] = convert_fp32_bf16(424.000);
|
||||
y[i++] = convert_fp32_bf16(-1.00);
|
||||
x[i] = convert_fp32_bf16(2.484375);
|
||||
y[i++] = convert_fp32_bf16(-7.531250);
|
||||
x[i] = convert_fp32_bf16(-2.484375);
|
||||
y[i++] = convert_fp32_bf16(-7.531250);
|
||||
x[i] = convert_fp32_bf16(0);
|
||||
y[i++] = convert_fp32_bf16(7.531250);
|
||||
x[i] = convert_fp32_bf16(0);
|
||||
y[i++] = convert_fp32_bf16(-7.531250);
|
||||
x[i] = convert_fp32_bf16(0);
|
||||
y[i++] = convert_fp32_bf16(0);
|
||||
x[i] = convert_fp32_bf16(0);
|
||||
y[i++] = convert_fp32_bf16(0.394531);
|
||||
y[i] = convert_fp32_bf16(-4.000000);
|
||||
x[i++] = convert_fp32_bf16(-64.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-4.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-40.000000);
|
||||
y[i] = convert_fp32_bf16(1.000000);
|
||||
x[i++] = convert_fp32_bf16(-53.000000);
|
||||
y[i] = convert_fp32_bf16(-9.000000);
|
||||
x[i++] = convert_fp32_bf16(-91.000000);
|
||||
y[i] = convert_fp32_bf16(12.000000);
|
||||
x[i++] = convert_fp32_bf16(-164.000000);
|
||||
y[i] = convert_fp32_bf16(-20.000000);
|
||||
x[i++] = convert_fp32_bf16(-320.000000);
|
||||
y[i] = convert_fp32_bf16(1.000000);
|
||||
x[i++] = convert_fp32_bf16(-71.000000);
|
||||
y[i] = convert_fp32_bf16(1.000000);
|
||||
x[i++] = convert_fp32_bf16(-155.000000);
|
||||
y[i] = convert_fp32_bf16(1.000000);
|
||||
x[i++] = convert_fp32_bf16(-247.000000);
|
||||
y[i] = convert_fp32_bf16(-2.000000);
|
||||
x[i++] = convert_fp32_bf16(-118.000000);
|
||||
y[i] = convert_fp32_bf16(-2.000000);
|
||||
x[i++] = convert_fp32_bf16(-54.000000);
|
||||
y[i] = convert_fp32_bf16(-5.000000);
|
||||
x[i++] = convert_fp32_bf16(-392.000000);
|
||||
y[i] = convert_fp32_bf16(-37.000000);
|
||||
x[i++] = convert_fp32_bf16(-520.000000);
|
||||
y[i] = convert_fp32_bf16(-1.000000);
|
||||
x[i++] = convert_fp32_bf16(-19.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-10.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-8.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-2.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-14.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-2.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-6.000000);
|
||||
y[i] = convert_fp32_bf16(-1.000000);
|
||||
x[i++] = convert_fp32_bf16(-21.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-14.000000);
|
||||
y[i] = convert_fp32_bf16(-1.000000);
|
||||
x[i++] = convert_fp32_bf16(-17.000000);
|
||||
y[i] = convert_fp32_bf16(-1.000000);
|
||||
x[i++] = convert_fp32_bf16(-17.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-8.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-4.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-10.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-8.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-14.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-4.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-2.000000);
|
||||
y[i] = convert_fp32_bf16(1.000000);
|
||||
x[i++] = convert_fp32_bf16(-41.000000);
|
||||
y[i] = convert_fp32_bf16(1.000000);
|
||||
x[i++] = convert_fp32_bf16(-69.000000);
|
||||
y[i] = convert_fp32_bf16(4.000000);
|
||||
x[i++] = convert_fp32_bf16(-86.000000);
|
||||
y[i] = convert_fp32_bf16(1.000000);
|
||||
x[i++] = convert_fp32_bf16(-41.000000);
|
||||
y[i] = convert_fp32_bf16(-2.000000);
|
||||
x[i++] = convert_fp32_bf16(-34.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-6.000000);
|
||||
y[i] = convert_fp32_bf16(1.000000);
|
||||
x[i++] = convert_fp32_bf16(-41.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-136.000000);
|
||||
y[i] = convert_fp32_bf16(-3.000000);
|
||||
x[i++] = convert_fp32_bf16(-79.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-38.000000);
|
||||
y[i] = convert_fp32_bf16(5.000000);
|
||||
x[i++] = convert_fp32_bf16(-173.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-78.000000);
|
||||
y[i] = convert_fp32_bf16(-2.000000);
|
||||
x[i++] = convert_fp32_bf16(-60.000000);
|
||||
y[i] = convert_fp32_bf16(3.000000);
|
||||
x[i++] = convert_fp32_bf16(-123.000000);
|
||||
y[i] = convert_fp32_bf16(-9.000000);
|
||||
x[i++] = convert_fp32_bf16(-280.000000);
|
||||
y[i] = convert_fp32_bf16(3.000000);
|
||||
x[i++] = convert_fp32_bf16(-39.000000);
|
||||
y[i] = convert_fp32_bf16(2.000000);
|
||||
x[i++] = convert_fp32_bf16(-524.000000);
|
||||
y[i] = convert_fp32_bf16(11.000000);
|
||||
x[i++] = convert_fp32_bf16(-376.000000);
|
||||
y[i] = convert_fp32_bf16(5.000000);
|
||||
x[i++] = convert_fp32_bf16(-131.000000);
|
||||
y[i] = convert_fp32_bf16(11.000000);
|
||||
x[i++] = convert_fp32_bf16(-324.000000);
|
||||
y[i] = convert_fp32_bf16(9.000000);
|
||||
x[i++] = convert_fp32_bf16(-125.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-92.000000);
|
||||
y[i] = convert_fp32_bf16(-7.000000);
|
||||
x[i++] = convert_fp32_bf16(-233.000000);
|
||||
y[i] = convert_fp32_bf16(10.000000);
|
||||
x[i++] = convert_fp32_bf16(-170.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-4.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-4.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-10.000000);
|
||||
y[i] = convert_fp32_bf16(-1.000000);
|
||||
x[i++] = convert_fp32_bf16(-23.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-6.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-6.000000);
|
||||
y[i] = convert_fp32_bf16(-3.000000);
|
||||
x[i++] = convert_fp32_bf16(-37.000000);
|
||||
|
||||
y[i] = convert_fp32_bf16(-9);
|
||||
x[i++] = convert_fp32_bf16(-1);
|
||||
|
||||
y[i] = convert_fp32_bf16(7.0);
|
||||
x[i++] = convert_fp32_bf16(-1);
|
||||
|
||||
y[i] = convert_fp32_bf16(0);
|
||||
x[i++] = convert_fp32_bf16(-1);
|
||||
}
|
||||
|
||||
#ifdef DBG
|
||||
for (uint64_t i = 0; i < ifmap_size; i++) {
|
||||
printf("source[%ld] y %f x %f\n", i, convert_bf16_fp32(y[i]), convert_bf16_fp32(x[i]));
|
||||
}
|
||||
#endif /* ifdef DBG */
|
||||
}
|
||||
|
||||
/**
 * Runs one atan2 round trip for the currently selected global `mode`.
 *
 * Host inputs and a reference are generated, inputs and lookup tables are
 * copied into local memory, cvm_atan2_fast_degree_emit() is issued, and the
 * downloaded result is passed to verify().
 *
 * @param ctx runtime handle forwarded to the tensor upload/download helpers
 * @param bmk kernel context used for local-memory allocation and emission
 */
static void testbench(CVI_RT_HANDLE *ctx, cvk_context_t *bmk) {
  // TODO: check more shape / align
  const bool fixed_pattern = (mode == PRE_DATA_COMPARE_FIX);
  const cvk_fmt_t fmt = CVK_FMT_BF16;
  const float epsilon = 0.2;
  const int range_start = -8;
  const int range_end = 8;

  // Channel count comes from the chip's npu_num; the fixed-pattern run uses a
  // 4x8 plane instead of 16x16.
  const uint32_t dim_n = 1;
  const uint32_t dim_c = bmk->info.npu_num;
  const uint32_t dim_h = fixed_pattern ? 4 : 16;
  const uint32_t dim_w = fixed_pattern ? 8 : 16;

  cvk_tl_shape_t ifmap_shape = {dim_n, dim_c, dim_h, dim_w};
  cvk_tl_shape_t ofmap_shape = ifmap_shape;

  // Lookup-table shape (filled in by the call) and its size in bytes.
  cvk_tl_shape_t table_shape;
  const uint64_t table_bytes = cvm_lut_tbl_bytesize(bmk, &table_shape, fmt);

  // Element counts and byte sizes of the input / output tensors.
  const uint64_t in_count = tl_shape_size(&ifmap_shape);
  const uint64_t out_count = tl_shape_size(&ofmap_shape);
  const int elem_bytes = bytesize_of_fmt(fmt);
  const uint64_t in_bytes = in_count * elem_bytes;
  const uint64_t out_bytes = out_count * elem_bytes;

  // Local-memory tensors. They are released further down in exactly the
  // reverse of this allocation order.
  // atan2 takes two inputs
  cvk_tl_t *tl_x = test_alloc_tl(bmk, ifmap_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_y = test_alloc_tl(bmk, ifmap_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_result = test_alloc_tl(bmk, ofmap_shape, fmt, /*align*/ 1);
  cvk_tl_t *out = tl_result;
  // atan tables
  cvk_tl_t *tl_atan_y0 = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_atan_invert = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_atan_pos_neg = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);
  // reciprocal tables
  cvk_tl_t *tl_recip = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_recip_mantissa = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);
  // scratch buffers
  cvk_tl_t *tl_tmp1 = test_alloc_tl(bmk, ofmap_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_tmp2 = test_alloc_tl(bmk, tl_result->shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_tmp3 = test_alloc_tl(bmk, tl_result->shape, fmt, /*align*/ 1);

  // Host-side buffers: x, y and the reference output ...
  uint16_t *host_x = (uint16_t *)xmalloc(in_bytes);
  uint16_t *host_y = (uint16_t *)xmalloc(in_bytes);
  uint16_t *host_ref = (uint16_t *)xmalloc(out_bytes);
  // ... the reciprocal tables ...
  uint16_t *host_recip = (uint16_t *)xmalloc(table_bytes);
  uint16_t *host_recip_mantissa = (uint16_t *)xmalloc(table_bytes);
  // ... the atan tables ...
  uint16_t *host_atan_y0 = (uint16_t *)xmalloc(table_bytes);
  uint16_t *host_atan_invert = (uint16_t *)xmalloc(table_bytes);
  uint16_t *host_atan_pos_neg = (uint16_t *)xmalloc(table_bytes);
  // ... and the table used to search for the '0' index.
  uint16_t *host_idx0 = (uint16_t *)xmalloc(table_bytes);

  // Build inputs (host_x is x, host_y is y) and the expected output.
  gen_input(host_x, host_y, in_count, mode, range_start, range_end);
  tl_lut_ref(host_ref, host_x, host_y, ifmap_shape);

  // Fill the lookup tables on the host.
  cvm_reciprocal_tbl(host_recip, host_recip_mantissa, &table_shape);
  cvm_atan_fast_degree_tbl(host_atan_y0, host_atan_invert, host_atan_pos_neg, &table_shape);
  cvm_gen_0_tbl(host_idx0, &table_shape);

  // sys->local
  test_put_tensor_g2l_comp(ctx, bmk, tl_x, (uint8_t *)host_x);
  test_put_tensor_g2l_comp(ctx, bmk, tl_y, (uint8_t *)host_y);
  test_put_tensor_g2l_comp(ctx, bmk, tl_recip, (uint8_t *)host_recip);
  test_put_tensor_g2l_comp(ctx, bmk, tl_recip_mantissa, (uint8_t *)host_recip_mantissa);

  test_put_tensor_g2l_comp(ctx, bmk, tl_atan_y0, (uint8_t *)host_atan_y0);
  test_put_tensor_g2l_comp(ctx, bmk, tl_atan_invert, (uint8_t *)host_atan_invert);
  test_put_tensor_g2l_comp(ctx, bmk, tl_atan_pos_neg, (uint8_t *)host_atan_pos_neg);

  // y is passed first, then x.
  cvm_atan2_fast_degree_emit(bmk, tl_y, tl_x, tl_tmp1, tl_tmp2, tl_tmp3, tl_atan_y0,
                             tl_atan_invert, tl_atan_pos_neg, tl_recip, tl_recip_mantissa,
                             OUT tl_result, fmt);

  // local->sys, then compare against the reference.
  uint16_t *host_out = (uint16_t *)test_get_tensor_l2g_comp(ctx, bmk, out);
  verify(host_out, host_ref, host_x, host_y, in_count, epsilon);

  // Release local memory, last allocated first.
  cvk_tl_t *const release_order[] = {
      tl_tmp3,        tl_tmp2,         tl_tmp1,    tl_recip_mantissa, tl_recip, tl_atan_pos_neg,
      tl_atan_invert, tl_atan_y0,      tl_result,  tl_y,              tl_x,
  };
  for (unsigned k = 0; k < sizeof(release_order) / sizeof(release_order[0]); k++) {
    free_tl(bmk, release_order[k]);
  }

  // Release host buffers.
  void *const host_buffers[] = {
      host_atan_y0, host_idx0, host_atan_invert, host_atan_pos_neg, host_recip,
      host_recip_mantissa, host_x, host_ref, host_out, host_y,
  };
  for (unsigned k = 0; k < sizeof(host_buffers) / sizeof(host_buffers[0]); k++) {
    free(host_buffers[k]);
  }
}
|
||||
|
||||
int main() {
|
||||
CVI_RT_HANDLE ctx;
|
||||
cvk_context_t *bmk;
|
||||
int round_mode;
|
||||
|
||||
round_mode = set_store_feround();
|
||||
|
||||
test_init(&ctx, &bmk);
|
||||
|
||||
// for (int i = PRE_DATA_COMPARE_FIX; i < TEST_MODE_MAX; i++)
|
||||
// for (int i = PRE_DATA_COMPARE_FIX; i < DATA_COMPARE_ACCURACY; i++) {
|
||||
// for (int i = DATA_COMPARE_ACCURACY; i < DATA_COMPARE_U8; i++) {
|
||||
// for (int i = DATA_COMPARE_ACCURACY; i < DATA_COMPARE_ACCURACY_X_GT_0; i++)
|
||||
// {
|
||||
for (int i = PRE_DATA_COMPARE_FIX; i < DATA_COMPARE_U8; i++) {
|
||||
mode = static_cast<TEST_MODE>(i);
|
||||
printf("test mode %d...\n", mode);
|
||||
testbench(&ctx, bmk);
|
||||
}
|
||||
printf("pass\n");
|
||||
|
||||
test_exit(&ctx, bmk);
|
||||
restore_feround(round_mode);
|
||||
return 0;
|
||||
}
|
||||
719
cvimath/tests/cvi1835/atan2_radian.cpp
Normal file
719
cvimath/tests/cvi1835/atan2_radian.cpp
Normal file
@ -0,0 +1,719 @@
|
||||
/**
|
||||
* \brief atan2 is implemented by atan, you can refer to
|
||||
* [wiki](https://en.wikipedia.org/wiki/Atan2) for more details
|
||||
*/
|
||||
|
||||
#include <cvimath_internal.h>
|
||||
#include <test_cvikernel_util.h>
|
||||
|
||||
#define OUT
|
||||
#define IN
|
||||
#include <cfloat>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <random>
|
||||
#include <string>
|
||||
//#define DBG
|
||||
|
||||
/**
|
||||
* pre_data means we test a fixed pattern; the result should be the same as the LUT
|
||||
*/
|
||||
enum TEST_MODE {
  /* pre-data + fix compare */
  PRE_DATA_COMPARE_FIX = 0,

  /* generate \range_start to \range_end value that check epsilon,
   * default set x > 0, y > 0 */
  DATA_COMPARE_ACCURACY = 1,

  /* atan(y/x), x > 0, y = 0 */
  DATA_COMPARE_ACCURACY_X_GT_0 = 2,

  /* atan(y/x) + PI, x < 0 and y >= 0 */
  DATA_COMPARE_ACCURACY_X_LT_0_Y_GE_0 = 3,

  /* atan(y/x) - PI, x < 0 and y < 0 */
  DATA_COMPARE_ACCURACY_X_LT_0_Y_LT_0 = 4,

  /* pi / 2, x = 0 and y > 0 */
  DATA_COMPARE_ACCURACY_X_0_Y_GT_0 = 5,

  /* -pi / 2, x = 0 and y < 0 */
  DATA_COMPARE_ACCURACY_X_0_Y_LT_0 = 6,

  /* generate \range_start to \range_end value that check epsilon,
   * result bf16->uint8_t */
  DATA_COMPARE_U8 = 7,

  /* number of modes; not a mode itself */
  TEST_MODE_MAX = 8,
};
|
||||
|
||||
// Test mode currently in effect. tl_lut_ref() and verify() read it to pick
// between the golden-table comparison and the epsilon comparison.
// NOTE(review): the code that assigns it is outside this part of the file;
// presumably main() sets it before each testbench() run - confirm.
static TEST_MODE mode;
|
||||
|
||||
// Fixed input pattern, stored as raw 16-bit values. gen_input() copies the
// whole table into the x buffer when mode == PRE_DATA_COMPARE_FIX.
// NOTE(review): entries are presumably bf16 bit patterns, since they are later
// passed through convert_bf16_fp32() - confirm.
static uint16_t test_pattern[] = {
|
||||
0x0000, 0x38D2, 0x3952, 0x399D, 0x39D2, 0x3A03, 0x3A1D, 0x3A38, 0x3A52, 0x3A6C, 0x3A83, 0x3A90,
|
||||
0x3A9D, 0x3AAA, 0x3AB8, 0x3AC5, 0x3AD2, 0x3ADF, 0x3AEC, 0x3AF9, 0x3B03, 0x3B0A, 0x3B10, 0x3B17,
|
||||
0x3B1D, 0x3B24, 0x3B2A, 0x3B31, 0x3B38, 0x3B3E, 0x3B45, 0x3B4B, 0x3B52, 0x3B58, 0x3B5F, 0x3B65,
|
||||
0x3B6C, 0x3B72, 0x3B79, 0x3B80, 0x3B83, 0x3B86, 0x3B8A, 0x3B8D, 0x3B90, 0x3B93, 0x3B97, 0x3B9A,
|
||||
0x3B9D, 0x3BA1, 0x3BA4, 0x3BA7, 0x3BAA, 0x3BAE, 0x3BB1, 0x3BB4, 0x3BB8, 0x3BBB, 0x3BBE, 0x3BC1,
|
||||
0x3BC5, 0x3BC8, 0x3BCB, 0x3BCE, 0x3BD2, 0x3BD5, 0x3BD8, 0x3BDC, 0x3BDF, 0x3BE2, 0x3BE5, 0x3BE9,
|
||||
0x3BEC, 0x3BEF, 0x3BF2, 0x3BF6, 0x3BF9, 0x3BFC, 0x3C00, 0x3C01, 0x3C03, 0x3C05, 0x3C06, 0x3C08,
|
||||
0x3C0A, 0x3C0B, 0x3C0D, 0x3C0F, 0x3C10, 0x3C12, 0x3C13, 0x3C15, 0x3C17, 0x3C18, 0x3C1A, 0x3C1C,
|
||||
0x3C1D, 0x3C1F, 0x3C21, 0x3C22, 0x3C24, 0x3C25, 0x3C27, 0x3C29, 0x3C2A, 0x3C2C, 0x3C2E, 0x3C2F,
|
||||
0x3C31, 0x3C33, 0x3C34, 0x3C36, 0x3C38, 0x3C39, 0x3C3B, 0x3C3C, 0x3C3E, 0x3C40, 0x3C41, 0x3C43,
|
||||
0x3C45, 0x3C46, 0x3C48, 0x3C4A, 0x3C4B, 0x3C4D, 0x3C4E, 0x3C50, 0x3C52, 0x3C53, 0x3C55, 0x3C57,
|
||||
0x3C58, 0x3C5A, 0x3C5C, 0x3C5D, 0x3C5F, 0x3C60, 0x3C62, 0x3C64, 0x3C65, 0x3C67, 0x3C69, 0x3C6A,
|
||||
0x3C6C, 0x3C6E, 0x3C6F, 0x3C71, 0x3C72, 0x3C74, 0x3C76, 0x3C77, 0x3C79, 0x3C7B, 0x3C7C, 0x3C7E,
|
||||
0x3C80, 0x3C81, 0x3C81, 0x3C82, 0x3C83, 0x3C84, 0x3C85, 0x3C86, 0x3C86, 0x3C87, 0x3C88, 0x3C89,
|
||||
0x3C8A, 0x3C8A, 0x3C8B, 0x3C8C, 0x3C8D, 0x3C8E, 0x3C8F, 0x3C8F, 0x3C90, 0x3C91, 0x3C92, 0x3C93,
|
||||
0x3C93, 0x3C94, 0x3C95, 0x3C96, 0x3C97, 0x3C98, 0x3C98, 0x3C99, 0x3C9A, 0x3C9B, 0x3C9C, 0x3C9C,
|
||||
0x3C9D, 0x3C9E, 0x3C9F, 0x3CA0, 0x3CA1, 0x3CA1, 0x3CA2, 0x3CA3, 0x3CA4, 0x3CA5, 0x3CA5, 0x3CA6,
|
||||
0x3CA7, 0x3CA8, 0x3CA9, 0x3CAA, 0x3CAA, 0x3CAB, 0x3CAC, 0x3CAD, 0x3CAE, 0x3CAE, 0x3CAF, 0x3CB0,
|
||||
0x3CB1, 0x3CB2, 0x3CB3, 0x3CB3, 0x3CB4, 0x3CB5, 0x3CB6, 0x3CB7, 0x3CB8, 0x3CB8, 0x3CB9, 0x3CBA,
|
||||
0x3CBB, 0x3CBC, 0x3CBC, 0x3CBD, 0x3CBE, 0x3CBF, 0x3CC0, 0x3CC1, 0x3CC1, 0x3CC2, 0x3CC3, 0x3CC4,
|
||||
0x3CC5, 0x3CC5, 0x3CC6, 0x3CC7, 0x3CC8, 0x3CC9, 0x3CCA, 0x3CCA, 0x3CCB, 0x3CCC, 0x3CCD, 0x3CCE,
|
||||
0x3CCE, 0x3CCF, 0x3CD0, 0x3CD1, 0x3CD2, 0x3CD3, 0x3CD3, 0x3CD4, 0x3CD5, 0x3CD6, 0x3CD7, 0x3CD7,
|
||||
0x3CD8, 0x3CD9, 0x3CDA, 0x3CDB, 0x3CDC, 0x3CDC, 0x3CDD, 0x3CDE, 0x3CDF, 0x3CE0, 0x3CE0, 0x3CE1,
|
||||
0x3CE2, 0x3CE3, 0x3CE4, 0x3CE5, 0x3CE5, 0x3CE6, 0x3CE7, 0x3CE8, 0x3CE9, 0x3CE9, 0x3CEA, 0x3CEB,
|
||||
0x3CEC, 0x3CED, 0x3CEE, 0x3CEE, 0x3CEF, 0x3CF0, 0x3CF1, 0x3CF2, 0x3CF2, 0x3CF3, 0x3CF4, 0x3CF5,
|
||||
0x3CF6, 0x3CF7, 0x3CF7, 0x3CF8, 0x3CF9, 0x3CFA, 0x3CFB, 0x3CFB, 0x3CFC, 0x3CFD, 0x3CFE, 0x3CFF,
|
||||
0x3D00, 0x3D00, 0x3D01, 0x3D01, 0x3D01, 0x3D02, 0x3D02, 0x3D03, 0x3D03, 0x3D03, 0x3D04, 0x3D04,
|
||||
0x3D05, 0x3D05, 0x3D06, 0x3D06, 0x3D06, 0x3D07, 0x3D07, 0x3D08, 0x3D08, 0x3D08, 0x3D09, 0x3D09,
|
||||
0x3D0A, 0x3D0A, 0x3D0A, 0x3D0B, 0x3D0B, 0x3D0C, 0x3D0C, 0x3D0C, 0x3D0D, 0x3D0D, 0x3D0E, 0x3D0E,
|
||||
0x3D0F, 0x3D0F, 0x3D0F, 0x3D10, 0x3D10, 0x3D11, 0x3D11, 0x3D11, 0x3D12, 0x3D12, 0x3D13, 0x3D13,
|
||||
0x3D13, 0x3D14, 0x3D14, 0x3D15, 0x3D15, 0x3D16, 0x3D16, 0x3D16, 0x3D17, 0x3D17, 0x3D18, 0x3D18,
|
||||
0x3D18, 0x3D19, 0x3D19, 0x3D1A, 0x3D1A, 0x3D1A, 0x3D1B, 0x3D1B, 0x3D1C, 0x3D1C, 0x3D1C, 0x3D1D,
|
||||
0x3D1D, 0x3D1E, 0x3D1E, 0x3D1F, 0x3D1F, 0x3D1F, 0x3D20, 0x3D20, 0x3D21, 0x3D21, 0x3D21, 0x3D22,
|
||||
0x3D22, 0x3D23, 0x3D23, 0x3D23, 0x3D24, 0x3D24, 0x3D25, 0x3D25, 0x3D25, 0x3D26, 0x3D26, 0x3D27,
|
||||
0x3D27, 0x3D28, 0x3D28, 0x3D28, 0x3D29, 0x3D29, 0x3D2A, 0x3D2A, 0x3D2A, 0x3D2B, 0x3D2B, 0x3D2C,
|
||||
0x3D2C, 0x3D2C, 0x3D2D, 0x3D2D, 0x3D2E, 0x3D2E, 0x3D2E, 0x3D2F, 0x3D2F, 0x3D30, 0x3D30, 0x3D31,
|
||||
0x3D31, 0x3D31, 0x3D32, 0x3D32, 0x3D33, 0x3D33, 0x3D33, 0x3D34, 0x3D34, 0x3D35, 0x3D35, 0x3D35,
|
||||
0x3D36, 0x3D36, 0x3D37, 0x3D37, 0x3D38, 0x3D38, 0x3D38, 0x3D39, 0x3D39, 0x3D3A, 0x3D3A, 0x3D3A,
|
||||
0x3D3B, 0x3D3B, 0x3D3C, 0x3D3C, 0x3D3C, 0x3D3D, 0x3D3D, 0x3D3E, 0x3D3E, 0x3D3E, 0x3D3F, 0x3D3F,
|
||||
0x3D40, 0x3D40, 0x3D41, 0x3D41, 0x3D41, 0x3D42, 0x3D42, 0x3D43, 0x3D43, 0x3D43, 0x3D44, 0x3D44,
|
||||
0x3D45, 0x3D45, 0x3D45, 0x3D46, 0x3D46, 0x3D47, 0x3D47, 0x3D47, 0x3D48, 0x3D48, 0x3D49, 0x3D49,
|
||||
0x3D4A, 0x3D4A, 0x3D4A, 0x3D4B, 0x3D4B, 0x3D4C, 0x3D4C, 0x3D4C, 0x3D4D, 0x3D4D, 0x3D4E, 0x3D4E,
|
||||
0x3D4E, 0x3D4F, 0x3D4F, 0x3D50, 0x3D50, 0x3D50, 0x3D51, 0x3D51, 0x3D52, 0x3D52, 0x3D53, 0x3D53,
|
||||
0x3D53, 0x3D54, 0x3D54, 0x3D55, 0x3D55, 0x3D55, 0x3D56, 0x3D56, 0x3D57, 0x3D57, 0x3D57, 0x3D58,
|
||||
0x3D58, 0x3D59, 0x3D59, 0x3D59, 0x3D5A, 0x3D5A, 0x3D5B, 0x3D5B, 0x3D5C, 0x3D5C, 0x3D5C, 0x3D5D,
|
||||
0x3D5D, 0x3D5E, 0x3D5E, 0x3D5E, 0x3D5F, 0x3D5F, 0x3D60, 0x3D60, 0x3D60, 0x3D61, 0x3D61, 0x3D62,
|
||||
0x3D62, 0x3D63, 0x3D63, 0x3D63, 0x3D64, 0x3D64, 0x3D65, 0x3D65, 0x3D65, 0x3D66, 0x3D66, 0x3D67,
|
||||
0x3D67, 0x3D67, 0x3D68, 0x3D68, 0x3D69, 0x3D69, 0x3D69, 0x3D6A, 0x3D6A, 0x3D6B, 0x3D6B, 0x3D6C,
|
||||
0x3D6C, 0x3D6C, 0x3D6D, 0x3D6D, 0x3D6E, 0x3D6E, 0x3D6E, 0x3D6F, 0x3D6F, 0x3D70, 0x3D70, 0x3D70,
|
||||
0x3D71, 0x3D71, 0x3D72, 0x3D72, 0x3D72, 0x3D73, 0x3D73, 0x3D74, 0x3D74, 0x3D75, 0x3D75, 0x3D75,
|
||||
0x3D76, 0x3D76, 0x3D77, 0x3D77, 0x3D77, 0x3D78, 0x3D78, 0x3D79, 0x3D79, 0x3D79, 0x3D7A, 0x3D7A,
|
||||
0x3D7B, 0x3D7B, 0x3D7B, 0x3D7C, 0x3D7C, 0x3D7D, 0x3D7D, 0x3D7E, 0x3D7E, 0x3D7E, 0x3D7F, 0x3D7F,
|
||||
0x3D80, 0x3D80, 0x3D80, 0x3D80, 0x3D81, 0x3D81, 0x3D81, 0x3D81, 0x3D81, 0x3D82, 0x3D82, 0x3D82,
|
||||
0x3D82, 0x3D82, 0x3D83, 0x3D83, 0x3D83, 0x3D83, 0x3D83, 0x3D84, 0x3D84, 0x3D84, 0x3D84, 0x3D85,
|
||||
0x3D85, 0x3D85, 0x3D85, 0x3D85, 0x3D86, 0x3D86, 0x3D86, 0x3D86, 0x3D86, 0x3D87, 0x3D87, 0x3D87,
|
||||
0x3D87, 0x3D87, 0x3D88, 0x3D88, 0x3D88, 0x3D88, 0x3D88, 0x3D89, 0x3D89, 0x3D89, 0x3D89, 0x3D89,
|
||||
0x3D8A, 0x3D8A, 0x3D8A, 0x3D8A, 0x3D8A, 0x3D8B, 0x3D8B, 0x3D8B, 0x3D8B, 0x3D8B, 0x3D8C, 0x3D8C,
|
||||
0x3D8C, 0x3D8C, 0x3D8C, 0x3D8D, 0x3D8D, 0x3D8D, 0x3D8D, 0x3D8E, 0x3D8E, 0x3D8E, 0x3D8E, 0x3D8E,
|
||||
0x3D8F, 0x3D8F, 0x3D8F, 0x3D8F, 0x3D8F, 0x3D90, 0x3D90, 0x3D90, 0x3D90, 0x3D90, 0x3D91, 0x3D91,
|
||||
0x3D91, 0x3D91, 0x3D91, 0x3D92, 0x3D92, 0x3D92, 0x3D92, 0x3D92, 0x3D93, 0x3D93, 0x3D93, 0x3D93,
|
||||
0x3D93, 0x3D94, 0x3D94, 0x3D94, 0x3D94, 0x3D94, 0x3D95, 0x3D95, 0x3D95, 0x3D95, 0x3D96, 0x3D96,
|
||||
0x3D96, 0x3D96, 0x3D96, 0x3D97, 0x3D97, 0x3D97, 0x3D97, 0x3D97, 0x3D98, 0x3D98, 0x3D98, 0x3D98,
|
||||
0x3D98, 0x3D99, 0x3D99, 0x3D99, 0x3D99, 0x3D99, 0x3D9A, 0x3D9A, 0x3D9A, 0x3D9A, 0x3D9A, 0x3D9B,
|
||||
0x3D9B, 0x3D9B, 0x3D9B, 0x3D9B, 0x3D9C, 0x3D9C, 0x3D9C, 0x3D9C, 0x3D9C, 0x3D9D, 0x3D9D, 0x3D9D,
|
||||
0x3D9D, 0x3D9D, 0x3D9E, 0x3D9E, 0x3D9E, 0x3D9E, 0x3D9F, 0x3D9F, 0x3D9F, 0x3D9F, 0x3D9F, 0x3DA0,
|
||||
0x3DA0, 0x3DA0, 0x3DA0, 0x3DA0, 0x3DA1, 0x3DA1, 0x3DA1, 0x3DA1, 0x3DA1, 0x3DA2, 0x3DA2, 0x3DA2,
|
||||
0x3DA2, 0x3DA2, 0x3DA3, 0x3DA3, 0x3DA3, 0x3DA3, 0x3DA3, 0x3DA4, 0x3DA4, 0x3DA4, 0x3DA4, 0x3DA4,
|
||||
0x3DA5, 0x3DA5, 0x3DA5, 0x3DA5, 0x3DA5, 0x3DA6, 0x3DA6, 0x3DA6, 0x3DA6, 0x3DA7, 0x3DA7, 0x3DA7,
|
||||
0x3DA7, 0x3DA7, 0x3DA8, 0x3DA8, 0x3DA8, 0x3DA8, 0x3DA8, 0x3DA9, 0x3DA9, 0x3DA9, 0x3DA9, 0x3DA9,
|
||||
0x3DAA, 0x3DAA, 0x3DAA, 0x3DAA, 0x3DAA, 0x3DAB, 0x3DAB, 0x3DAB, 0x3DAB, 0x3DAB, 0x3DAC, 0x3DAC,
|
||||
0x3DAC, 0x3DAC, 0x3DAC, 0x3DAD, 0x3DAD, 0x3DAD, 0x3DAD, 0x3DAD, 0x3DAE, 0x3DAE, 0x3DAE, 0x3DAE,
|
||||
0x3DAE, 0x3DAF, 0x3DAF, 0x3DAF, 0x3DAF, 0x3DB0, 0x3DB0, 0x3DB0, 0x3DB0, 0x3DB0, 0x3DB1, 0x3DB1,
|
||||
0x3DB1, 0x3DB1, 0x3DB1, 0x3DB2, 0x3DB2, 0x3DB2, 0x3DB2, 0x3DB2, 0x3DB3, 0x3DB3, 0x3DB3, 0x3DB3,
|
||||
0x3DB3, 0x3DB4, 0x3DB4, 0x3DB4, 0x3DB4, 0x3DB4, 0x3DB5, 0x3DB5, 0x3DB5, 0x3DB5, 0x3DB5, 0x3DB6,
|
||||
0x3DB6, 0x3DB6, 0x3DB6, 0x3DB6, 0x3DB7, 0x3DB7, 0x3DB7, 0x3DB7, 0x3DB8, 0x3DB8, 0x3DB8, 0x3DB8,
|
||||
0x3DB8, 0x3DB9, 0x3DB9, 0x3DB9, 0x3DB9, 0x3DB9, 0x3DBA, 0x3DBA, 0x3DBA, 0x3DBA, 0x3DBA, 0x3DBB,
|
||||
0x3DBB, 0x3DBB, 0x3DBB, 0x3DBB, 0x3DBC, 0x3DBC, 0x3DBC, 0x3DBC, 0x3DBC, 0x3DBD, 0x3DBD, 0x3DBD,
|
||||
0x3DBD, 0x3DBD, 0x3DBE, 0x3DBE, 0x3DBE, 0x3DBE, 0x3DBE, 0x3DBF, 0x3DBF, 0x3DBF, 0x3DBF, 0x3DBF,
|
||||
0x3DC0, 0x3DC0, 0x3DC0, 0x3DC0, 0x3DC1, 0x3DC1, 0x3DC1, 0x3DC1, 0x3DC1, 0x3DC2, 0x3DC2, 0x3DC2,
|
||||
0x3DC2, 0x3DC2, 0x3DC3, 0x3DC3, 0x3DC3, 0x3DC3, 0x3DC3, 0x3DC4, 0x3DC4, 0x3DC4, 0x3DC4, 0x3DC4,
|
||||
0x3DC5, 0x3DC5, 0x3DC5, 0x3DC5, 0x3DC5, 0x3DC6, 0x3DC6, 0x3DC6, 0x3DC6, 0x3DC6, 0x3DC7, 0x3DC7,
|
||||
0x3DC7, 0x3DC7, 0x3DC7, 0x3DC8, 0x3DC8, 0x3DC8, 0x3DC8, 0x3DC8, 0x3DC9, 0x3DC9, 0x3DC9, 0x3DC9,
|
||||
0x3DCA, 0x3DCA, 0x3DCA, 0x3DCA, 0x3DCA, 0x3DCB, 0x3DCB, 0x3DCB, 0x3DCB, 0x3DCB, 0x3DCC, 0x3DCC,
|
||||
0x3DCC, 0x3DCC, 0x3DCC, 0x3DCD, 0x3DCE, 0x3DCF, 0x3DD0, 0x3DD1, 0x3DD2, 0x3DD3, 0x3DD4, 0x3DD5,
|
||||
0x3DD6, 0x3DD7, 0x3DD8, 0x3DD9, 0x3DDA, 0x3DDB, 0x3DDC, 0x3DDD, 0x3DDE, 0x3DDF, 0x3DE0, 0x3DE1,
|
||||
0x3DE2, 0x3DE3, 0x3DE4, 0x3DE5,
|
||||
};
|
||||
|
||||
// Expected outputs for the fixed-pattern run, stored as raw 16-bit values.
// In PRE_DATA_COMPARE_FIX mode tl_lut_ref() copies these into the reference
// buffer and verify() requires the device output to match them bit-for-bit;
// the table length also sets how many elements both functions process.
static uint16_t golden_bf16[] = {
|
||||
0x3fc9, 0x3fc9, 0x3fc9, 0x3fc9, 0x3fc9, 0x3fc9, 0x3fc8, 0x3fc8, 0x3fc8, 0x3fc8, 0x3fc8, 0x3fc8,
|
||||
0x3fc8, 0x3fc8, 0x3fc8, 0x3fc8, 0x3fc7, 0x3fc7, 0x3fc7, 0x3fc7, 0x3fc7, 0x3fc7, 0x3fc7, 0x3fc7,
|
||||
0x3fc7, 0x3fc5, 0x3fc5, 0x3fc5, 0x3fc5, 0x3fc5, 0x3fc5, 0x3fc5, 0x3fc5, 0x3fc5, 0x3fc5, 0x3fc4,
|
||||
0x3fc4, 0x3fc4, 0x3fc4, 0x3fc4, 0x3fc4, 0x3fc4, 0x3fc4, 0x3fc4, 0x3fc4, 0x3fc3, 0x3fc3, 0x3fc3,
|
||||
0x3fc3, 0x3fc3, 0x3fc3, 0x3fc3, 0x3fc3, 0x3fc3, 0x3fc1, 0x3fc1, 0x3fc1, 0x3fc1, 0x3fc1, 0x3fc1,
|
||||
0x3fc1, 0x3fc1, 0x3fc1, 0x3fc0, 0x3fc0, 0x3fc0, 0x3fc0, 0x3fc0, 0x3fc0, 0x3fc0, 0x3fc0, 0x3fc0,
|
||||
0x3fbf, 0x3fbf, 0x3fbf, 0x3fbf, 0x3fbf, 0x3fbf, 0x3fbf, 0x3fbf, 0x3fbf, 0x3fbe, 0x3fbe, 0x3fbe,
|
||||
0x3fbe, 0x3fbe, 0x3fbe, 0x3fbe, 0x3fbe, 0x3fbc, 0x3fbc, 0x3fbc, 0x3fbc, 0x3fbc, 0x3fbc, 0x3fbc,
|
||||
0x3fbc, 0x3fbc, 0x3fbb, 0x3fbb, 0x3fbb, 0x3fbb, 0x3fbb, 0x3fbb, 0x3fbb, 0x3fbb, 0x3fba, 0x3fba,
|
||||
0x3fba, 0x3fba, 0x3fba, 0x3fba, 0x3fba, 0x3fba, 0x3fb9, 0x3fb9, 0x3fb9, 0x3fb9, 0x3fb9, 0x3fb9,
|
||||
0x3fb9, 0x3fb7, 0x3fb7, 0x3fb7, 0x3fb7, 0x3fb7, 0x3fb7, 0x3fb7, 0x3fb7, 0x3fb7, 0x3fb6, 0x3fb6,
|
||||
0x3fb6, 0x3fb6, 0x3fb6, 0x3fb6, 0x3fb6, 0x3fb5, 0x3fb5, 0x3fb5, 0x3fb5, 0x3fb5, 0x3fb5, 0x3fb5,
|
||||
0x3fb5, 0x3fb5, 0x3fb4, 0x3fb4, 0x3fb4, 0x3fb4, 0x3fb4, 0x3fb4, 0x3fb4, 0x3fb2, 0x3fb2, 0x3fb2,
|
||||
0x3fb2, 0x3fb2, 0x3fb2, 0x3fb2, 0x3fb2, 0x3fb1, 0x3fb1, 0x3fb1, 0x3fb1, 0x3fb1, 0x3fb1, 0x3fb0,
|
||||
0x3fb0, 0x3fb0, 0x3fb0, 0x3fb0, 0x3fb0, 0x3fb0, 0x3fb0, 0x3fb0, 0x3faf, 0x3faf, 0x3faf, 0x3faf,
|
||||
0x3faf, 0x3fad, 0x3fad, 0x3fad, 0x3fad, 0x3fad, 0x3fad, 0x3fad, 0x3fad, 0x3fac, 0x3fac, 0x3fac,
|
||||
0x3fac, 0x3fac, 0x3fac, 0x3fab, 0x3fab, 0x3fab, 0x3fab, 0x3fab, 0x3fab, 0x3fab, 0x3fab, 0x3faa,
|
||||
0x3faa, 0x3faa, 0x3faa, 0x3faa, 0x3faa, 0x3fa9, 0x3fa9, 0x3fa9, 0x3fa9, 0x3fa9, 0x3fa9, 0x3fa7,
|
||||
0x3fa7, 0x3fa7, 0x3fa7, 0x3fa7, 0x3fa6, 0x3fa6, 0x3fa6, 0x3fa6, 0x3fa6, 0x3fa6, 0x3fa6, 0x3fa6,
|
||||
0x3fa5, 0x3fa5, 0x3fa5, 0x3fa5, 0x3fa5, 0x3fa5, 0x3fa4, 0x3fa4, 0x3fa4, 0x3fa4, 0x3fa4, 0x3fa4,
|
||||
0x3fa3, 0x3fa3, 0x3fa3, 0x3fa3, 0x3fa3, 0x3fa1, 0x3fa1, 0x3fa1, 0x3fa1, 0x3fa1, 0x3fa1, 0x3fa1,
|
||||
0x3fa1, 0x3fa0, 0x3fa0, 0x3fa0, 0x3f9f, 0x3f9f, 0x3f9f, 0x3f9f, 0x3f9f, 0x3f9f, 0x3f9f, 0x3f9f,
|
||||
0x3f9e, 0x3f9e, 0x3f9e, 0x3f9e, 0x3f9d, 0x3f9d, 0x3f9d, 0x3f9d, 0x3f9d, 0x3f9d, 0x3f9d, 0x3f9d,
|
||||
0x3f9c, 0x3f9c, 0x3f9c, 0x3f9b, 0x3f9b, 0x3f9b, 0x3f9b, 0x3f9b, 0x3f9b, 0x3f9b, 0x3f99, 0x3f99,
|
||||
0x3f99, 0x3f98, 0x3f98, 0x3f98, 0x3f98, 0x3f98, 0x3f98, 0x3f97, 0x3f97, 0x3f97, 0x3f97, 0x3f97,
|
||||
0x3f96, 0x3f96, 0x3f96, 0x3f96, 0x3f96, 0x3f96, 0x3f96, 0x3f96, 0x3f95, 0x3f95, 0x3f94, 0x3f94,
|
||||
0x3f94, 0x3f94, 0x3f94, 0x3f94, 0x3f94, 0x3f93, 0x3f93, 0x3f93, 0x3f93, 0x3f93, 0x3f92, 0x3f92,
|
||||
0x3f91, 0x3f91, 0x3f91, 0x3f91, 0x3f91, 0x3f90, 0x3f90, 0x3f90, 0x3f90, 0x3f90, 0x3f90, 0x3f90,
|
||||
0x3f8f, 0x3f8f, 0x3f8f, 0x3f8e, 0x3f8e, 0x3f8e, 0x3f8e, 0x3f8e, 0x3f8d, 0x3f8d, 0x3f8d, 0x3f8c,
|
||||
0x3f8c, 0x3f8c, 0x3f8c, 0x3f8c, 0x3f8c, 0x3f8b, 0x3f8b, 0x3f8b, 0x3f8a, 0x3f8a, 0x3f8a, 0x3f8a,
|
||||
0x3f8a, 0x3f8a, 0x3f89, 0x3f89, 0x3f89, 0x3f88, 0x3f88, 0x3f88, 0x3f88, 0x3f88, 0x3f87, 0x3f87,
|
||||
0x3f87, 0x3f86, 0x3f86, 0x3f86, 0x3f86, 0x3f86, 0x3f86, 0x3f86, 0x3f85, 0x3f84, 0x3f84, 0x3f84,
|
||||
0x3f84, 0x3f84, 0x3f83, 0x3f83, 0x3f83, 0x3f83, 0x3f82, 0x3f82, 0x3f82, 0x3f82, 0x3f82, 0x3f81,
|
||||
0x3f81, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7d, 0x3f7d, 0x3f7d,
|
||||
0x3f7d, 0x3f7b, 0x3f7b, 0x3f7b, 0x3f79, 0x3f79, 0x3f79, 0x3f79, 0x3f79, 0x3f78, 0x3f78, 0x3f76,
|
||||
0x3f76, 0x3f76, 0x3f76, 0x3f76, 0x3f74, 0x3f74, 0x3f74, 0x3f72, 0x3f72, 0x3f72, 0x3f72, 0x3f71,
|
||||
0x3f71, 0x3f71, 0x3f6f, 0x3f6f, 0x3f6f, 0x3f6f, 0x3f6f, 0x3f6c, 0x3f6c, 0x3f6c, 0x3f6c, 0x3f6c,
|
||||
0x3f6c, 0x3f6c, 0x3f69, 0x3f69, 0x3f69, 0x3f69, 0x3f69, 0x3f67, 0x3f67, 0x3f65, 0x3f65, 0x3f65,
|
||||
0x3f65, 0x3f65, 0x3f62, 0x3f62, 0x3f62, 0x3f62, 0x3f62, 0x3f61, 0x3f61, 0x3f61, 0x3f5f, 0x3f5f,
|
||||
0x3f5f, 0x3f5f, 0x3f5e, 0x3f5e, 0x3f5e, 0x3f5c, 0x3f5c, 0x3f5b, 0x3f5b, 0x3f5b, 0x3f59, 0x3f59,
|
||||
0x3f58, 0x3f58, 0x3f58, 0x3f57, 0x3f57, 0x3f57, 0x3f57, 0x3f57, 0x3f54, 0x3f54, 0x3f54, 0x3f54,
|
||||
0x3f51, 0x3f51, 0x3f51, 0x3f51, 0x3f51, 0x3f50, 0x3f50, 0x3f50, 0x3f4e, 0x3f4e, 0x3f4d, 0x3f4d,
|
||||
0x3f4d, 0x3f4c, 0x3f4c, 0x3f4c, 0x3f4c, 0x3f4c, 0x3f4a, 0x3f4a, 0x3f49, 0x3f49, 0x3f46, 0x3f46,
|
||||
0x3f46, 0x3f46, 0x3f46, 0x3f45, 0x3f45, 0x3f45, 0x3f44, 0x3f44, 0x3f41, 0x3f41, 0x3f41, 0x3f41,
|
||||
0x3f41, 0x3f40, 0x3f40, 0x3f40, 0x3f3e, 0x3f3e, 0x3f3e, 0x3f3e, 0x3f3c, 0x3f3c, 0x3f3c, 0x3f3c,
|
||||
0x3f3c, 0x3f3a, 0x3f3a, 0x3f3a, 0x3f39, 0x3f39, 0x3f36, 0x3f36, 0x3f36, 0x3f36, 0x3f36, 0x3f36,
|
||||
0x3f36, 0x3f34, 0x3f33, 0x3f33, 0x3f33, 0x3f33, 0x3f31, 0x3f31, 0x3f31, 0x3f30, 0x3f30, 0x3f30,
|
||||
0x3f30, 0x3f30, 0x3f2d, 0x3f2d, 0x3f2d, 0x3f2d, 0x3f2d, 0x3f2b, 0x3f2b, 0x3f2a, 0x3f2a, 0x3f2a,
|
||||
0x3f2a, 0x3f2a, 0x3f26, 0x3f26, 0x3f26, 0x3f26, 0x3f26, 0x3f26, 0x3f26, 0x3f25, 0x3f25, 0x3f25,
|
||||
0x3f23, 0x3f23, 0x3f21, 0x3f21, 0x3f21, 0x3f20, 0x3f20, 0x3f20, 0x3f20, 0x3f1e, 0x3f1e, 0x3f1e,
|
||||
0x3f1c, 0x3f1c, 0x3f1c, 0x3f1c, 0x3f1c, 0x3f1b, 0x3f1b, 0x3f19, 0x3f19, 0x3f19, 0x3f19, 0x3f19,
|
||||
0x3f17, 0x3f17, 0x3f17, 0x3f15, 0x3f15, 0x3f15, 0x3f15, 0x3f14, 0x3f14, 0x3f14, 0x3f12, 0x3f12,
|
||||
0x3f12, 0x3f12, 0x3f12, 0x3f10, 0x3f10, 0x3f0e, 0x3f0e, 0x3f0e, 0x3f0e, 0x3f0e, 0x3f0c, 0x3f0c,
|
||||
0x3f0c, 0x3f0c, 0x3f0a, 0x3f0a, 0x3f0a, 0x3f0a, 0x3f0a, 0x3f08, 0x3f07, 0x3f07, 0x3f07, 0x3f07,
|
||||
0x3f07, 0x3f07, 0x3f07, 0x3f05, 0x3f05, 0x3f05, 0x3f05, 0x3f05, 0x3f03, 0x3f03, 0x3f03, 0x3f01,
|
||||
0x3f01, 0x3f01, 0x3efe, 0x3efe, 0x3efe, 0x3efe, 0x3efe, 0x3efa, 0x3efa, 0x3efa, 0x3efa, 0x3ef6,
|
||||
0x3ef6, 0x3ef6, 0x3ef6, 0x3ef6, 0x3ef1, 0x3ef1, 0x3ef1, 0x3ef1, 0x3eed, 0x3eed, 0x3eed, 0x3eed,
|
||||
0x3eed, 0x3ee9, 0x3ee9, 0x3ee9, 0x3ee5, 0x3ee5, 0x3ee5, 0x3ee5, 0x3ee5, 0x3ee5, 0x3ee5, 0x3ee1,
|
||||
0x3ee1, 0x3ee1, 0x3edd, 0x3edd, 0x3edd, 0x3edd, 0x3edd, 0x3edd, 0x3edd, 0x3ed9, 0x3ed9, 0x3ed4,
|
||||
0x3ed4, 0x3ed4, 0x3ed4, 0x3ed4, 0x3ed4, 0x3ed0, 0x3ed0, 0x3ed0, 0x3ed0, 0x3ed0, 0x3ecc, 0x3ecc,
|
||||
0x3ecc, 0x3ecc, 0x3ecc, 0x3ecc, 0x3ecc, 0x3ec7, 0x3ec7, 0x3ec3, 0x3ec3, 0x3ec3, 0x3ec3, 0x3ec3,
|
||||
0x3ec3, 0x3ec3, 0x3ec3, 0x3ebe, 0x3ebe, 0x3ebe, 0x3ebe, 0x3ebe, 0x3eba, 0x3eba, 0x3eba, 0x3eba,
|
||||
0x3eba, 0x3eb5, 0x3eb5, 0x3eb5, 0x3eb5, 0x3eb1, 0x3eb1, 0x3eb1, 0x3eb1, 0x3eb1, 0x3eb1, 0x3eac,
|
||||
0x3eac, 0x3eac, 0x3eac, 0x3eac, 0x3ea8, 0x3ea8, 0x3ea8, 0x3ea8, 0x3ea8, 0x3ea8, 0x3ea8, 0x3ea8,
|
||||
0x3ea8, 0x3ea3, 0x3e9f, 0x3e9f, 0x3e9f, 0x3e9f, 0x3e9f, 0x3e9f, 0x3e9f, 0x3e9f, 0x3e9f, 0x3e9a,
|
||||
0x3e9a, 0x3e9a, 0x3e9a, 0x3e9a, 0x3e95, 0x3e95, 0x3e95, 0x3e95, 0x3e95, 0x3e95, 0x3e95, 0x3e91,
|
||||
0x3e91, 0x3e91, 0x3e91, 0x3e91, 0x3e8c, 0x3e8c, 0x3e8c, 0x3e8c, 0x3e8c, 0x3e8c, 0x3e8c, 0x3e8c,
|
||||
0x3e87, 0x3e87, 0x3e87, 0x3e87, 0x3e87, 0x3e82, 0x3e82, 0x3e82, 0x3e82, 0x3e82, 0x3e82, 0x3e7b,
|
||||
0x3e7b, 0x3e7b, 0x3e7b, 0x3e7b, 0x3e7b, 0x3e71, 0x3e71, 0x3e71, 0x3e71, 0x3e71, 0x3e71, 0x3e71,
|
||||
0x3e71, 0x3e71, 0x3e67, 0x3e67, 0x3e67, 0x3e67, 0x3e67, 0x3e5e, 0x3e5e, 0x3e5e, 0x3e5e, 0x3e5e,
|
||||
0x3e5e, 0x3e5e, 0x3e5e, 0x3e54, 0x3e54, 0x3e54, 0x3e54, 0x3e54, 0x3e4a, 0x3e4a, 0x3e4a, 0x3e4a,
|
||||
0x3e4a, 0x3e4a, 0x3e4a, 0x3e4a, 0x3e4a, 0x3e40, 0x3e40, 0x3e40, 0x3e40, 0x3e40, 0x3e40, 0x3e36,
|
||||
0x3e36, 0x3e36, 0x3e36, 0x3e36, 0x3e36, 0x3e36, 0x3e36, 0x3e36, 0x3e2c, 0x3e2c, 0x3e2c, 0x3e2c,
|
||||
0x3e2c, 0x3e2c, 0x3e22, 0x3e22, 0x3e22, 0x3e22, 0x3e22, 0x3e22, 0x3e22, 0x3e22, 0x3e18, 0x3e18,
|
||||
0x3e18, 0x3e18, 0x3e18, 0x3e18, 0x3e18, 0x3e18, 0x3e0e, 0x3e0e, 0x3e0e, 0x3e0e, 0x3e0e, 0x3e0e,
|
||||
0x3e0e, 0x3e0e, 0x3e04, 0x3e04, 0x3e04, 0x3e04, 0x3e04, 0x3e04, 0x3e04, 0x3e04, 0x3df5, 0x3df5,
|
||||
0x3df5, 0x3df5, 0x3df5, 0x3df5, 0x3df5, 0x3df5, 0x3de0, 0x3de0, 0x3de0, 0x3de0, 0x3de0, 0x3de0,
|
||||
0x3de0, 0x3de0, 0x3dcc, 0x3dcc, 0x3dcc, 0x3dcc, 0x3dcc, 0x3dcc, 0x3dcc, 0x3dcc, 0x3dcc, 0x3dcc,
|
||||
0x3db8, 0x3db8, 0x3db8, 0x3db8, 0x3db8, 0x3db8, 0x3db8, 0x3da3, 0x3da3, 0x3da3, 0x3da3, 0x3da3,
|
||||
0x3da3, 0x3da3, 0x3da3, 0x3da3, 0x3d8f, 0x3d8f, 0x3d8f, 0x3d8f, 0x3d8f, 0x3d8f, 0x3d8f, 0x3d8f,
|
||||
0x3d8f, 0x3d75, 0x3d75, 0x3d75, 0x3d75, 0x3d75, 0x3d75, 0x3d75, 0x3d75, 0x3d75, 0x3d4d, 0x3d4d,
|
||||
0x3d4d, 0x3d4d, 0x3d4d, 0x3d4d, 0x3d4d, 0x3d4d, 0x3d4d, 0x3d24, 0x3d24, 0x3d24, 0x3d24, 0x3d24,
|
||||
0x3d24, 0x3d24, 0x3d24, 0x3d24, 0x3d24, 0x3cf6, 0x3cf6, 0x3cf6, 0x3cf6, 0x3cf6, 0x3cf6, 0x3cf6,
|
||||
0x3cf6, 0x3cf6, 0x3cf6, 0x3ca4, 0x3ca4, 0x3ca4, 0x3ca4, 0x3ca4, 0x3ca4, 0x3ca4, 0x3ca4, 0x3ca4,
|
||||
0x3c24, 0x3c24, 0x3c24, 0x3c24, 0x3c24, 0x3c24, 0x3c24, 0x3c24, 0x3c24, 0x3c24, 0x0, 0x0,
|
||||
0x0, 0x0, 0x0,
|
||||
};
|
||||
|
||||
// <! gen atan2 f(y, x) = 2 * atan(y / (pow(x*x+y*y, 0.5) + x))
|
||||
/**
 * Host reference for a single atan2 evaluation.
 *
 * @param y ordinate (first argument of atan2)
 * @param x abscissa (second argument of atan2)
 * @return the value of atan2(y, x), widened to double
 */
static double _gen_atan2(float y, float x) {
  const double angle = atan2(y, x);
  return angle;
}
|
||||
|
||||
static void tl_lut_ref(uint16_t *ofmap, uint16_t *ifmap, uint16_t *ifmap2,
|
||||
cvk_tl_shape_t ifmap_shape) {
|
||||
assert(ofmap);
|
||||
|
||||
uint32_t size = tl_shape_size(&ifmap_shape);
|
||||
if (mode == PRE_DATA_COMPARE_FIX) {
|
||||
size = sizeof(golden_bf16) / sizeof(golden_bf16[0]);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < size; i++) {
|
||||
float y = convert_bf16_fp32(ifmap2[i]);
|
||||
float x = convert_bf16_fp32(ifmap[i]);
|
||||
double v = _gen_atan2(y, x);
|
||||
ofmap[i] = convert_fp32_bf16(v);
|
||||
|
||||
if (mode == PRE_DATA_COMPARE_FIX) {
|
||||
ofmap[i] = golden_bf16[i];
|
||||
} else if (mode == DATA_COMPARE_U8) {
|
||||
ofmap[i] = (uint8_t)convert_bf16_s8(ofmap[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool verify(uint16_t *ofmap_data, uint16_t *ref_data, uint16_t *ifmap, uint16_t *ifmap2,
|
||||
uint64_t ifmap_size, float epsilon) {
|
||||
uint64_t size = ifmap_size;
|
||||
if (mode == PRE_DATA_COMPARE_FIX) {
|
||||
size = sizeof(golden_bf16) / sizeof(golden_bf16[0]);
|
||||
}
|
||||
|
||||
int tolerant_max = 20;
|
||||
tolerant_max = -1;
|
||||
int tolerant_cnt = 0;
|
||||
for (uint64_t i = 0; i < size; i++) {
|
||||
bool is_close;
|
||||
uint16_t ref = ref_data[i];
|
||||
uint16_t ofmap_data_bf16;
|
||||
float ref_f;
|
||||
float ofmap_data_f;
|
||||
|
||||
ref_f = convert_bf16_fp32(ref);
|
||||
ofmap_data_f = convert_bf16_fp32(ofmap_data[i]);
|
||||
ofmap_data_bf16 = ofmap_data[i];
|
||||
|
||||
if (mode == PRE_DATA_COMPARE_FIX) {
|
||||
is_close = ofmap_data[i] == ref;
|
||||
} else {
|
||||
is_close = fabs(ref_f - ofmap_data_f) < epsilon;
|
||||
}
|
||||
|
||||
if (!is_close) {
|
||||
float y = convert_bf16_fp32(ifmap2[i]);
|
||||
float x = convert_bf16_fp32(ifmap[i]);
|
||||
fprintf(stderr,
|
||||
"comparing failed at ofmap_data[%lu]\n"
|
||||
"\tgot %x, exp %x, fp32: got %f exp %f, atan2(%f, %f) = %f"
|
||||
"\ty %f(0x%x), x %f(0x%x)\n",
|
||||
i, ofmap_data_bf16, ref, ofmap_data_f, ref_f, y, x, _gen_atan2(y, x), y, ifmap2[i], x,
|
||||
ifmap[i]);
|
||||
|
||||
if (tolerant_cnt++ >= tolerant_max) {
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void _gen_input(uint16_t *input_data, uint64_t ifmap_size, int range_start, int range_end) {
|
||||
std::random_device rd;
|
||||
std::mt19937 e2(rd());
|
||||
std::uniform_real_distribution<> dist(range_start, range_end);
|
||||
|
||||
float LO = pow(2, range_start);
|
||||
float HI = pow(2, range_end);
|
||||
for (uint64_t i = 0; i < ifmap_size; i++) {
|
||||
// input range is -8 ~ +8
|
||||
int table_hw = 256;
|
||||
float input =
|
||||
((int)i % (range_end - 2)) * (((int)i % 2) ? 1 : -1) + 0.03 + (i % table_hw) * 0.002;
|
||||
input = ((int)i % (range_end - 2)) * (((int)i % 2) ? 1 : 1) + 0.03 + (i % table_hw) * 0.002;
|
||||
input_data[i] = convert_fp32_bf16(input);
|
||||
input = dist(e2);
|
||||
input = LO + static_cast<float>(rand()) / (static_cast<float>(RAND_MAX / (HI - LO)));
|
||||
}
|
||||
}
|
||||
|
||||
static void gen_input(uint16_t *x, uint16_t *y, uint64_t ifmap_size, TEST_MODE mode,
|
||||
int range_start, int range_end) {
|
||||
if (mode == PRE_DATA_COMPARE_FIX) {
|
||||
memcpy(x, &test_pattern, sizeof(test_pattern));
|
||||
} else {
|
||||
range_start = abs(range_start);
|
||||
range_end = abs(range_end);
|
||||
_gen_input(x, ifmap_size, range_start, range_end);
|
||||
}
|
||||
|
||||
// invert for test
|
||||
for (uint64_t i = 0; i < ifmap_size; i++) {
|
||||
y[i] = x[(ifmap_size - 1) - i];
|
||||
}
|
||||
|
||||
if (mode == DATA_COMPARE_ACCURACY_X_GT_0) {
|
||||
// y = any
|
||||
uint32_t i = 0;
|
||||
for (; i < ifmap_size / 4; i++) {
|
||||
// y < 0
|
||||
y[i] = convert_fp32_bf16(-1 * convert_fp32_bf16(y[i]));
|
||||
y[i + ifmap_size / 4] = convert_fp32_bf16(0);
|
||||
}
|
||||
} else if (mode == DATA_COMPARE_ACCURACY_X_LT_0_Y_GE_0) {
|
||||
// x < 0 and y >= 0
|
||||
for (uint32_t i = 0; i < ifmap_size; i++) {
|
||||
x[i] = convert_fp32_bf16(-1 * convert_fp32_bf16(x[i]));
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < ifmap_size / 4; i++) {
|
||||
y[i + ifmap_size / 4] = convert_fp32_bf16(0);
|
||||
}
|
||||
} else if (mode == DATA_COMPARE_ACCURACY_X_LT_0_Y_LT_0) {
|
||||
// x < 0 and y < 0
|
||||
for (uint32_t i = 0; i < ifmap_size; i++) {
|
||||
x[i] = convert_fp32_bf16(-1 * convert_fp32_bf16(x[i]));
|
||||
y[i] = convert_fp32_bf16(-1 * convert_fp32_bf16(y[i]));
|
||||
}
|
||||
} else if (mode == DATA_COMPARE_ACCURACY_X_0_Y_GT_0) {
|
||||
// pi / 2, x = 0 and y > 0
|
||||
for (uint32_t i = 0; i < ifmap_size; i++) {
|
||||
x[i] = convert_fp32_bf16(0);
|
||||
}
|
||||
} else if (mode == DATA_COMPARE_ACCURACY_X_0_Y_LT_0) {
|
||||
// -pi / 2, x = 0 and y < 0
|
||||
for (uint32_t i = 0; i < ifmap_size; i++) {
|
||||
x[i] = convert_fp32_bf16(0);
|
||||
y[i] = convert_fp32_bf16(-1 * convert_fp32_bf16(y[i]));
|
||||
}
|
||||
}
|
||||
|
||||
#if 1
|
||||
|
||||
if (mode != PRE_DATA_COMPARE_FIX) {
|
||||
int i = 0;
|
||||
x[i] = convert_fp32_bf16(0);
|
||||
y[i++] = convert_fp32_bf16(1.394531);
|
||||
x[i] = convert_fp32_bf16(0);
|
||||
y[i++] = convert_fp32_bf16(0.394531);
|
||||
x[i] = convert_fp32_bf16(0);
|
||||
y[i++] = convert_fp32_bf16(0.594531);
|
||||
x[i] = convert_fp32_bf16(-10.0);
|
||||
y[i++] = convert_fp32_bf16(6.0);
|
||||
x[i] = convert_fp32_bf16(1.0);
|
||||
y[i++] = convert_fp32_bf16(-1.);
|
||||
x[i] = convert_fp32_bf16(-1.0);
|
||||
y[i++] = convert_fp32_bf16(1.);
|
||||
x[i] = convert_fp32_bf16(0.111816);
|
||||
y[i++] = convert_fp32_bf16(0);
|
||||
x[i] = convert_fp32_bf16(2.031250);
|
||||
y[i++] = convert_fp32_bf16(0.0);
|
||||
x[i] = convert_fp32_bf16(-2.031250);
|
||||
x[i] = convert_fp32_bf16(0);
|
||||
y[i++] = convert_fp32_bf16(-1.394531);
|
||||
y[i++] = convert_fp32_bf16(0.0);
|
||||
x[i] = convert_fp32_bf16(0);
|
||||
y[i++] = convert_fp32_bf16(-6.0);
|
||||
x[i] = convert_fp32_bf16(0);
|
||||
y[i++] = convert_fp32_bf16(-0.394531);
|
||||
x[i] = convert_fp32_bf16(0);
|
||||
y[i++] = convert_fp32_bf16(-0.594531);
|
||||
x[i] = convert_fp32_bf16(0);
|
||||
y[i++] = convert_fp32_bf16(0.0);
|
||||
x[i] = convert_fp32_bf16(-8);
|
||||
y[i++] = convert_fp32_bf16(0);
|
||||
x[i] = convert_fp32_bf16(0);
|
||||
y[i++] = convert_fp32_bf16(3.0);
|
||||
x[i] = convert_fp32_bf16(-1.0);
|
||||
y[i++] = convert_fp32_bf16(-5.0);
|
||||
x[i] = convert_fp32_bf16(-2.484375);
|
||||
y[i++] = convert_fp32_bf16(-7.531250);
|
||||
x[i++] = convert_fp32_bf16(-125.000000);
|
||||
y[i] = convert_fp32_bf16(5.000000);
|
||||
x[i++] = convert_fp32_bf16(-8.000000);
|
||||
y[i] = convert_fp32_bf16(1.000000);
|
||||
x[i++] = convert_fp32_bf16(19.000000);
|
||||
y[i] = convert_fp32_bf16(1.070312);
|
||||
x[i++] = convert_fp32_bf16(0.498046);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i] = convert_fp32_bf16(424.000);
|
||||
y[i++] = convert_fp32_bf16(-1.00);
|
||||
x[i] = convert_fp32_bf16(2.484375);
|
||||
y[i++] = convert_fp32_bf16(-7.531250);
|
||||
x[i] = convert_fp32_bf16(0);
|
||||
y[i++] = convert_fp32_bf16(7.531250);
|
||||
|
||||
x[i] = convert_fp32_bf16(0);
|
||||
y[i++] = convert_fp32_bf16(-7.531250);
|
||||
|
||||
x[i] = convert_fp32_bf16(0);
|
||||
y[i++] = convert_fp32_bf16(0.394531);
|
||||
y[i] = convert_fp32_bf16(-4.000000);
|
||||
x[i++] = convert_fp32_bf16(-64.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-4.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-40.000000);
|
||||
y[i] = convert_fp32_bf16(1.000000);
|
||||
x[i++] = convert_fp32_bf16(-53.000000);
|
||||
y[i] = convert_fp32_bf16(-9.000000);
|
||||
x[i++] = convert_fp32_bf16(-91.000000);
|
||||
y[i] = convert_fp32_bf16(12.000000);
|
||||
x[i++] = convert_fp32_bf16(-164.000000);
|
||||
y[i] = convert_fp32_bf16(-20.000000);
|
||||
x[i++] = convert_fp32_bf16(-320.000000);
|
||||
y[i] = convert_fp32_bf16(1.000000);
|
||||
x[i++] = convert_fp32_bf16(-71.000000);
|
||||
y[i] = convert_fp32_bf16(1.000000);
|
||||
x[i++] = convert_fp32_bf16(-155.000000);
|
||||
y[i] = convert_fp32_bf16(1.000000);
|
||||
x[i++] = convert_fp32_bf16(-247.000000);
|
||||
y[i] = convert_fp32_bf16(-2.000000);
|
||||
x[i++] = convert_fp32_bf16(-118.000000);
|
||||
y[i] = convert_fp32_bf16(-2.000000);
|
||||
x[i++] = convert_fp32_bf16(-54.000000);
|
||||
y[i] = convert_fp32_bf16(-5.000000);
|
||||
x[i++] = convert_fp32_bf16(-392.000000);
|
||||
y[i] = convert_fp32_bf16(-37.000000);
|
||||
x[i++] = convert_fp32_bf16(-520.000000);
|
||||
y[i] = convert_fp32_bf16(-1.000000);
|
||||
x[i++] = convert_fp32_bf16(-19.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-10.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-8.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-2.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-14.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-2.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-6.000000);
|
||||
y[i] = convert_fp32_bf16(-1.000000);
|
||||
x[i++] = convert_fp32_bf16(-21.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-14.000000);
|
||||
y[i] = convert_fp32_bf16(-1.000000);
|
||||
x[i++] = convert_fp32_bf16(-17.000000);
|
||||
y[i] = convert_fp32_bf16(-1.000000);
|
||||
x[i++] = convert_fp32_bf16(-17.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-8.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-4.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-10.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-8.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-14.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-4.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-2.000000);
|
||||
y[i] = convert_fp32_bf16(1.000000);
|
||||
x[i++] = convert_fp32_bf16(-41.000000);
|
||||
y[i] = convert_fp32_bf16(1.000000);
|
||||
x[i++] = convert_fp32_bf16(-69.000000);
|
||||
y[i] = convert_fp32_bf16(4.000000);
|
||||
x[i++] = convert_fp32_bf16(-86.000000);
|
||||
y[i] = convert_fp32_bf16(1.000000);
|
||||
x[i++] = convert_fp32_bf16(-41.000000);
|
||||
y[i] = convert_fp32_bf16(-2.000000);
|
||||
x[i++] = convert_fp32_bf16(-34.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-6.000000);
|
||||
y[i] = convert_fp32_bf16(1.000000);
|
||||
x[i++] = convert_fp32_bf16(-41.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-136.000000);
|
||||
y[i] = convert_fp32_bf16(-3.000000);
|
||||
x[i++] = convert_fp32_bf16(-79.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-38.000000);
|
||||
y[i] = convert_fp32_bf16(5.000000);
|
||||
x[i++] = convert_fp32_bf16(-173.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-78.000000);
|
||||
y[i] = convert_fp32_bf16(-2.000000);
|
||||
x[i++] = convert_fp32_bf16(-60.000000);
|
||||
y[i] = convert_fp32_bf16(3.000000);
|
||||
x[i++] = convert_fp32_bf16(-123.000000);
|
||||
y[i] = convert_fp32_bf16(-9.000000);
|
||||
x[i++] = convert_fp32_bf16(-280.000000);
|
||||
y[i] = convert_fp32_bf16(3.000000);
|
||||
x[i++] = convert_fp32_bf16(-39.000000);
|
||||
y[i] = convert_fp32_bf16(2.000000);
|
||||
x[i++] = convert_fp32_bf16(-524.000000);
|
||||
y[i] = convert_fp32_bf16(11.000000);
|
||||
x[i++] = convert_fp32_bf16(-376.000000);
|
||||
y[i] = convert_fp32_bf16(5.000000);
|
||||
x[i++] = convert_fp32_bf16(-131.000000);
|
||||
y[i] = convert_fp32_bf16(11.000000);
|
||||
x[i++] = convert_fp32_bf16(-324.000000);
|
||||
y[i] = convert_fp32_bf16(9.000000);
|
||||
x[i++] = convert_fp32_bf16(-125.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-92.000000);
|
||||
y[i] = convert_fp32_bf16(-7.000000);
|
||||
x[i++] = convert_fp32_bf16(-233.000000);
|
||||
y[i] = convert_fp32_bf16(10.000000);
|
||||
x[i++] = convert_fp32_bf16(-170.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-4.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-4.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-10.000000);
|
||||
y[i] = convert_fp32_bf16(-1.000000);
|
||||
x[i++] = convert_fp32_bf16(-23.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-6.000000);
|
||||
y[i] = convert_fp32_bf16(0.000000);
|
||||
x[i++] = convert_fp32_bf16(-6.000000);
|
||||
y[i] = convert_fp32_bf16(-3.000000);
|
||||
x[i++] = convert_fp32_bf16(-37.000000);
|
||||
|
||||
y[i] = convert_fp32_bf16(-9);
|
||||
x[i++] = convert_fp32_bf16(-1);
|
||||
|
||||
y[i] = convert_fp32_bf16(7.0);
|
||||
x[i++] = convert_fp32_bf16(-1);
|
||||
|
||||
y[i] = convert_fp32_bf16(0);
|
||||
x[i++] = convert_fp32_bf16(-1);
|
||||
}
|
||||
#else
|
||||
for (uint64_t i = 0; i < ifmap_size; i++) {
|
||||
x[i] = convert_fp32_bf16(5.375000);
|
||||
y[i] = convert_fp32_bf16(2.203125);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef DBG
|
||||
for (uint64_t i = 0; i < ifmap_size; i++) {
|
||||
printf("source[%ld] y %f x %f\n", i, convert_bf16_fp32(y[i]), convert_bf16_fp32(x[i]));
|
||||
}
|
||||
#endif /* ifdef DBG */
|
||||
}
|
||||
|
||||
// Runs one atan2 comparison for the file-level `mode`: generates inputs,
// computes the host reference, builds the lookup tables, runs
// cvm_atan2_merge_emit and hands the result to verify().
//
//   ctx - runtime handle used for host<->device transfers
//   bmk - kernel context; also supplies the chip info for the channel count
static void testbench(CVI_RT_HANDLE *ctx, cvk_context_t *bmk) {
  // TODO: check more shape / align
  cvk_chip_info_t chip_info = bmk->info;

  // The fixed-pattern mode uses a smaller h/w than the generated modes.
  const bool fixed_pattern = (mode == PRE_DATA_COMPARE_FIX);

  uint32_t input_n = 1;
  uint32_t input_c = chip_info.npu_num;
  uint32_t input_h = fixed_pattern ? 4 : 16;
  uint32_t input_w = fixed_pattern ? 8 : 16;
  float epsilon = 0.1;
  int range_start = -8;
  int range_end = 8;

  cvk_fmt_t fmt = CVK_FMT_BF16;

  cvk_tl_shape_t ifmap_shape = {input_n, input_c, input_h, input_w};
  cvk_tl_shape_t ofmap_shape = ifmap_shape;

  // get lut table shape and size
  cvk_tl_shape_t table_shape;
  uint64_t table_bytesize = cvm_lut_tbl_bytesize(bmk, &table_shape, fmt);

  // get input / output size
  uint64_t ifmap_size = tl_shape_size(&ifmap_shape);
  uint64_t ofmap_size = tl_shape_size(&ofmap_shape);
  int data_type_size = bytesize_of_fmt(fmt);
  uint64_t ifmap_bytesize = ifmap_size * data_type_size;

  uint64_t ofmap_bytesize = ofmap_size * data_type_size;
  if (fixed_pattern) {
    ofmap_bytesize = sizeof(golden_bf16) / sizeof(golden_bf16[0]) * data_type_size;
  }

  // Local-memory tensors. They are released further down in the exact
  // reverse of this order, so keep the two lists in sync.
  // atan2 takes two inputs
  cvk_tl_t *tl_ifmap = test_alloc_tl(bmk, ifmap_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_ifmap2 = test_alloc_tl(bmk, ifmap_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_ofmap_bf16 = test_alloc_tl(bmk, ofmap_shape, fmt, /*align*/ 1);

  // atan tables
  cvk_tl_t *tl_y0_buf = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_invert_buf = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_pos_neg_buf = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);

  // reciprocal tables
  cvk_tl_t *tl_reciprocal_table_answer = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_reciprocal_table_answer_mantissa = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);

  // scratch buffers
  cvk_tl_t *tl_buf = test_alloc_tl(bmk, ofmap_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_buf2 = test_alloc_tl(bmk, tl_ofmap_bf16->shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_buf3 = test_alloc_tl(bmk, tl_ofmap_bf16->shape, fmt, /*align*/ 1);

  // Host-side buffers: x, y and the reference output.
  uint16_t *host_x = (uint16_t *)xmalloc(ifmap_bytesize);
  uint16_t *host_y = (uint16_t *)xmalloc(ifmap_bytesize);
  uint16_t *host_ref = (uint16_t *)xmalloc(ofmap_bytesize);

  // Host copies of the reciprocal tables.
  uint16_t *recip_tbl = (uint16_t *)xmalloc(table_bytesize);
  uint16_t *recip_mantissa_tbl = (uint16_t *)xmalloc(table_bytesize);

  // Host copies of the atan tables.
  uint16_t *atan_y0_tbl = (uint16_t *)xmalloc(table_bytesize);
  uint16_t *atan_invert_tbl = (uint16_t *)xmalloc(table_bytesize);
  uint16_t *atan_pos_neg_tbl = (uint16_t *)xmalloc(table_bytesize);

  // Table for searching the '0' index (generated here, not uploaded).
  uint16_t *zero_idx_tbl = (uint16_t *)xmalloc(table_bytesize);

  // init input / ref: host_x is x, host_y is y
  gen_input(host_x, host_y, ifmap_size, mode, range_start, range_end);
  tl_lut_ref(host_ref, host_x, host_y, ifmap_shape);

  // init lut tables
  cvm_reciprocal_tbl(recip_tbl, recip_mantissa_tbl, &table_shape);
  cvm_atan_tbl(atan_y0_tbl, NULL, atan_invert_tbl, atan_pos_neg_tbl, &table_shape);
  cvm_gen_0_tbl(zero_idx_tbl, &table_shape);

  // sys->local
  test_put_tensor_g2l_comp(ctx, bmk, tl_ifmap, (uint8_t *)host_x);
  test_put_tensor_g2l_comp(ctx, bmk, tl_ifmap2, (uint8_t *)host_y);
  test_put_tensor_g2l_comp(ctx, bmk, tl_reciprocal_table_answer, (uint8_t *)recip_tbl);
  test_put_tensor_g2l_comp(ctx, bmk, tl_reciprocal_table_answer_mantissa,
                           (uint8_t *)recip_mantissa_tbl);

  test_put_tensor_g2l_comp(ctx, bmk, tl_y0_buf, (uint8_t *)atan_y0_tbl);
  test_put_tensor_g2l_comp(ctx, bmk, tl_invert_buf, (uint8_t *)atan_invert_tbl);
  test_put_tensor_g2l_comp(ctx, bmk, tl_pos_neg_buf, (uint8_t *)atan_pos_neg_tbl);

  // y (tl_ifmap2) is passed first, x (tl_ifmap) second.
  cvm_atan2_merge_emit(bmk, tl_ifmap2, tl_ifmap, tl_buf, tl_buf2, tl_buf3, tl_y0_buf, tl_invert_buf,
                       tl_pos_neg_buf, tl_reciprocal_table_answer,
                       tl_reciprocal_table_answer_mantissa, OUT tl_ofmap_bf16, fmt);

  // local->sys, then compare against the reference
  uint16_t *host_out = (uint16_t *)test_get_tensor_l2g_comp(ctx, bmk, tl_ofmap_bf16);
  verify(host_out, host_ref, host_x, host_y, ifmap_size, epsilon);

  // Release local memory in reverse allocation order.
  free_tl(bmk, tl_buf3);
  free_tl(bmk, tl_buf2);
  free_tl(bmk, tl_buf);
  free_tl(bmk, tl_reciprocal_table_answer_mantissa);
  free_tl(bmk, tl_reciprocal_table_answer);
  free_tl(bmk, tl_pos_neg_buf);
  free_tl(bmk, tl_invert_buf);
  free_tl(bmk, tl_y0_buf);
  free_tl(bmk, tl_ofmap_bf16);
  free_tl(bmk, tl_ifmap2);
  free_tl(bmk, tl_ifmap);

  // Release host memory.
  free(zero_idx_tbl);
  free(atan_y0_tbl);
  free(atan_invert_tbl);
  free(atan_pos_neg_tbl);
  free(recip_tbl);
  free(recip_mantissa_tbl);
  free(host_x);
  free(host_ref);
  free(host_out);
  free(host_y);
}
|
||||
|
||||
int main() {
|
||||
CVI_RT_HANDLE ctx;
|
||||
cvk_context_t *bmk;
|
||||
int round_mode;
|
||||
|
||||
round_mode = set_store_feround();
|
||||
|
||||
test_init(&ctx, &bmk);
|
||||
|
||||
// for (int i = PRE_DATA_COMPARE_FIX; i < TEST_MODE_MAX; i++)
|
||||
// for (int i = PRE_DATA_COMPARE_FIX; i < DATA_COMPARE_ACCURACY; i++) {
|
||||
// for (int i = DATA_COMPARE_ACCURACY; i < DATA_COMPARE_U8; i++) {
|
||||
// for (int i = DATA_COMPARE_ACCURACY; i < DATA_COMPARE_ACCURACY_X_GT_0; i++)
|
||||
// {
|
||||
for (int i = PRE_DATA_COMPARE_FIX; i < DATA_COMPARE_U8; i++) {
|
||||
mode = static_cast<TEST_MODE>(i);
|
||||
printf("test mode %d...\n", mode);
|
||||
testbench(&ctx, bmk);
|
||||
}
|
||||
printf("pass\n");
|
||||
|
||||
test_exit(&ctx, bmk);
|
||||
restore_feround(round_mode);
|
||||
return 0;
|
||||
}
|
||||
148
cvimath/tests/cvi1835/bf16_fp32.cpp
Normal file
148
cvimath/tests/cvi1835/bf16_fp32.cpp
Normal file
@ -0,0 +1,148 @@
|
||||
// \file sample for converting bf16 data to fp32
|
||||
|
||||
// header include
|
||||
#include <assert.h>
|
||||
#include <cvimath_internal.h> // math
|
||||
#include <test_cvikernel_util.h>  // kernel
|
||||
|
||||
void init_input(uint16_t *input_data, uint64_t ifmap_size) {
|
||||
for (uint64_t i = 0; i < ifmap_size; i++) {
|
||||
input_data[i] = convert_fp32_bf16(i * 1.0);
|
||||
}
|
||||
}
|
||||
|
||||
// Builds the expected 32-bit output for each bf16 input: a 32-bit word whose
// first 16-bit element in memory is zero and whose second 16-bit element is
// the bf16 value. On a little-endian host this places the bf16 bits in the
// upper half of the word.
//
//   input_data - ifmap_size bf16 values
//   ref_data   - destination for ifmap_size 32-bit words
//   ifmap_size - number of elements to convert
void init_ref(uint16_t *input_data, uint32_t *ref_data, uint64_t ifmap_size) {
  union {
    uint16_t half[2];
    uint32_t word;
  } widen;

  // The first half never changes, so it is cleared once up front.
  widen.half[0] = 0;
  for (uint64_t idx = 0; idx != ifmap_size; ++idx) {
    widen.half[1] = input_data[idx];
    ref_data[idx] = widen.word;
  }
}
|
||||
|
||||
// Converts one bf16 tensor of the given shape to fp32 on the device with
// cvm_bf16_fp32 and compares the result byte-for-byte against a host
// reference. Exits the process with -1 on the first mismatch.
//
//   rt_ctx        - runtime handle used for device memory and submission
//   cvk_ctx       - kernel context used to build the command buffer
//   bf16_tg_shape - shape (n, c, h, w) of the bf16 input tensor
static void testbench(CVI_RT_HANDLE *rt_ctx, cvk_context_t *cvk_ctx,
                      cvk_tg_shape_t *bf16_tg_shape) {
  // for calculate size we need in host
  cvk_tl_shape_t ifmap_shape = {bf16_tg_shape->n, bf16_tg_shape->c, bf16_tg_shape->h,
                                bf16_tg_shape->w};

  // * 2 means fp32 takes twice size of bf16
  cvk_tl_shape_t ofmap_shape = {bf16_tg_shape->n, bf16_tg_shape->c, bf16_tg_shape->h,
                                bf16_tg_shape->w * 2};

  uint64_t ifmap_size = tl_shape_size(&ifmap_shape);
  uint64_t ofmap_size = tl_shape_size(&ofmap_shape);

  // unit size is 1 bytes, bf16 takes 2 bytes
  int data_type_size = 2;

  uint64_t ifmap_bytesize = ifmap_size * data_type_size;

  // Allocation size for the reference buffer (kept as originally sized).
  uint64_t ofmap_bytesize = ofmap_size * data_type_size * 2;

  // Number of bytes init_ref() actually fills: one 32-bit word per input
  // element. This is the span that has to be compared.
  uint64_t compare_bytesize = ifmap_size * sizeof(uint32_t);

  uint8_t *input_data = (uint8_t *)xmalloc(ifmap_bytesize);
  uint8_t *ref_data = (uint8_t *)xmalloc(ofmap_bytesize);

  // init input / output data in ddr
  init_input((uint16_t *)input_data, ifmap_size);
  init_ref((uint16_t *)input_data, (uint32_t *)ref_data, ifmap_size);

  // send host memory->device memory
  cvk_fmt_t fmt = CVK_FMT_BF16;
  // The fp32 tensor is described as a bf16 tensor that is twice as wide.
  cvk_tg_shape_t fp32_tg_shape = {ofmap_shape.n, ofmap_shape.c, ofmap_shape.h, ofmap_shape.w};

  cvk_tg_t *bf16_tg = test_alloc_tg_mem_comp(rt_ctx, cvk_ctx, *bf16_tg_shape, fmt);
  assert(bf16_tg && "alloc bf16 fail");

  test_put_tg_mem_comp(rt_ctx, bf16_tg, (uint8_t *)input_data);

  cvk_tg_t *fp32_tg = test_alloc_tg_mem_comp(rt_ctx, cvk_ctx, fp32_tg_shape, fmt);
  // Check the tensor that was just allocated (not bf16_tg again).
  assert(fp32_tg && "alloc fp32 fail");

  // prepare command buffer
  cvm_bf16_fp32(cvk_ctx, bf16_tg, fp32_tg);

  // submit descriptor
  test_submit_comp(rt_ctx, cvk_ctx);

  // get data from device memory
  uint8_t *ofmap_data = test_get_tg_mem_comp(rt_ctx, fp32_tg);

  // Compare with the reference byte by byte. The loop bound is a byte count;
  // using the 16-bit element count here would only cover half of the output.
  for (uint64_t i = 0; i < compare_bytesize; i++) {
    if (ref_data[i] != ofmap_data[i]) {
      fprintf(stderr, "comparing failed output[%llu] got %u, ref %u\n", (unsigned long long)i,
              ofmap_data[i], ref_data[i]);
      // fail case
      exit(-1);
    }
  }

  // free resource from tpu memory
  test_free_tg_mem_comp(rt_ctx, bf16_tg);
  test_free_tg_mem_comp(rt_ctx, fp32_tg);

  // free resource from host memory
  free(input_data);
  free(ref_data);
  free(ofmap_data);
}
|
||||
|
||||
int main() {
|
||||
CVI_RT_HANDLE rt_ctx;
|
||||
cvk_context_t *cvk_ctx;
|
||||
int round_mode;
|
||||
|
||||
// align kerenl rounding mode
|
||||
round_mode = set_store_feround();
|
||||
|
||||
// init runtime / kerenl structure
|
||||
test_init(&rt_ctx, &cvk_ctx);
|
||||
|
||||
cvk_tg_shape_t bf16_tg_shape = {1, 2, 3, 4};
|
||||
{
|
||||
// test 1
|
||||
printf("test bf16 <%d,%d,%d,%d> to fp32\n", bf16_tg_shape.n, bf16_tg_shape.c, bf16_tg_shape.h,
|
||||
bf16_tg_shape.w);
|
||||
testbench(&rt_ctx, cvk_ctx, &bf16_tg_shape);
|
||||
printf("compare test bf16 to fp32 done\n");
|
||||
}
|
||||
|
||||
{
|
||||
// test 2
|
||||
bf16_tg_shape = {1, 20, 30, 40};
|
||||
printf("test bf16 <%d,%d,%d,%d> to fp32\n", bf16_tg_shape.n, bf16_tg_shape.c, bf16_tg_shape.h,
|
||||
bf16_tg_shape.w);
|
||||
testbench(&rt_ctx, cvk_ctx, &bf16_tg_shape);
|
||||
printf("compare test bf16 to fp32 done\n");
|
||||
}
|
||||
|
||||
bf16_tg_shape = {40, 40, 128, 256};
|
||||
for (int n = 1; n < (int)bf16_tg_shape.n; n += 10) {
|
||||
for (int c = 1; c < (int)bf16_tg_shape.c; c += 10) {
|
||||
for (int h = 1; h < (int)bf16_tg_shape.h; h += 100) {
|
||||
for (int w = 2; w < (int)bf16_tg_shape.w; w += 100) {
|
||||
printf("test bf16 <%d,%d,%d,%d> to fp32\n", bf16_tg_shape.n, bf16_tg_shape.c,
|
||||
bf16_tg_shape.h, bf16_tg_shape.w);
|
||||
cvk_tg_shape_t _bf16_tg_shape = {(uint32_t)n, (uint32_t)c, (uint32_t)h, (uint32_t)w};
|
||||
testbench(&rt_ctx, cvk_ctx, &_bf16_tg_shape);
|
||||
printf("compare test bf16 to fp32 done\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// de-init runtime / kerenl structure
|
||||
test_exit(&rt_ctx, cvk_ctx);
|
||||
|
||||
// restore rounding mode
|
||||
restore_feround(round_mode);
|
||||
|
||||
return 0;
|
||||
}
|
||||
60
cvimath/tests/cvi1835/blas_cpu.cpp
Normal file
60
cvimath/tests/cvi1835/blas_cpu.cpp
Normal file
@ -0,0 +1,60 @@
|
||||
#include <cvimath_internal.h>
|
||||
|
||||
#include <string.h>
|
||||
#include <sys/time.h>
|
||||
#include <time.h>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
|
||||
int main() {
|
||||
srand(time(NULL));
|
||||
const uint32_t data_length = 512;
|
||||
const uint32_t data_num = 20000;
|
||||
uint8_t *db = new uint8_t[data_num * data_length];
|
||||
float *db_unit = new float[data_num];
|
||||
uint8_t *data = new uint8_t[data_length];
|
||||
float *buffer_f = new float[data_num];
|
||||
memset(buffer_f, 0, data_num * sizeof(float));
|
||||
|
||||
for (uint32_t i = 0; i < data_length; i++) {
|
||||
data[i] = rand() % 256;
|
||||
}
|
||||
for (uint32_t j = 0; j < data_num; j++) {
|
||||
for (uint32_t i = 0; i < data_length; i++) {
|
||||
db[j * data_length + i] = rand() % 256;
|
||||
}
|
||||
}
|
||||
cvm_gen_db_unit_length(db, db_unit, data_length, data_num);
|
||||
|
||||
const uint32_t k = 5;
|
||||
uint32_t k_index[k] = {0};
|
||||
float k_value[k] = {0};
|
||||
struct timeval t0, t1;
|
||||
gettimeofday(&t0, NULL);
|
||||
cvm_cpu_u8data_ip_match(data, db, db_unit, k_index, k_value, buffer_f, data_length, data_num, k);
|
||||
gettimeofday(&t1, NULL);
|
||||
unsigned long elapsed_tpu = ((t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec);
|
||||
printf("Searching time uint8: %lu us\n", elapsed_tpu);
|
||||
printf("Result:\n");
|
||||
for (uint32_t i = 0; i < k; i++) {
|
||||
printf("[%u] %f\n", k_index[i], k_value[i]);
|
||||
}
|
||||
printf("\n");
|
||||
gettimeofday(&t0, NULL);
|
||||
cvm_cpu_i8data_ip_match((int8_t *)data, (int8_t *)db, db_unit, k_index, k_value, buffer_f,
|
||||
data_length, data_num, k);
|
||||
gettimeofday(&t1, NULL);
|
||||
elapsed_tpu = ((t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec);
|
||||
printf("Searching time int8: %lu us\n", elapsed_tpu);
|
||||
printf("Result:\n");
|
||||
for (uint32_t i = 0; i < k; i++) {
|
||||
printf("[%u] %f\n", k_index[i], k_value[i]);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
delete[] data;
|
||||
delete[] db;
|
||||
delete[] db_unit;
|
||||
delete[] buffer_f;
|
||||
return 0;
|
||||
}
|
||||
134
cvimath/tests/cvi1835/blas_tpu.cpp
Normal file
134
cvimath/tests/cvi1835/blas_tpu.cpp
Normal file
@ -0,0 +1,134 @@
|
||||
#include <cvimath_internal.h>
|
||||
#include <cviruntime.h>
|
||||
#include <cviruntime_context.h>
|
||||
|
||||
#include <string.h>
|
||||
#include <sys/time.h>
|
||||
#include <time.h>
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
|
||||
// Top-k inner-product match of one int8 query against an int8 database, with
// the matrix product computed by cvm_gemm on the device.
//
//   ctx, cvk_ctx       - runtime handle and kernel context
//   a_gaddr, a_vaddr   - device address / host pointer of the query vector
//   db_gaddr           - device address of the database matrix
//   unit_db_arr        - per-entry unit length of the database (data_num floats)
//   k_index, k_value   - outputs: index and score of the k best entries
//   buffer_gemm_gaddr,
//   buffer_gemm_vaddr  - device address / host pointer of the gemm result
//   buffer_i32         - scratch for the combined 32-bit gemm result
//   buffer_f           - scratch for the normalized scores (modified)
//   gemm_device        - runtime memory object backing the gemm result
//   data_length        - elements per vector
//   data_num           - number of database entries
//   k                  - number of results to return
void i8data_ip_match(CVI_RT_HANDLE ctx, cvk_context_t *cvk_ctx, uint64_t a_gaddr, int8_t *a_vaddr,
                     uint64_t db_gaddr, float *unit_db_arr, uint32_t *k_index, float *k_value,
                     uint64_t buffer_gemm_gaddr, uint8_t *buffer_gemm_vaddr, uint32_t *buffer_i32,
                     float *buffer_f, CVI_RT_MEM gemm_device, const uint32_t data_length,
                     const uint32_t data_num, const uint32_t k) {
  // Run the product on the device, then merge the sliced result on the host.
  size_t *slice_num =
      cvm_gemm(cvk_ctx, a_gaddr, db_gaddr, buffer_gemm_gaddr, 1, data_length, data_num, CVK_FMT_I8);
  CVI_RT_Submit(cvk_ctx);
  CVI_RT_MemInvld(ctx, gemm_device);
  cvm_combin_gemm_i8(slice_num, buffer_gemm_vaddr, buffer_i32, 1, data_num);
  free(slice_num);

  // Length of the query vector: sqrt of its dot product with itself.
  int32_t self_dot = 0;
  for (uint32_t idx = 0; idx < data_length; ++idx) {
    const short elem = (short)a_vaddr[idx];
    self_dot += (elem * a_vaddr[idx]);
  }
  float unit_a = sqrt(self_dot);

  // Normalize every raw product by both vector lengths.
  const int32_t *raw_scores = (int32_t *)buffer_i32;
  for (uint32_t idx = 0; idx < data_num; ++idx) {
    buffer_f[idx] = raw_scores[idx] / (unit_a * unit_db_arr[idx]);
  }

  // Repeatedly pick the current maximum and clear it so the next pass finds
  // the runner-up. Ties resolve to the lowest index.
  for (uint32_t rank = 0; rank < k; ++rank) {
    int best = 0;
    for (uint32_t cand = 1; cand < data_num; ++cand) {
      if (buffer_f[cand] > buffer_f[best]) {
        best = cand;
      }
    }
    k_value[rank] = buffer_f[best];
    k_index[rank] = best;
    buffer_f[best] = 0;
  }
}
|
||||
|
||||
int main() {
|
||||
CVI_RT_HANDLE ctx;
|
||||
CVI_RT_Init(&ctx);
|
||||
cvk_context_t *bk_ctx = (cvk_context_t *)CVI_RT_RegisterKernel(ctx, 100000);
|
||||
printf("123\n");
|
||||
|
||||
const uint32_t data_length = 512;
|
||||
const uint32_t data_num = 1000;
|
||||
// Allocate memory
|
||||
CVI_RT_MEM bmmem_a = CVI_RT_MemAlloc(ctx, data_length);
|
||||
CVI_RT_MEM bmmem_db = CVI_RT_MemAlloc(ctx, data_length * data_num);
|
||||
CVI_RT_MEM bmmem_c = CVI_RT_MemAlloc(ctx, data_num * sizeof(uint32_t));
|
||||
|
||||
uint64_t gaddr_a = CVI_RT_MemGetPAddr(bmmem_a);
|
||||
uint64_t gaddr_db = CVI_RT_MemGetPAddr(bmmem_db);
|
||||
uint64_t gaddr_c = CVI_RT_MemGetPAddr(bmmem_c);
|
||||
|
||||
uint8_t *vaddr_a = CVI_RT_MemGetVAddr(bmmem_a);
|
||||
uint8_t *vaddr_db = CVI_RT_MemGetVAddr(bmmem_db);
|
||||
uint8_t *vaddr_c = CVI_RT_MemGetVAddr(bmmem_c);
|
||||
|
||||
int8_t *db_raw = new int8_t[data_length * data_num];
|
||||
float *db_unit = new float[data_num];
|
||||
uint32_t *buffer = new uint32_t[data_num];
|
||||
float *buffer_f = new float[data_num];
|
||||
|
||||
// Generate data
|
||||
srand(time(NULL));
|
||||
for (uint32_t i = 0; i < data_length; i++) {
|
||||
((int8_t *)vaddr_a)[i] = rand() % 10 - 10;
|
||||
}
|
||||
for (uint32_t j = 0; j < data_num; j++) {
|
||||
for (uint32_t i = 0; i < data_length; i++) {
|
||||
((int8_t *)db_raw)[j * data_length + i] = rand() % 10 - 10;
|
||||
}
|
||||
}
|
||||
|
||||
// Pass db feature to ion
|
||||
for (uint32_t n = 0; n < data_num * data_length; n++) {
|
||||
int i = n / data_num;
|
||||
int j = n % data_num;
|
||||
((int8_t *)vaddr_db)[n] = db_raw[data_length * j + i];
|
||||
}
|
||||
|
||||
// Calculate unit length for db feature
|
||||
cvm_gen_precached_i8_unit_length((int8_t *)db_raw, db_unit, data_length, data_num);
|
||||
CVI_RT_MemFlush(ctx, bmmem_a);
|
||||
CVI_RT_MemFlush(ctx, bmmem_db);
|
||||
|
||||
const uint32_t k = 5;
|
||||
uint32_t k_index[k] = {0};
|
||||
float k_value[k] = {0};
|
||||
struct timeval t0, t1;
|
||||
gettimeofday(&t0, NULL);
|
||||
i8data_ip_match(ctx, bk_ctx, gaddr_a, (int8_t *)vaddr_a, gaddr_db, db_unit, k_index, k_value,
|
||||
gaddr_c, vaddr_c, buffer, buffer_f, bmmem_c, data_length, data_num, k);
|
||||
gettimeofday(&t1, NULL);
|
||||
unsigned long elapsed_tpu = ((t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec);
|
||||
printf("Searching time tpu int8: %lu us\n", elapsed_tpu);
|
||||
printf("Result:\n");
|
||||
for (uint32_t i = 0; i < k; i++) {
|
||||
printf("[%u] %f\n", k_index[i], k_value[i]);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
gettimeofday(&t0, NULL);
|
||||
cvm_cpu_i8data_ip_match((int8_t *)vaddr_a, (int8_t *)db_raw, db_unit, k_index, k_value, buffer_f,
|
||||
data_length, data_num, k);
|
||||
gettimeofday(&t1, NULL);
|
||||
elapsed_tpu = ((t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec);
|
||||
printf("Searching time int8: %lu us\n", elapsed_tpu);
|
||||
printf("Result:\n");
|
||||
for (uint32_t i = 0; i < k; i++) {
|
||||
printf("[%u] %f\n", k_index[i], k_value[i]);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
delete[] db_unit;
|
||||
delete[] buffer;
|
||||
delete[] buffer_f;
|
||||
CVI_RT_MemFree(ctx, bmmem_a);
|
||||
CVI_RT_MemFree(ctx, bmmem_db);
|
||||
CVI_RT_MemFree(ctx, bmmem_c);
|
||||
CVI_RT_UnRegisterKernel(bk_ctx);
|
||||
CVI_RT_DeInit(ctx);
|
||||
return 0;
|
||||
}
|
||||
907
cvimath/tests/cvi1835/depthwise_reshape_same.cpp
Normal file
907
cvimath/tests/cvi1835/depthwise_reshape_same.cpp
Normal file
@ -0,0 +1,907 @@
|
||||
#include <cvimath_internal.h>
|
||||
#include <test_cvikernel_util.h>
|
||||
|
||||
#include <test_native_ref.h> // calc_dilute_hw
|
||||
|
||||
#define NPU_NUM (1 << 5)
|
||||
typedef cvk_tiu_depthwise_pt_convolution_param_t param_t;
|
||||
|
||||
int random_seed;
|
||||
// Prints the ifmap shape, weight kernel size, padding, stride and the
// signedness flags of a depthwise convolution parameter block to stdout.
//
//   p - parameter block to describe; its ifmap, weight and ofmap are read
static void print_pooling_param(param_t *p) {
  const int ifmap_n = p->ifmap->shape.n;
  const int ifmap_c = p->ifmap->shape.c;
  const int ifmap_h = p->ifmap->shape.h;
  const int ifmap_w = p->ifmap->shape.w;
  const int kernel_h = p->weight->shape.h;
  const int kernel_w = p->weight->shape.w;

  // The sign flags are 1 when the tensor format is CVK_FMT_I8.
  const int opd0_sign = (p->ifmap->fmt == CVK_FMT_I8);
  const int res0_sign = (p->ofmap->fmt == CVK_FMT_I8);

  printf(" Pooling parameters:\n");
  printf(" ifmap = (%d, %d, %d, %d)\n", ifmap_n, ifmap_c, ifmap_h, ifmap_w);
  printf(" opd0_sign = %d\n", opd0_sign);
  printf(" weight = (%d, %d)\n", kernel_h, kernel_w);
  printf(" padding = (%d, %d, %d, %d)\n", p->pad_top, p->pad_bottom, p->pad_left, p->pad_right);
  printf(" stride = (%d, %d)\n", p->stride_h, p->stride_w);
  // Not printed: random_seed, ins_*, dilation_*, rshift_bits, relu_enable.
  printf(" res0_sign = %d\n", res0_sign);
}
|
||||
|
||||
static uint16_t *alloc_input(int ic, int ih, int iw, cvk_fmt_t ifmt) {
|
||||
uint64_t size = ic * ih * iw;
|
||||
uint16_t *data = (uint16_t *)new uint16_t[(size)];
|
||||
if (ifmt == CVK_FMT_BF16) {
|
||||
for (uint64_t i = 0; i < size; i++) {
|
||||
float val = 0;
|
||||
int RAND_MAX2 = RAND_MAX / 2; // 5 ~ -5
|
||||
val = (float)(rand() - RAND_MAX2) * 5 / (float)RAND_MAX;
|
||||
val = i;
|
||||
data[i] = convert_fp32_bf16(val);
|
||||
}
|
||||
} else {
|
||||
uint8_t *d = (uint8_t *)data;
|
||||
for (uint64_t i = 0; i < size; i++) {
|
||||
d[i] = i % 10 * (i % 2 ? -1 : 1);
|
||||
}
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
static uint16_t *alloc_weight(int ic, int kh, int kw, cvk_fmt_t fmt) {
|
||||
int size = ic * kh * kw;
|
||||
uint16_t *data = (uint16_t *)malloc(size * sizeof(uint16_t));
|
||||
// printf("weight size is %d\n", size * 2);
|
||||
if (fmt == CVK_FMT_BF16) {
|
||||
for (int i = 0; i < size; i++) {
|
||||
float val = 0;
|
||||
int RAND_MAX2 = RAND_MAX / 2; // 5 ~ -5
|
||||
val = (float)(rand() - RAND_MAX2) * 5 / (float)RAND_MAX;
|
||||
val = i;
|
||||
data[i] = convert_fp32_bf16(val);
|
||||
}
|
||||
} else {
|
||||
uint8_t *d = (uint8_t *)data;
|
||||
for (int i = 0; i < size; i++) {
|
||||
d[i] = i % 5 * (i % 2 ? -1 : 1);
|
||||
}
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
static uint32_t *alloc_bias(int ic, cvk_fmt_t fmt) {
|
||||
int c = ic;
|
||||
uint64_t size = c;
|
||||
uint32_t *bias = (uint32_t *)malloc(sizeof(uint32_t) * c);
|
||||
if (fmt == CVK_FMT_BF16) {
|
||||
for (int i = 0; i < c; i++) {
|
||||
float val = 0;
|
||||
int RAND_MAX2 = RAND_MAX / 2; // 2 ~ -2
|
||||
val = (float)(rand() - RAND_MAX2) * 2 / (float)RAND_MAX;
|
||||
val = i;
|
||||
bias[i] = convert_fp32_hex(val);
|
||||
}
|
||||
} else {
|
||||
uint16_t *d = (uint16_t *)bias;
|
||||
for (uint64_t i = 0; i < size; i++) {
|
||||
d[i] = i % 0xf * (i % 2 ? -1 : 1);
|
||||
}
|
||||
}
|
||||
return bias;
|
||||
}
|
||||
|
||||
// Allocates (with new[]) an uninitialized output buffer of ic * oh * ow
// 16-bit elements. The caller owns the returned buffer.
static uint16_t *alloc_output(int ic, int oh, int ow) {
  const uint64_t count = ic * oh * ow;
  uint16_t *buf = new uint16_t[count];
  return buf;
}
|
||||
|
||||
static inline void cvm_relu(uint16_t *buf, uint64_t size, cvk_fmt_t fmt) {
|
||||
if (fmt == CVK_FMT_BF16) {
|
||||
for (uint64_t i = 0; i < size; i++)
|
||||
if (convert_bf16_fp32(buf[i]) < 0) buf[i] = convert_fp32_bf16(0);
|
||||
} else {
|
||||
int8_t *buf_int8_t = (int8_t *)buf;
|
||||
for (uint64_t i = 0; i < size; i++) {
|
||||
if (buf_int8_t[i] < 0) buf_int8_t[i] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Flattens a 2-D position into a linear index: row h of a grid that is w1
// elements wide, column w2.
static int index_get(int h, int w1, int w2) {
  const int row_base = h * w1;
  return row_base + w2;
}
|
||||
|
||||
int native_pooling_avg_bf16(const uint16_t *i_fmap, const void *weight, const uint32_t *bias,
|
||||
uint16_t *o_fmap, int input_n, int input_c, int input_h, int input_w,
|
||||
int kh, int kw, int pad_h_t, int pad_h_b, int pad_w_l, int pad_w_r,
|
||||
int stride_h, int stride_w, int ins_h, int ins_w, int ins_h_last,
|
||||
int ins_w_last, int dh, int dw, int const_weight) {
|
||||
if (kh * kw <= 0) return BM_ERR_INVALID_ARGUMENT;
|
||||
|
||||
uint16_t avg_const_weight = *(uint16_t *)weight;
|
||||
uint16_t *weight_arr = (uint16_t *)weight;
|
||||
int h_after = calc_dilute_hw(input_h, ins_h, ins_h_last, pad_h_t, pad_h_b);
|
||||
int w_after = calc_dilute_hw(input_w, ins_w, ins_w_last, pad_w_l, pad_w_r);
|
||||
int d_kh = calc_dilute_hw(kh, dh - 1, 0, 0, 0);
|
||||
int d_kw = calc_dilute_hw(kw, dw - 1, 0, 0, 0);
|
||||
|
||||
int output_h = calc_output_hw(h_after, d_kh, stride_h);
|
||||
int output_w = calc_output_hw(w_after, d_kw, stride_w);
|
||||
// printf("output_h/output_w is %d/%d\n", output_h, output_w);
|
||||
float *avg_pooling_mac_a = (float *)malloc(d_kh * d_kw * sizeof(float));
|
||||
float *avg_pooling_mac_b = (float *)malloc(d_kh * d_kw * sizeof(float));
|
||||
|
||||
uint16_t *i_fmap_pad = NULL;
|
||||
uint16_t *i_kmap_pad = NULL;
|
||||
for (int n = 0; n < input_n; n++) {
|
||||
if (const_weight == 0) weight_arr = (uint16_t *)weight;
|
||||
|
||||
for (int c = 0; c < input_c; ++c) {
|
||||
fill_pad_fmap_bf16(i_fmap, &i_fmap_pad, 0, pad_w_l, pad_w_r, pad_h_t, pad_h_b, ins_h, ins_w,
|
||||
ins_h_last, ins_w_last, input_h, input_w);
|
||||
|
||||
// kernel_dilation(
|
||||
if (const_weight == 0)
|
||||
fill_pad_fmap_bf16((weight_arr), &i_kmap_pad, 0, 0, 0, 0,
|
||||
0, // no padding
|
||||
dh - 1, dw - 1, 0, 0, kh, kw);
|
||||
|
||||
float avg_pool_result;
|
||||
for (int ph = 0; ph < output_h; ++ph) {
|
||||
for (int pw = 0; pw < output_w; ++pw) {
|
||||
int hstart = ph * stride_h;
|
||||
int wstart = pw * stride_w;
|
||||
int pool_index = index_get(ph, output_w, pw);
|
||||
int mac_index = 0;
|
||||
|
||||
float r = 0;
|
||||
for (int h = 0; h < d_kh; h++) {
|
||||
for (int w = 0; w < d_kw; w++) {
|
||||
int index = index_get((hstart + h), w_after, (w + wstart));
|
||||
mac_index = h * d_kw + w;
|
||||
|
||||
avg_pooling_mac_a[mac_index] = convert_bf16_fp32(i_fmap_pad[index]);
|
||||
|
||||
avg_pooling_mac_b[h * d_kw + w] = const_weight
|
||||
? convert_bf16_fp32(avg_const_weight)
|
||||
: convert_bf16_fp32(i_kmap_pad[mac_index]);
|
||||
|
||||
#if 0
|
||||
printf ("ref[ni %u][ci %u][oh/ow %u/%u][kh/kw %u/%u] o[%d]"
|
||||
" %.1f * %.1f + %.1f = %.1f\n",
|
||||
n, c, ph, pw, h, w, pool_index,
|
||||
avg_pooling_mac_a[mac_index], avg_pooling_mac_b[h*d_kw+w],
|
||||
r, r + avg_pooling_mac_a[mac_index] * avg_pooling_mac_b[h*d_kw+w]);
|
||||
#endif
|
||||
|
||||
r += avg_pooling_mac_a[mac_index] * avg_pooling_mac_b[h * d_kw + w];
|
||||
}
|
||||
}
|
||||
|
||||
inner_float_product(avg_pooling_mac_a, avg_pooling_mac_b, d_kh * d_kw, &avg_pool_result);
|
||||
|
||||
if (bias) {
|
||||
avg_pool_result += convert_hex_fp32(bias[c]);
|
||||
}
|
||||
*(o_fmap + pool_index) = convert_fp32_bf16(avg_pool_result);
|
||||
}
|
||||
}
|
||||
weight_arr += kh * kw;
|
||||
i_fmap += input_w * input_h;
|
||||
o_fmap += output_w * output_h;
|
||||
}
|
||||
}
|
||||
free(i_fmap_pad);
|
||||
free(i_kmap_pad);
|
||||
free(avg_pooling_mac_a);
|
||||
free(avg_pooling_mac_b);
|
||||
|
||||
return BM_SUCCESS;
|
||||
}
|
||||
|
||||
// Bytes per element: 2 for BF16, 1 for I8/U8. Any other format trips the assert.
static int get_fsz(cvk_fmt_t fmt) {
  assert(fmt == CVK_FMT_BF16 || fmt == CVK_FMT_I8 || fmt == CVK_FMT_U8);
  if (fmt == CVK_FMT_BF16) {
    return 2;
  }
  return 1;
}
|
||||
|
||||
/**
 * Compare the device output against the host reference and abort on mismatch.
 *
 * If p->relu_enable is set, ReLU is first applied to `output_ref` in place.
 * With `is_valid_pack == 0` the device output still carries the reshaped
 * ("same" mode) layout, so each original channel is compared separately and
 * the trailing garbage of every channel group is skipped. Otherwise both
 * buffers are compared as one flat array of org_o_shape_size elements.
 *
 * On mismatch the process exits with -1. On success `output_ref` is released
 * with delete[] - the caller must not use it afterwards.
 */
static void compare_results(param_t *p, uint16_t input[], uint16_t weight[], uint32_t bias[],
                            uint16_t output[], uint16_t output_ref[], uint32_t org_o_shape_size,
                            int is_valid_pack, int org_oc, int org_oh, int org_ow) {
  assert(input);
  assert(weight);
  (void)input;
  (void)weight;
  // %p requires a void pointer argument.
  printf("bias at %p\n", (void *)bias);
  int f_sz = get_fsz(p->ofmap->fmt);

  if (p->relu_enable) {
    cvm_relu(output_ref, org_o_shape_size, p->ofmap->fmt);
  }

  int cmp_res = -1;
  if (!is_valid_pack) {
    // The channel axis was reshaped with SAME-mode padding that holds garbage;
    // is_valid_pack == false means the garbage part is skipped here.
    assert(org_oc > 0);
    int org_hw = org_oh * org_ow;
    int new_hw = p->ofmap->shape.h * p->ofmap->shape.w;
    int duplicated_c = p->ofmap->shape.c / org_oc;

    assert(duplicated_c > 0);
    assert(new_hw >= org_hw / duplicated_c);

    int8_t *output_c = ((int8_t *)output);
    int8_t *output_ref_c = ((int8_t *)output_ref);
    for (int c = 0; c < org_oc; c++) {
      cmp_res =
          array_cmp_int8("Comparing results ...\n", output_c + c * duplicated_c * new_hw * f_sz,
                         output_ref_c + org_hw * c * f_sz, org_hw * f_sz);

      if (cmp_res != 0) {
        break;
      }
    }
  } else {
    cmp_res = array_cmp_int8("Comparing results ...\n", (int8_t *)output_ref, (int8_t *)output,
                             org_o_shape_size * f_sz);
  }

  if (cmp_res != 0) {
    printf("Comparison FAILED!!!\n");
    // print_pooling_param(p);
    exit(-1);
  }

  delete[] output_ref;
}
|
||||
|
||||
// Extended input height: (ih - 1) rows each followed by ins_h inserted rows,
// plus the last row with ins_last_h inserted rows, plus top and bottom padding.
static int pooling_ih_ext(int ins_h, int ins_last_h, int pad_top, int pad_bottom, int ih) {
  const int total_pad = pad_top + pad_bottom;
  const int spread_rows = (ih - 1) * (ins_h + 1);
  return spread_rows + ins_last_h + 1 + total_pad;
}
|
||||
|
||||
// Extended input width: (iw - 1) columns each followed by ins_w inserted
// columns, plus the last column with ins_last_w inserted columns, plus left
// and right padding.
static int pooling_iw_ext(int ins_w, int ins_last_w, int pad_left, int pad_right, int iw) {
  const int total_pad = pad_left + pad_right;
  const int spread_cols = (iw - 1) * (ins_w + 1);
  return spread_cols + ins_last_w + 1 + total_pad;
}
|
||||
|
||||
static int pooling_oh(int ins_h, int ins_last_h, int pad_top, int pad_bottom, int stride_h, int ih,
|
||||
int kh, int dh) {
|
||||
int ih_ext = pooling_ih_ext(ins_h, ins_last_h, pad_top, pad_bottom, ih);
|
||||
int d_h = (kh - 1) * dh + 1;
|
||||
return (ih_ext - d_h) / stride_h + 1;
|
||||
}
|
||||
|
||||
static int pooling_ow(int ins_w, int ins_last_w, int pad_left, int pad_right, int stride_w, int iw,
|
||||
int kw, int dw) {
|
||||
int iw_ext = pooling_iw_ext(ins_w, ins_last_w, pad_left, pad_right, iw);
|
||||
int d_w = (kw - 1) * dw + 1;
|
||||
return (iw_ext - d_w) / stride_w + 1;
|
||||
}
|
||||
|
||||
// Release the heap-allocated tensor descriptors hanging off `p` (the ones
// created with malloc in random_depthwise_param) and clear the pointers.
static void free_depthwise_struct(param_t *p) {
  free((void *)p->ofmap);
  free((void *)p->ifmap);
  free((void *)p->weight);
  free((void *)p->bias);  // may be NULL; free(NULL) is a no-op

  p->bias = NULL;
  p->weight = NULL;
  p->ifmap = NULL;
  p->ofmap = NULL;
}
|
||||
|
||||
// Hand every non-NULL tensor of `p` back through free_tl(), in the fixed order
// ofmap, weight, bias, ifmap. The pointers themselves are left unchanged.
static void free_depthwise_param(cvk_context_t *ctx, param_t *p) {
  if (p->ofmap) {
    free_tl(ctx, p->ofmap);
  }
  if (p->weight) {
    free_tl(ctx, p->weight);
  }
  if (p->bias) {
    free_tl(ctx, p->bias);
  }
  if (p->ifmap) {
    free_tl(ctx, p->ifmap);
  }
}
|
||||
|
||||
/**
 * Build a host-only ("fake") depthwise parameter set used to derive test
 * dimensions.
 *
 * Only two properties are actually random: whether a bias descriptor is
 * attached and relu_enable. Everything else is fixed: n = 1, c = 3, 5x5
 * kernel, stride_w = 1, pad_left = pad_right = 2, no top/bottom padding, no
 * insertion, dilation 1, rshift_bits 0. `_ih`, `_iw`, `_stride_h` and `_fmt`
 * are taken from the caller. Weight/bias/ofmap use BF16 when `_fmt` is BF16
 * and I8 otherwise.
 *
 * The tensor descriptors are heap allocations with start_address = -1 (they
 * are not backed by device memory) and must be released with
 * free_depthwise_struct().
 */
static param_t random_depthwise_param(cvk_context_t *ctx, int _ih, int _iw, int _stride_h,
                                      cvk_fmt_t _fmt) {
  // Value-initialise: the struct is returned by value, so no field may be
  // left indeterminate.
  param_t p = param_t();

  random_seed = clock();
  srand(random_seed);
  const int using_bias = rand() % 2;
  p.relu_enable = rand() % 2;

  const int n = 1;
  const int c = 3;
  const int kh = 5;
  const int kw = 5;
  const int ih = _ih;
  const int iw = _iw;
  const cvk_fmt_t ifmt = _fmt;
  // weight / bias / ofmap follow the input only for BF16; integer inputs use I8.
  const cvk_fmt_t other_fmt = (ifmt == CVK_FMT_BF16) ? CVK_FMT_BF16 : CVK_FMT_I8;

  p.stride_h = _stride_h;
  p.stride_w = 1;
  p.rshift_bits = 0;

  p.pad_left = 2;
  p.pad_right = 2;
  p.pad_top = 0;
  p.pad_bottom = 0;
  // TODO: pad / ins / dilation
  p.ins_h = 0;
  p.ins_last_h = 0;
  p.ins_w = 0;
  p.ins_last_w = 0;
  p.dilation_h = 1;
  p.dilation_w = 1;

  const int oh =
      pooling_oh(p.ins_h, p.ins_last_h, p.pad_top, p.pad_bottom, p.stride_h, ih, kh, p.dilation_h);
  const int ow =
      pooling_ow(p.ins_w, p.ins_last_w, p.pad_left, p.pad_right, p.stride_w, iw, kw, p.dilation_w);

  cvk_tl_shape_t ofmap_shape;
  ofmap_shape.n = n;
  ofmap_shape.c = c;
  ofmap_shape.h = oh;
  ofmap_shape.w = ow;
  cvk_tl_shape_t ifmap_shape;
  ifmap_shape.n = n;
  ifmap_shape.c = c;
  ifmap_shape.h = ih;
  ifmap_shape.w = iw;
  cvk_tl_shape_t weight_shape;
  weight_shape.n = 1;
  weight_shape.c = c;
  weight_shape.h = kh;
  weight_shape.w = kw;
  cvk_tl_shape_t bias_shape;
  bias_shape.n = 2;
  bias_shape.c = c;
  bias_shape.h = 1;
  bias_shape.w = 1;

  // fake init for ref: descriptors only, zero-filled so unused fields are defined
  cvk_tl_t *ifmap = (cvk_tl_t *)calloc(1, sizeof(cvk_tl_t));
  cvk_tl_t *bias = NULL;
  if (using_bias) {
    bias = (cvk_tl_t *)calloc(1, sizeof(cvk_tl_t));
    assert(bias);
  }
  cvk_tl_t *weight = (cvk_tl_t *)calloc(1, sizeof(cvk_tl_t));
  cvk_tl_t *ofmap = (cvk_tl_t *)calloc(1, sizeof(cvk_tl_t));
  assert(ifmap && weight && ofmap);

  p.bias = NULL;
  if (bias) {
    bias->start_address = -1;
    bias->fmt = other_fmt;
    bias->shape = bias_shape;
    bias->stride = ctx->ops->tl_default_stride(ctx, bias->shape, other_fmt, /*eu_align*/ 0);
    p.bias = bias;
  }

  weight->start_address = -1;
  weight->fmt = other_fmt;
  weight->shape = weight_shape;
  weight->stride = ctx->ops->tl_default_stride(ctx, weight->shape, other_fmt, /*align*/ 1);
  p.weight = weight;

  ofmap->start_address = -1;
  ofmap->fmt = other_fmt;
  ofmap->shape = ofmap_shape;
  ofmap->stride = ctx->ops->tl_default_stride(ctx, ofmap->shape, other_fmt, /*align*/ 1);
  p.ofmap = ofmap;

  ifmap->start_address = -1;
  ifmap->fmt = ifmt;
  ifmap->shape = ifmap_shape;
  ifmap->stride = ctx->ops->tl_default_stride(ctx, ifmap->shape, ifmt, /*align*/ 1);
  p.ifmap = ifmap;

  return p;
}
|
||||
|
||||
/**
 * Split each per-channel bias value into two planes of `c` entries and upload
 * them into `tl` with put_bf16_tensor_g2l().
 *
 * BF16: `data` holds 32-bit values; plane 0 gets the upper 16 bits and plane 1
 * the lower 16 bits.
 * Other formats: `data` is read as 16-bit words; plane 0 gets the low byte and
 * plane 1 the high byte.
 */
static void put_bias_tensor(CVI_RT_HANDLE *ctx, cvk_context_t *bk_ctx, const cvk_tl_t *tl,
                            uint32_t data[]) {
  const int ch = tl->shape.c;
  uint16_t *planes = (uint16_t *)malloc(sizeof(uint16_t) * 2 * ch);

  if (tl->fmt != CVK_FMT_BF16) {
    uint8_t *plane_bytes = (uint8_t *)planes;
    uint16_t *words = (uint16_t *)data;
    for (int k = 0; k < ch; ++k) {
      const uint16_t v = words[k];
      plane_bytes[k] = v & 0xff;
      plane_bytes[k + ch] = (v >> 8) & 0xff;
    }
  } else {
    for (int k = 0; k < ch; ++k) {
      const uint32_t v = data[k];
      planes[k] = (v >> 16) & 0xffff;
      planes[k + ch] = (v & 0xffff);
    }
  }

  put_bf16_tensor_g2l(ctx, bk_ctx, tl, (uint16_t *)planes, tl->fmt);
  free(planes);
}
|
||||
|
||||
/**
|
||||
* \brief
|
||||
*/
|
||||
static int reshape_valid_output(cvk_context_t *bk_ctx, const cvk_tl_t *ofmap, int org_oc,
|
||||
int org_oh, int org_ow, cvk_tl_shape_t *tl_shape,
|
||||
cvk_tl_stride_t *tl_load_stride, cvk_tg_shape_t *tg_shape,
|
||||
cvk_tg_stride_t *tg_stride, cvk_fmt_t fmt) {
|
||||
assert(fmt == CVK_FMT_BF16 || fmt == CVK_FMT_I8 || fmt == CVK_FMT_U8);
|
||||
|
||||
// skip redundant one
|
||||
// store to sys and re-slice, maybe use next layer
|
||||
// sys->local skip redundant one
|
||||
|
||||
tg_shape->n = tl_shape->n = 1;
|
||||
tg_shape->c = tl_shape->c = org_oc;
|
||||
tg_shape->h = tl_shape->h = org_oh;
|
||||
tg_shape->w = tl_shape->w = org_ow;
|
||||
|
||||
cvk_tl_stride_t s = bk_ctx->ops->tl_default_stride(bk_ctx, *tl_shape, fmt, /*eu_align*/ 0);
|
||||
|
||||
tl_load_stride->n = s.n;
|
||||
tl_load_stride->c = s.c;
|
||||
tl_load_stride->h = s.h;
|
||||
tl_load_stride->w = s.w;
|
||||
|
||||
int duplicat_c = ofmap->shape.c / org_oc;
|
||||
tg_stride->n = tg_stride->c = duplicat_c * ofmap->shape.h * ofmap->shape.w * get_fsz(fmt);
|
||||
tg_stride->h = org_ow * get_fsz(fmt);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Compute the host reference output for one depthwise case.
 *
 * Batch is fixed to 1, with no insertion, no top/bottom padding, dilation 1
 * and rshift 0. BF16 goes through native_pooling_avg_bf16(); any other format
 * goes through native_pooling_ave_int8() with the buffers reinterpreted as
 * 8-bit data (bias as 16-bit) and a signed result. Both are called with
 * per-channel weights (const_weight = 0).
 *
 * \return the status reported by the selected reference routine.
 */
static bmerr_t init_ref(int ic, int ih, int iw, int kh, int kw, int pad_right, int pad_left,
                        int stride_h, int stride_w, cvk_fmt_t fmt, uint16_t *input,
                        uint16_t *weight, uint32_t *bias, uint16_t *output_ref) {
  const int batch = 1;
  const int no_ins = 0;
  const int no_pad = 0;
  const int dilation = 1;

  if (fmt == CVK_FMT_BF16) {
    return native_pooling_avg_bf16(input, weight, bias, output_ref, batch, ic, ih, iw, kh, kw,
                                   no_pad, no_pad, pad_left, pad_right, stride_h, stride_w, no_ins,
                                   no_ins, no_ins, no_ins, dilation, dilation, 0);
  }

  const int rshift_bits = 0;
  const int opd0_sign = fmt == CVK_FMT_I8;
  const int res0_sign = true;  //(ofmap->fmt == CVK_FMT_I8);
  return native_pooling_ave_int8((int8_t *)input, (int8_t *)weight, (int16_t *)bias,
                                 (int8_t *)output_ref, batch, ic, ih, iw, kh, kw, no_pad, no_pad,
                                 pad_left, pad_right, stride_h, stride_w, no_ins, no_ins, no_ins,
                                 no_ins, opd0_sign, res0_sign, rshift_bits, 0);
}
|
||||
|
||||
/**
 * Run one depthwise case on the device and check it against the host
 * reference.
 *
 * Steps: build input/weight/bias and the reference output on the host, ask
 * cvm_reshape_channel_same() for the reshaped layouts, upload the input and
 * load it back with the reshaped stride, upload the duplicated bias and
 * weights, issue the depthwise convolution, read the output back and compare.
 *
 * \return 1 when the case ran and matched, -1 when the shape cannot be
 *         reshaped. A mismatch terminates the process inside
 *         compare_results().
 *
 * NOTE(review): `input` is released with delete[]; this assumes alloc_input()
 * allocates with new[] - its definition is outside this view.
 */
static int test_depthwise(CVI_RT_HANDLE ctx, cvk_context_t *bk_ctx, int ic, int ih, int iw, int kh,
                          int kw, int pad_right, int pad_left, int stride_h, int stride_w,
                          bool has_bias, cvk_fmt_t ifmt) {
  // Value-initialise so every tensor pointer starts as NULL: the early-exit
  // path below passes this struct to free_depthwise_param().
  param_t param = param_t();
  param_t *p = &param;
  assert(ifmt == CVK_FMT_BF16 || ifmt == CVK_FMT_I8 || ifmt == CVK_FMT_U8);

  int in = 1;
  // TODO: verify dilation > 1
  int dilation_h = 1;
  int dilation_w = 1;
  int relu_enable = 0;
  int rshift_bits = 0;

  // TODO: verify ins_x
  int org_oh = pooling_oh(0, 0, 0, 0, stride_h, ih, kh, dilation_h);
  int org_ow = pooling_ow(0, 0, pad_left, pad_right, stride_w, iw, kw, dilation_w);
  int org_oc = ic;
  int org_o_shape_size = in * org_oc * org_oh * org_ow;
  uint16_t *output;
  cvk_tdma_g2l_tensor_copy_param_t p1 = cvk_tdma_g2l_tensor_copy_param_t();
  cvk_tdma_l2g_tensor_copy_param_t p2 = cvk_tdma_l2g_tensor_copy_param_t();
  // weight / ofmap do not support the U8 format
  cvk_fmt_t other_fmt = ifmt == CVK_FMT_BF16 ? CVK_FMT_BF16 : CVK_FMT_I8;

  // alloc testbench, input/ref
  uint16_t *input = alloc_input(ic, ih, iw, ifmt);
  uint16_t *weight = alloc_weight(ic, kh, kw, ifmt);
  uint32_t *bias = NULL;
  if (has_bias) bias = alloc_bias(ic, ifmt);

  uint16_t *output_ref = alloc_output(ic, org_oh, org_ow);

  // init ref (status intentionally not asserted, as in the original test)
  (void)init_ref(ic, ih, iw, kh, kw, pad_right, pad_left, stride_h, stride_w, ifmt, input, weight,
                 bias, output_ref);

  // init param
  // TODO: verify pad_top/pad_bottom
  // TODO: verify ins_h_x
  p->pad_left = pad_left;
  p->pad_right = pad_right;
  p->pad_top = 0;
  p->pad_bottom = 0;
  p->ins_h = 0;
  p->ins_last_h = 0;
  p->ins_w = 0;
  p->ins_last_w = 0;
  p->dilation_h = dilation_h;
  p->dilation_w = dilation_w;
  p->stride_h = stride_h;
  p->stride_w = stride_w;

  p->relu_enable = relu_enable;
  p->rshift_bits = rshift_bits;
  p->bias = NULL;

  // prepare load / input / weight / bias / output new shape / stride
  cvk_tl_shape_t tl_load_shape;
  cvk_tl_stride_t tl_load_stride;
  cvk_tg_shape_t tg_shape;
  cvk_tg_stride_t tg_stride;
  cvk_tl_shape_t tl_weight_shape;
  cvk_tl_shape_t tl_bias_shape;
  cvk_tl_shape_t tl_output_shape;
  cvk_tl_t *tmp_tl_load;
  cvk_tg_t *tmp_tg;

  // get reshaped information
  int r = cvm_reshape_channel_same(bk_ctx, ic, ih, iw, kh, kw, pad_right, pad_left, stride_h,
                                   stride_w, &tl_load_shape, &tl_load_stride, &tg_shape, &tg_stride,
                                   &tl_weight_shape, &tl_bias_shape, &tl_output_shape, ifmt,
                                   /*align*/ 1);

  if (r == -1) {
    printf("could not reshape it, 81\n");
    // Nothing has been allocated through `p` yet, so this is a no-op on the
    // NULL pointers; kept so the cleanup path stays uniform.
    free_depthwise_param(bk_ctx, p);

    delete[] input;
    delete[] output_ref;  // compare_results() never ran, so release it here
    free(weight);
    free(bias);
    return -1;
  }

  // prepare input tg
  {
    cvk_tg_shape_t put_tg_shape;

    put_tg_shape.n = in;
    put_tg_shape.c = ic;
    put_tg_shape.h = ih;
    put_tg_shape.w = iw;
    cvk_tg_t *put_tg = alloc_tg_bf16_gmem(&ctx, bk_ctx, put_tg_shape, ifmt);
    put_tg_bf16_gmem(&ctx, put_tg, (uint8_t *)input);
    free_tg_gmem(&ctx, put_tg);
  }

  // prepare load input, put to tg and load back
  {
    tmp_tl_load = alloc_tl_bf16(bk_ctx, tl_load_shape, ifmt, /*eu_align*/ 0);
    assert(tmp_tl_load);

    tmp_tg = alloc_tg_bf16_gmem(&ctx, bk_ctx, tg_shape, ifmt);
    tmp_tg->stride = tg_stride;

    p1.src = tmp_tg;
    p1.dst = tmp_tl_load;

    bk_ctx->ops->tdma_g2l_bf16_tensor_copy(bk_ctx, &p1);
    test_submit_comp(&ctx, bk_ctx);
    free_tg_gmem(&ctx, tmp_tg);

    // fit for hw
    tmp_tl_load->stride =
        bk_ctx->ops->tl_default_stride(bk_ctx, tmp_tl_load->shape, ifmt, /*align*/ 1);
    p->ifmap = tmp_tl_load;
  }

  // prepare load bias, put to tg and load back
  if (has_bias) {
    // bias must be i8 for the integer formats
    cvk_fmt_t bias_fmt = ifmt == CVK_FMT_BF16 ? CVK_FMT_BF16 : CVK_FMT_I8;
    p->bias = bk_ctx->ops->lmem_alloc_tensor(bk_ctx, tl_bias_shape, bias_fmt, 0);

    // duplicate bias and replace old
    uint32_t *new_bias = cvm_reshape_channel_bias((uint8_t *)bias, tl_bias_shape.n, tl_bias_shape.c,
                                                  tl_bias_shape.h, tl_bias_shape.w, org_oc, ifmt);

    // free old one
    free(bias);
    bias = new_bias;
    put_bias_tensor(&ctx, bk_ctx, p->bias, bias);
  }

  // prepare load weight, put to tg and load back
  {
    p->weight = bk_ctx->ops->lmem_alloc_tensor(bk_ctx, tl_weight_shape, other_fmt, /*align*/ 1);
    assert(p->weight);

    // duplicate kernel with c
    uint8_t *new_weight =
        cvm_reshape_channel_weight((uint8_t *)weight, tl_weight_shape.n, tl_weight_shape.c,
                                   tl_weight_shape.h, tl_weight_shape.w, org_oc, ifmt);

    // free old one
    free(weight);
    weight = (uint16_t *)new_weight;
    put_bf16_tensor_g2l(&ctx, bk_ctx, p->weight, (uint16_t *)weight, ifmt);
  }

  // prepare ofmap
  {
    // we allocate 'same' mode shape
    p->ofmap = bk_ctx->ops->lmem_alloc_tensor(bk_ctx, tl_output_shape, other_fmt, /*align*/ 1);
    assert(p->ofmap);
  }

  // emit (the same op is used for every supported format)
  bk_ctx->ops->tiu_pt_depthwise_convolution(bk_ctx, p);

  // check with no pad if true
  int is_valid_pack = false;
  cvk_tl_shape_t r_ofmap_shape;
  cvk_tl_stride_t r_ofmap_stride;
  cvk_tg_shape_t r_tg_shape;
  cvk_tg_stride_t r_tg_stride;

  reshape_valid_output(bk_ctx, p->ofmap, org_oc, org_oh, org_ow, &r_ofmap_shape, &r_ofmap_stride,
                       &r_tg_shape, &r_tg_stride, ifmt);

  p1.dst = p->ofmap;

  if (is_valid_pack) {
    cvk_tg_shape_t dst_shape;
    dst_shape.n = p->ofmap->shape.n;
    dst_shape.c = p->ofmap->shape.c;
    dst_shape.h = p->ofmap->shape.h;
    dst_shape.w = p->ofmap->shape.w;
    cvk_tg_t *cvk_tg_tmp = alloc_tg_bf16_gmem(&ctx, bk_ctx, dst_shape, ifmt);

    p2.src = p->ofmap;
    p2.dst = cvk_tg_tmp;

    // store for later reshape
    bk_ctx->ops->tdma_l2g_bf16_tensor_copy(bk_ctx, &p2);
    test_submit_comp(&ctx, bk_ctx);

    // free useless for later reallocate
    free_depthwise_param(bk_ctx, p);

    p->ofmap = bk_ctx->ops->lmem_alloc_tensor(bk_ctx, r_ofmap_shape, ifmt,
                                              /*eu_align*/ 0);
    assert(p->ofmap);

    cvk_tg_tmp->shape = r_tg_shape;
    cvk_tg_tmp->stride = r_tg_stride;

    p1.src = cvk_tg_tmp;
    p1.dst = p->ofmap;
    bk_ctx->ops->tdma_g2l_bf16_tensor_copy(bk_ctx, &p1);
    free_tg_gmem(&ctx, cvk_tg_tmp);
  }

  cvk_fmt_t ofmap_fmt = ifmt == CVK_FMT_BF16 ? CVK_FMT_BF16 : CVK_FMT_I8;
  output = (uint16_t *)get_bf16_tensor_l2g(&ctx, bk_ctx, p1.dst, ofmap_fmt);
  // compare_results() releases output_ref on success and exits on mismatch.
  compare_results(p, input, weight, bias, output, output_ref, org_o_shape_size, is_valid_pack,
                  org_oc, org_oh, org_ow);

  // free resource
  if (is_valid_pack) {
    free_tl(bk_ctx, p->ofmap);
  } else {
    free_depthwise_param(bk_ctx, p);
  }

  delete[] input;
  free(weight);
  free(bias);
  free(output);

  return 1;
}
|
||||
|
||||
// Copy the test dimensions out of `p`: channel/height/width from the ifmap
// shape, kernel size from the weight shape, and the left/right padding.
static void init_input(param_t *p, int *ic, int *ih, int *iw, int *kh, int *kw, int *pad_right,
                       int *pad_left) {
  // input feature map
  *ic = p->ifmap->shape.c;
  *ih = p->ifmap->shape.h;
  *iw = p->ifmap->shape.w;

  // kernel
  *kh = p->weight->shape.h;
  *kw = p->weight->shape.w;

  // horizontal padding
  *pad_right = p->pad_right;
  *pad_left = p->pad_left;
}
|
||||
|
||||
static int test_depthwise_pooling(CVI_RT_HANDLE *ctx, cvk_context_t *bk_ctx) {
|
||||
int loop = 1;
|
||||
int test_finished_num = 0;
|
||||
int ihs[] = {24, 96, 120, 480, 0};
|
||||
int iws[] = {16, 17, 19, 23, 128, 256, 0};
|
||||
int stride_hs[] = {3, 4, 0};
|
||||
cvk_fmt_t formats[] = {CVK_FMT_I8, CVK_FMT_U8, CVK_FMT_BF16, CVK_FMT_F32};
|
||||
int ic, ih, iw, kh, kw, pad_right, pad_left;
|
||||
cvk_fmt_t ifmt;
|
||||
param_t param;
|
||||
assert(print_pooling_param);
|
||||
|
||||
ifmt = CVK_FMT_U8;
|
||||
param = random_depthwise_param(bk_ctx, 210, 640, 1, ifmt);
|
||||
init_input(¶m, &ic, &ih, &iw, &kh, &kw, &pad_right, &pad_left);
|
||||
test_finished_num += test_depthwise(*ctx, bk_ctx, ic, ih, iw, kh, kw, pad_right, pad_left,
|
||||
param.stride_h, param.stride_w, param.bias, ifmt);
|
||||
print_pooling_param(¶m);
|
||||
free_depthwise_struct(¶m);
|
||||
|
||||
#if 1
|
||||
param = random_depthwise_param(bk_ctx, 36, 11, 3, ifmt);
|
||||
init_input(¶m, &ic, &ih, &iw, &kh, &kw, &pad_right, &pad_left);
|
||||
test_finished_num += test_depthwise(*ctx, bk_ctx, ic, ih, iw, kh, kw, pad_right, pad_left,
|
||||
param.stride_h, param.stride_w, param.bias, ifmt);
|
||||
print_pooling_param(¶m);
|
||||
free_depthwise_struct(¶m);
|
||||
|
||||
ifmt = CVK_FMT_U8;
|
||||
param = random_depthwise_param(bk_ctx, 24, 29, 3, ifmt);
|
||||
init_input(¶m, &ic, &ih, &iw, &kh, &kw, &pad_right, &pad_left);
|
||||
free_depthwise_struct(¶m);
|
||||
test_finished_num += test_depthwise(*ctx, bk_ctx, ic, ih, iw, kh, kw, pad_right, pad_left,
|
||||
param.stride_h, param.stride_w, param.bias, ifmt);
|
||||
|
||||
ifmt = CVK_FMT_BF16;
|
||||
param = random_depthwise_param(bk_ctx, 480, 53, 3, ifmt);
|
||||
init_input(¶m, &ic, &ih, &iw, &kh, &kw, &pad_right, &pad_left);
|
||||
free_depthwise_struct(¶m);
|
||||
test_finished_num += test_depthwise(*ctx, bk_ctx, ic, ih, iw, kh, kw, pad_right, pad_left,
|
||||
param.stride_h, param.stride_w, param.bias, ifmt);
|
||||
|
||||
ifmt = CVK_FMT_I8;
|
||||
param = random_depthwise_param(bk_ctx, 480, 61, 3, ifmt);
|
||||
init_input(¶m, &ic, &ih, &iw, &kh, &kw, &pad_right, &pad_left);
|
||||
free_depthwise_struct(¶m);
|
||||
test_finished_num += test_depthwise(*ctx, bk_ctx, ic, ih, iw, kh, kw, pad_right, pad_left,
|
||||
param.stride_h, param.stride_w, param.bias, ifmt);
|
||||
|
||||
ifmt = CVK_FMT_U8;
|
||||
param = random_depthwise_param(bk_ctx, 24, 17, 3, ifmt);
|
||||
init_input(¶m, &ic, &ih, &iw, &kh, &kw, &pad_right, &pad_left);
|
||||
free_depthwise_struct(¶m);
|
||||
test_finished_num += test_depthwise(*ctx, bk_ctx, ic, ih, iw, kh, kw, pad_right, pad_left,
|
||||
param.stride_h, param.stride_w, param.bias, ifmt);
|
||||
|
||||
ifmt = CVK_FMT_BF16;
|
||||
param = random_depthwise_param(bk_ctx, 48, 65, 3, ifmt);
|
||||
init_input(¶m, &ic, &ih, &iw, &kh, &kw, &pad_right, &pad_left);
|
||||
free_depthwise_struct(¶m);
|
||||
test_finished_num += test_depthwise(*ctx, bk_ctx, ic, ih, iw, kh, kw, pad_right, pad_left,
|
||||
param.stride_h, param.stride_w, param.bias, ifmt);
|
||||
|
||||
ifmt = CVK_FMT_I8;
|
||||
param = random_depthwise_param(bk_ctx, 48, 63, 3, ifmt);
|
||||
init_input(¶m, &ic, &ih, &iw, &kh, &kw, &pad_right, &pad_left);
|
||||
free_depthwise_struct(¶m);
|
||||
test_finished_num += test_depthwise(*ctx, bk_ctx, ic, ih, iw, kh, kw, pad_right, pad_left,
|
||||
param.stride_h, param.stride_w, param.bias, ifmt);
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < loop; i++) {
|
||||
for (int i = 0; ihs[i] != 0; i++) {
|
||||
for (int j = 0; iws[j] != 0; j++) {
|
||||
for (int k = 0; stride_hs[k] != 0; k++) {
|
||||
for (int l = 0; formats[l] != 0; l++) {
|
||||
continue;
|
||||
if (ihs[i] >= 480 && formats[l] == CVK_FMT_BF16) {
|
||||
continue;
|
||||
}
|
||||
param = random_depthwise_param(bk_ctx, ihs[i], iws[j], stride_hs[k], formats[l]);
|
||||
ifmt = formats[l];
|
||||
printf("test[%d] ih/iw/sh/fmt is {%d, %d, %d, %d}\n", test_finished_num, ihs[i], iws[j],
|
||||
stride_hs[k], formats[l]);
|
||||
|
||||
init_input(¶m, &ic, &ih, &iw, &kh, &kw, &pad_right, &pad_left);
|
||||
free_depthwise_struct(¶m);
|
||||
int r = test_depthwise(*ctx, bk_ctx, ic, ih, iw, kh, kw, pad_right, pad_left,
|
||||
param.stride_h, param.stride_w, param.bias, ifmt);
|
||||
test_finished_num += r;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
printf("Test finished %u\n", test_finished_num);
|
||||
|
||||
return test_finished_num;
|
||||
}
|
||||
|
||||
int main() {
|
||||
CVI_RT_HANDLE ctx;
|
||||
cvk_context_t *bk_ctx;
|
||||
|
||||
test_init(&ctx, &bk_ctx);
|
||||
|
||||
int round_mode;
|
||||
round_mode = set_store_feround();
|
||||
int ret = test_depthwise_pooling(&ctx, bk_ctx);
|
||||
assert(ret >= 0);
|
||||
(void)ret;
|
||||
printf("pass\n");
|
||||
|
||||
test_exit(&ctx, bk_ctx);
|
||||
restore_feround(round_mode);
|
||||
return 0;
|
||||
}
|
||||
127
cvimath/tests/cvi1835/fp32_bf16.cpp
Normal file
127
cvimath/tests/cvi1835/fp32_bf16.cpp
Normal file
@ -0,0 +1,127 @@
|
||||
#include <cvimath_internal.h>
|
||||
#include <sys/time.h>
|
||||
#include <test_cvikernel_util.h>
|
||||
|
||||
typedef cvk_tdma_g2g_tensor_copy_param_t param_t;
|
||||
|
||||
// Write "<tag>: (src n, c, h, w) => (dst n, c, h, w)" to `f`.
static void __print_param(const char *tag, FILE *f, param_t *p) {
  fprintf(f, "%s: (%u, %u, %u, %u) => (%u, %u, %u, %u)\n", tag,
          p->src->shape.n,
          p->src->shape.c,
          p->src->shape.h,
          p->src->shape.w,
          p->dst->shape.n,
          p->dst->shape.c,
          p->dst->shape.h,
          p->dst->shape.w);
}
|
||||
|
||||
#define print_param(f, p) __print_param(__func__, f, p)
|
||||
|
||||
typedef struct {
|
||||
cvk_tg_shape_t src_shape;
|
||||
cvk_tg_shape_t dst_shape;
|
||||
} case_t;
|
||||
|
||||
// (source format, destination format) pairs exercised for every case.
static cvk_fmt_type input_fmt[] = {
    {CVK_FMT_BF16, CVK_FMT_BF16},
};
|
||||
|
||||
// Shapes to test, one {source, destination} pair per row.
static case_t g_cases[] = {
    {{1, 3, 3, 2}, {1, 3, 3, 2}},
    {{4, 3, 3, 2}, {4, 3, 3, 2}},

    // Disabled: YOLOv2 concat layer
    // {{1, 256, 19, 19}, {1, 256, 19, 19}},
    {{1, 256, 19, 20}, {1, 256, 19, 20}},
    {{1, 1280, 3, 4}, {1, 1280, 3, 4}},
    {{1, 159 * 89, 36, 4}, {1, 159 * 89, 36, 4}},
    {{159, 89, 36, 4}, {159, 89, 36, 4}},
};
|
||||
|
||||
/**
 * Convert a buffer of 32-bit values to BF16 on the device and verify the
 * result.
 *
 * The source tensor is filled with a deterministic 32-bit pattern,
 * cvm_s2s_fp32_bf16() is issued, the submit is timed, and every destination
 * element is expected to equal the upper 16 bits of the matching source word.
 * The process exits with -1 on the first mismatch.
 */
static void test_param_g2g(CVI_RT_HANDLE *ctx, cvk_context_t *bmk, param_t *p) {
  print_param(stderr, p);

  // 2 means source is fp32, occupy 2 * bf16 size.
  // Widen before multiplying so large shapes cannot wrap in 32-bit arithmetic.
  uint64_t size =
      (uint64_t)p->src->shape.n * p->src->shape.c * p->src->shape.h * p->src->shape.w / 2;
  uint32_t *src_data = new uint32_t[size];
  for (uint64_t i = 0; i < size; i++) {
    src_data[i] = (uint32_t)(((0x1234 + i) << 16) + 0x5678 + i);
  }

  test_put_tg_mem_comp(ctx, p->src, (uint8_t *)src_data);

  cvm_s2s_fp32_bf16(bmk, p->src->start_address, p->src->shape, p->dst->start_address, p->dst->shape,
                    CVK_FMT_BF16);

  // Only the submit itself is timed.
  long elapsed;
  struct timeval t0, t1;
  gettimeofday(&t0, NULL);

  test_submit_comp(ctx, bmk);

  gettimeofday(&t1, NULL);
  elapsed = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec;
  printf("kernel takes %ld us\n", elapsed);

  uint16_t *dst_data = (uint16_t *)test_get_tg_mem_comp(ctx, p->dst);

  for (uint64_t i = 0; i < size; i++) {
    uint16_t _src_data = (src_data[i] >> 16) & 0xffff;
    if (dst_data[i] != _src_data) {
      // uint64_t is not `unsigned long` on every target; print via %llu.
      fprintf(stderr, "comparing failed at dst[%llu], got %x, exp %x\n", (unsigned long long)i,
              dst_data[i], _src_data);
      exit(-1);
    }
  }

  delete[] src_data;
  free(dst_data);
}
|
||||
|
||||
// Release the source and destination tensors of `p`, source first.
static void destroy_param_g2g(CVI_RT_HANDLE *ctx, param_t *p) {
  // source
  test_free_tg_mem_comp(ctx, p->src);
  // destination
  test_free_tg_mem_comp(ctx, p->dst);
}
|
||||
|
||||
// Run case `c` once for every format pair in input_fmt: allocate source and
// destination tensors, run the conversion check, then free both tensors.
static void test_one_case(CVI_RT_HANDLE *ctx, cvk_context_t *bmk, case_t *c) {
  const uint32_t fmt_count = sizeof(input_fmt) / sizeof(input_fmt[0]);
  uint32_t idx = 0;
  while (idx < fmt_count) {
    cvk_tg_t *src_tg = test_alloc_tg_mem_comp(ctx, bmk, c->src_shape, input_fmt[idx].src_fmt);
    cvk_tg_t *dst_tg = test_alloc_tg_mem_comp(ctx, bmk, c->dst_shape, input_fmt[idx].dst_fmt);

    param_t p;
    p.src = src_tg;
    p.dst = dst_tg;

    test_param_g2g(ctx, bmk, &p);
    destroy_param_g2g(ctx, &p);
    ++idx;
  }
}
|
||||
|
||||
int main() {
|
||||
CVI_RT_HANDLE ctx;
|
||||
cvk_context_t *bmk;
|
||||
|
||||
test_init(&ctx, &bmk);
|
||||
|
||||
uint32_t nr_cases = sizeof(g_cases) / sizeof(g_cases[0]);
|
||||
for (uint32_t i = 0; i < nr_cases; i++) test_one_case(&ctx, bmk, &g_cases[i]);
|
||||
|
||||
test_exit(&ctx, bmk);
|
||||
return 0;
|
||||
}
|
||||
845
cvimath/tests/cvi1835/gemm.cpp
Normal file
845
cvimath/tests/cvi1835/gemm.cpp
Normal file
@ -0,0 +1,845 @@
|
||||
#include <cvimath_internal.h>
|
||||
#include <sys/time.h>
|
||||
#include <test_cvikernel_util.h>
|
||||
#include <time.h> // clock
|
||||
|
||||
typedef cvk_tiu_matrix_multiplication_param_t param_t;
|
||||
int random_seed;
|
||||
|
||||
static uint64_t matrix_size(const cvk_ml_t *ml) {
|
||||
uint64_t row = ml->shape.n;
|
||||
uint64_t col = ml->shape.col;
|
||||
return row * col;
|
||||
}
|
||||
|
||||
/* Element count of the result matrix of a matrix-multiplication parameter. */
static uint64_t res_size(param_t *p) {
  const uint64_t elements = matrix_size(p->res);
  return elements;
}
|
||||
|
||||
/*
 * Allocate the host copy of the left operand and fill element i with
 * convert_fp32_bf16(i). The caller releases the buffer with delete[].
 */
static uint16_t *alloc_left(param_t *p) {
  const uint64_t count = matrix_size(p->left);
  uint16_t *values = new uint16_t[count];

  uint64_t idx = 0;
  while (idx < count) {
    values[idx] = convert_fp32_bf16(idx);
    ++idx;
  }

  return values;
}
|
||||
|
||||
/*
 * Allocate the host copy of the right operand and fill element i with
 * convert_fp32_bf16(i). The caller releases the buffer with delete[].
 *
 * The original carried a local `val` (0.01, incremented each iteration) that
 * was never read; it is removed because it suggested the data was scaled by
 * 0.01 when the stored value has always been the element index.
 */
static uint16_t *alloc_right(param_t *p) {
  uint64_t size = matrix_size(p->right);
  uint16_t *buf = new uint16_t[size];
  for (uint64_t i = 0; i < size; i++) {
    buf[i] = convert_fp32_bf16(i);
  }
  return buf;
}
|
||||
|
||||
/*
 * Allocate the host copy of the bias and fill element i with
 * convert_fp32_hex(i). Returns NULL when the parameter has no bias matrix.
 * The caller releases the buffer with delete[].
 */
static uint32_t *alloc_bias(param_t *p) {
  if (p->bias) {
    const uint64_t count = matrix_size(p->bias);
    uint32_t *values = new uint32_t[count];

    uint64_t idx = 0;
    while (idx < count) {
      values[idx] = convert_fp32_hex(idx);
      ++idx;
    }
    return values;
  }

  return NULL;
}
|
||||
|
||||
/*
 * Allocate the host result/reference buffer (one 32-bit slot per result
 * element) and seed slot i with convert_fp32_bf16(i).
 * The caller releases the buffer with delete[].
 */
static uint32_t *alloc_res(param_t *p) {
  const uint64_t count = res_size(p);
  uint32_t *values = new uint32_t[count];

  uint64_t idx = 0;
  while (idx < count) {
    values[idx] = convert_fp32_bf16(idx);
    ++idx;
  }

  return values;
}
|
||||
|
||||
/*
 * In-place ReLU: every element that compares less than zero is replaced by 0.
 * Elements for which `x < 0` is false (including NaN and -0.0) are left alone.
 */
static inline void cvm_relu(float *buf, uint64_t size) {
  float *cursor = buf;
  uint64_t remaining = size;

  while (remaining > 0) {
    if (*cursor < 0) {
      *cursor = 0;
    }
    ++cursor;
    --remaining;
  }
}
|
||||
|
||||
/*
 * CPU reference for the matrix multiply-accumulate described by `p`.
 *
 * left/right hold bf16 bit patterns, bias holds 32-bit patterns decoded with
 * convert_hex_fp32, and res is both input (when p->add_result is set) and
 * output. Accumulation is done in float and the final values are written back
 * to res through convert_fp32_bf16.
 */
static void matrix_mac_ref(param_t *p, uint16_t left[], uint16_t right[], uint32_t bias[],
                           uint32_t res[]) {
  const uint64_t total = res_size(p);
  const uint32_t k_dim = p->left->shape.col;
  const uint32_t n_dim = p->right->shape.col;
  const uint32_t rows = p->left->shape.n;
  const uint32_t cols = p->res->shape.col;
  const uint32_t chan = p->left->shape.c;
  const uint32_t width = p->left->shape.w;

  float *acc = new float[total];

  // Seed the accumulator either from the incoming result or with zeros.
  for (uint32_t i = 0; i < rows * cols; i++) {
    if (p->add_result) {
      acc[i] = convert_bf16_fp32(res[i]);
    } else {
      acc[i] = 0;
    }
  }

  for (uint32_t r = 0; r < rows; r++) {
    for (uint32_t c = 0; c < cols; c++) {
      float *cell = &acc[r * cols + c];
      // The reduction index is walked w-major then c, matching the original
      // summation order (float addition is order sensitive).
      for (uint32_t wi = 0; wi < width; wi++) {
        for (uint32_t ci = 0; ci < chan; ci++) {
          const uint32_t k = wi + (ci * width);
          // c * w may overshoot the real column count; skip the padding.
          if (k >= k_dim) {
            continue;
          }

          const float lhs = convert_bf16_fp32(left[r * k_dim + k]);
          const float rhs = convert_bf16_fp32(right[k * n_dim + c]);
          *cell += lhs * rhs;
        }
      }
    }
  }

  if (p->bias) {
    // One bias value per output column, added to every row.
    for (uint32_t r = 0; r < rows; r++) {
      float *row_acc = &acc[r * cols];
      for (uint32_t c = 0; c < cols; c++) {
        row_acc[c] += convert_hex_fp32(bias[c]);
      }
    }
  }

  if (p->relu_enable) {
    cvm_relu(acc, total);
  }

  for (uint64_t i = 0; i < total; i++) {
    res[i] = convert_fp32_bf16(acc[i]);
  }

  delete[] acc;
}
|
||||
|
||||
/*
 * Upload a 32-bit bias vector to the local matrix `ml`.
 *
 * The values are split into two consecutive 16-bit planes of shape.col
 * entries each: the upper halves first, then the lower halves.
 */
static void put_bias(CVI_RT_HANDLE *ctx, cvk_context_t *bk_ctx, const cvk_ml_t *ml,
                     uint32_t data[]) {
  const uint64_t cols = ml->shape.col;

  uint16_t *planes = new uint16_t[cols * 2];
  uint16_t *high_plane = planes;
  uint16_t *low_plane = planes + cols;

  for (uint64_t idx = 0; idx < cols; idx++) {
    const uint32_t word = data[idx];
    high_plane[idx] = (word >> 16) & 0xFFFF;
    low_plane[idx] = (word & 0xFFFF);
  }

  test_put_matrix_g2l_comp(ctx, bk_ctx, ml, (uint8_t *)planes);

  delete[] planes;
}
|
||||
|
||||
/*
 * Upload the initial result values to the local matrix `ml`, keeping only the
 * low 16 bits of each 32-bit host slot.
 */
static void put_res(CVI_RT_HANDLE *ctx, cvk_context_t *bk_ctx, const cvk_ml_t *ml,
                    uint32_t data[]) {
  const uint64_t count = ml->shape.n * ml->shape.col;

  uint16_t *packed = new uint16_t[count];
  uint64_t idx = 0;
  while (idx < count) {
    packed[idx] = (data[idx] & 0xFFFF);
    ++idx;
  }

  test_put_matrix_g2l_comp(ctx, bk_ctx, ml, (uint8_t *)packed);

  delete[] packed;
}
|
||||
|
||||
/*
 * Read the result matrix back from global memory and widen each 16-bit value
 * into a 32-bit slot. The returned array is owned by the caller (delete[]).
 */
static uint32_t *get_res(CVI_RT_HANDLE *ctx, cvk_mg_t *mg, param_t *p) {
  uint64_t size = res_size(p);
  uint32_t *res = new uint32_t[size];

  uint16_t *tmp = (uint16_t *)test_get_mg_mem_comp(ctx, mg);
  for (uint64_t i = 0; i < size; i++) res[i] = tmp[i];

  // NOTE(review): the read-back helpers are treated as malloc-style here,
  // matching the sibling tensor test which releases test_get_tg_mem_comp()
  // output with free(). The original delete[] would be an allocator mismatch
  // under that assumption - confirm against test_cvikernel_util.
  free(tmp);
  return res;
}
|
||||
|
||||
/*
 * Allocate a global-memory matrix with the row/column extent of `ml`
 * (rows = shape.n, cols = shape.col), copy `data` into it and submit.
 * The caller releases the returned matrix with test_free_mg_mem_comp().
 */
static inline cvk_mg_t *put_bf16_matrix_g(CVI_RT_HANDLE *ctx, cvk_context_t *bk_ctx,
                                          const cvk_ml_t *ml, uint8_t data[],
                                          cvk_fmt_t mg_data_format) {
  cvk_mg_shape_t global_shape;
  global_shape.row = ml->shape.n;
  global_shape.col = ml->shape.col;

  cvk_mg_t *global_matrix = test_alloc_mg_mem_comp(ctx, global_shape, mg_data_format);
  test_put_mg_mem_comp(ctx, global_matrix, data);
  test_submit_comp(ctx, bk_ctx);

  return global_matrix;
}
|
||||
|
||||
/*
 * Run one bf16 GEMM described by `p` on the device and compare it bit-exactly
 * against the CPU reference (matrix_mac_ref). Exits the process with -1 on the
 * first mismatch.
 *
 * Fix over the original: the reference buffer `ref` is now released; it was
 * leaked on every call.
 */
static void test_param(CVI_RT_HANDLE *ctx, cvk_context_t *bk_ctx, param_t *p) {
  uint16_t *left = alloc_left(p);
  uint16_t *right = alloc_right(p);
  uint32_t *bias = alloc_bias(p);  // NULL when the parameter has no bias
  uint32_t *ref = alloc_res(p);

  cvk_mg_t *left_mg = put_bf16_matrix_g(ctx, bk_ctx, p->left, (uint8_t *)left, CVK_FMT_BF16);
  cvk_mg_t *right_mg = put_bf16_matrix_g(ctx, bk_ctx, p->right, (uint8_t *)right, CVK_FMT_BF16);
  cvk_mg_shape_t s;
  s.row = p->res->shape.n;
  s.col = p->res->shape.col;
  cvk_mg_t *result_mg = test_alloc_mg_mem_comp(ctx, s, CVK_FMT_BF16);

  if (bias) put_bias(ctx, bk_ctx, p->bias, bias);
  if (p->add_result) put_res(ctx, bk_ctx, p->res, ref);

  printf("start\n");
  size_t *slice_num =
      cvm_gemm(bk_ctx, left_mg->start_address, right_mg->start_address, result_mg->start_address,
               p->left->shape.n, p->left->shape.col, p->res->shape.col, CVK_FMT_BF16);
  free(slice_num);  // the slice information is not needed for bf16
  test_submit_comp(ctx, bk_ctx);

  uint32_t *res = get_res(ctx, result_mg, p);
  matrix_mac_ref(p, left, right, bias, ref);

  uint64_t size = res_size(p);
  for (uint64_t i = 0; i < size; i++) {
    if (res[i] != ref[i]) {
      uint16_t _res = res[i] & 0xffff;
      uint16_t _ref = ref[i] & 0xffff;
      fprintf(stderr, "comparing failed at out[%lu], got %f(0x%x), exp %f(0x%x)\n", i,
              convert_bf16_fp32(_res), res[i], convert_bf16_fp32(_ref), ref[i]);
      fprintf(stderr, "random_seed=%d\n", random_seed);
      exit(-1);
    }
  }

  test_free_mg_mem_comp(ctx, left_mg);
  test_free_mg_mem_comp(ctx, right_mg);
  test_free_mg_mem_comp(ctx, result_mg);

  delete[] left;
  delete[] right;
  delete[] bias;  // delete[] on NULL is a no-op
  delete[] ref;
  delete[] res;
}
|
||||
|
||||
/*
 * Free whichever local matrices of `p` are set, in the order
 * bias, res, right, left.
 */
static void destroy_param(cvk_context_t *bk_ctx, param_t *p) {
  if (p->bias) {
    bk_ctx->ops->lmem_free_matrix(bk_ctx, p->bias);
  }
  if (p->res) {
    bk_ctx->ops->lmem_free_matrix(bk_ctx, p->res);
  }
  if (p->right) {
    bk_ctx->ops->lmem_free_matrix(bk_ctx, p->right);
  }
  if (p->left) {
    bk_ctx->ops->lmem_free_matrix(bk_ctx, p->left);
  }
}
|
||||
|
||||
/*
 * Create the local-memory descriptor for the result matrix.
 *
 * A 1x1x1x1 placeholder is allocated and its shape is then overwritten with
 * the logical result shape (n from the left operand; c, w, col from the right
 * operand).
 *
 * Returns NULL when the allocation fails. The original wrote the shape through
 * the pointer unconditionally, which dereferenced NULL before the caller's
 * `!p.res` check could run.
 */
static cvk_ml_t *alloc_param_res(cvk_context_t *bk_ctx, param_t *p) {
  cvk_ml_shape_t s;

  s.n = p->left->shape.n;
  s.c = p->right->shape.c;
  s.w = p->right->shape.w;
  s.col = p->right->shape.col;
  cvk_fmt_t fmt = CVK_FMT_BF16;

  cvk_ml_shape_t fake;
  fake.n = 1;
  fake.c = 1;
  fake.w = 1;
  fake.col = 1;

  cvk_ml_t *t = bk_ctx->ops->lmem_alloc_matrix(bk_ctx, fake, fmt, 1);
  if (t) {
    t->shape = s;
  }
  return t;
}
|
||||
|
||||
/*
 * Build the parameter set for the backend bf16 matrix-multiplication test.
 *
 * Several values are drawn from rand() and then overwritten with fixed ones
 * (1024x1024 operands, no relu, no bias). The draws are kept so the sequence
 * of rand() calls, and therefore left_w / right_w, is unchanged.
 *
 * Fix over the original: the shape of each operand is only written when the
 * allocation succeeded. Previously `t->shape = ...` ran before the
 * `!p.left || !p.right` check and would dereference NULL on failure.
 */
static param_t param_0(cvk_context_t *bk_ctx) {
retry:
  random_seed = clock();
  srand(random_seed);

  param_t p;
  memset(&p, 0, sizeof(p));
  p.lshift_bits = 0;
  p.rshift_bits = 0;
  p.res_is_int8 = true;
  p.relu_enable = rand() % 2;
  p.relu_enable = 0;
  p.add_result = 0; /* bf16 HW does not support add_result */
  p.ps32_mode = 0;

  uint32_t left_row = rand() % 100 + 1;
  uint32_t left_col = rand() % 100 + 1;
  left_row = 1024;
  left_col = 1024;
  uint32_t left_w = rand() % (left_col / 5 + 1) + 1;  // c is derived from w, keeping c large
  uint32_t left_c = left_col / left_w + (left_col % left_w ? 1 : 0);

  uint32_t right_row = left_col;
  uint32_t right_col = rand() % 100 + 1;
  right_col = 1024;
  uint32_t right_w = (rand() % (right_col / 5 + 1) + 1);  // keeps c large
  uint32_t right_c = right_col / right_w + (right_col % right_w ? 1 : 0);

  cvk_ml_shape_t left_shape;
  left_shape.n = left_row;
  left_shape.c = left_c;
  left_shape.w = left_w;
  left_shape.col = left_col;

  cvk_ml_shape_t right_shape;
  right_shape.n = right_row;
  right_shape.c = right_c;
  right_shape.w = right_w;
  right_shape.col = right_col;

  uint32_t bias = rand() % 2;
  bias = 0;
  p.bias = NULL;

  // Operands are allocated as 1x1x1x1 placeholders whose shape is then
  // replaced by the logical shape.
  cvk_ml_shape_t fake;
  fake.n = 1;
  fake.c = 1;
  fake.w = 1;
  fake.col = 1;

  cvk_ml_t *t = bk_ctx->ops->lmem_alloc_matrix(bk_ctx, fake, CVK_FMT_BF16, 1);
  if (t) {
    t->shape = left_shape;
  }
  p.left = t;

  t = bk_ctx->ops->lmem_alloc_matrix(bk_ctx, fake, CVK_FMT_BF16, 1);
  if (t) {
    t->shape = right_shape;
  }
  p.right = t;

  if (!p.left || !p.right) {
    printf("retry init_matrix_param\n");
    destroy_param(bk_ctx, &p);
    goto retry;
  }

  p.res = alloc_param_res(bk_ctx, &p);
  if (bias) {
    cvk_ml_shape_t bias_shape = right_shape;
    bias_shape.n = 2;
    p.bias = bk_ctx->ops->lmem_alloc_matrix(bk_ctx, bias_shape, CVK_FMT_BF16, 1);
  }

  if (!p.res || (bias && !p.bias)) {
    printf("retry init_matrix_param\n");
    destroy_param(bk_ctx, &p);
    goto retry;
  }

  return p;
}
|
||||
|
||||
// gemm test function
|
||||
//#define USE_CBLAS_VERITY (1)
|
||||
|
||||
#ifdef USE_CBLAS_VERITY
|
||||
#include <cblas.h>
|
||||
#endif /* ifdef USE_CBLAS_VERITY */
|
||||
|
||||
// comes from
|
||||
// https://stackoverflow.com/questions/47023651/multiplying-matrices-in-one-dimensional-arrays
|
||||
void multiply(uint16_t *a, int row1, int col1, uint16_t *b, int row2, int col2, uint16_t *d) {
|
||||
assert(col1 == row2);
|
||||
// silence error=unused-but-set-parameter warning
|
||||
(void)row2;
|
||||
|
||||
for (int i = 0; i < row1; i++) {
|
||||
for (int j = 0; j < col2; j++) {
|
||||
float sum = 0;
|
||||
for (int k = 0; k < col1; k++) {
|
||||
float _a = convert_bf16_fp32(a[i * col1 + k]);
|
||||
float _b = convert_bf16_fp32(b[k * col2 + j]);
|
||||
sum = sum + _a * _b;
|
||||
}
|
||||
d[i * col2 + j] = convert_fp32_bf16(sum);
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
for (int i = 0; i < size; i++) {
|
||||
if (i % col2 == 0) {
|
||||
printf("\n");
|
||||
}
|
||||
printf("%f ", convert_bf16_fp32(d[i]));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef USE_CBLAS_VERITY
|
||||
#else
|
||||
static void multiply_i32(uint8_t *a, int row1, int col1, uint8_t *b, int row2, int col2,
|
||||
uint32_t *d, cvk_fmt_t fmt) {
|
||||
assert(col1 == row2);
|
||||
// silence error=unused-but-set-parameter warning
|
||||
(void)row2;
|
||||
|
||||
for (int i = 0; i < row1; i++) {
|
||||
for (int j = 0; j < col2; j++) {
|
||||
int sum = 0;
|
||||
for (int k = 0; k < col1; k++) {
|
||||
int _a = fmt == CVK_FMT_I8 ? (int8_t)(a[i * col1 + k]) : (a[i * col1 + k]);
|
||||
int _b = fmt == CVK_FMT_I8 ? (int8_t)(b[k * col2 + j]) : (b[k * col2 + j]);
|
||||
// printf("sum = sum + _a * _b = %d = %d + %d * %d\n", sum + _a * _b, sum, _a, _b);
|
||||
sum = sum + _a * _b;
|
||||
}
|
||||
// printf("out [%d] is %d\n", i * col2 + j, sum);
|
||||
d[i * col2 + j] = (sum);
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
for (int i = 0; i < size; i++) {
|
||||
if (i % col2 == 0) {
|
||||
printf("\n");
|
||||
}
|
||||
printf("%f ", convert_bf16_fp32(d[i]));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif /* ifdef USE_CBLAS_VERITY */
|
||||
|
||||
int array_cmp_int16(const char *const info, const uint16_t *p_exp, const uint16_t *p_got,
|
||||
int count) {
|
||||
int idx;
|
||||
for (idx = 0; idx < count; idx++) {
|
||||
if (p_exp[idx] != p_got[idx]) {
|
||||
printf("%s error at index %d exp %d(%f,0x%x) got %d(%f,0x%x)\n", info, idx, p_exp[idx],
|
||||
convert_bf16_fp32(p_exp[idx]), p_exp[idx], p_got[idx], convert_bf16_fp32(p_got[idx]),
|
||||
p_got[idx]);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Compare two 32-bit arrays element by element.
 * Returns 0 when the first `count` elements match; otherwise prints the first
 * differing index with both values and returns -1.
 */
int array_cmp_int32(const char *const info, const uint32_t *p_exp, const uint32_t *p_got,
                    int count) {
  int idx = 0;
  while (idx < count && p_exp[idx] == p_got[idx]) {
    idx++;
  }

  if (idx >= count) {
    return 0;
  }

  printf("%s error at index %d exp %d got %d\n", info, idx, p_exp[idx], p_got[idx]);
  return -1;
}
|
||||
|
||||
static void assign_bf16_values_to_matrix(uint16_t *matrix, size_t size) {
|
||||
float t;
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
float f;
|
||||
#if 1
|
||||
if (i % 2 == 0) t = i % 8;
|
||||
if (i % 2 == 1) t = -1 * (i % 8);
|
||||
f = t;
|
||||
#else
|
||||
t = i * (i % 2 ? -1 : 1);
|
||||
f = t * 0.01 + size * 0.01;
|
||||
#endif
|
||||
matrix[i] = convert_fp32_bf16(f);
|
||||
// printf("f[%lu] is %f(0x%x)\n", i, f, matrix[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static void uint16_to_float(float *float_data, uint16_t *bf16_data, size_t size) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
float_data[i] = convert_bf16_fp32(bf16_data[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static void uint8_to_float(float *float_data, uint8_t *i8_data, size_t size, cvk_fmt_t fmt) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
int input = (i8_data[i]);
|
||||
if (fmt == CVK_FMT_I8) {
|
||||
input = (int8_t)(i8_data[i]);
|
||||
}
|
||||
float_data[i] = (float)input;
|
||||
}
|
||||
}
|
||||
|
||||
/* Fill `matrix` with the byte pattern (i + 20) truncated to 8 bits. */
static void assign_i8_values_to_matrix(uint8_t *matrix, size_t size) {
  size_t idx = 0;
  while (idx < size) {
    // The explicit cast spells out the truncation the assignment performs.
    matrix[idx] = (uint8_t)(idx + 20);
    ++idx;
  }
}
|
||||
|
||||
#ifdef USE_CBLAS_VERITY
|
||||
static void float_to_int16(uint16_t *int16_data, float *float_data, size_t size) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
int16_data[i] = convert_fp32_bf16(float_data[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Convert `size` floats to 32-bit integer bit patterns (truncating toward
 * zero).
 *
 * Fix over the original: a negative float converted directly to uint32_t is
 * undefined behaviour. The value now goes through a signed 64-bit integer
 * first, so negative values are stored as their two's-complement 32-bit
 * pattern, the same representation multiply_i32 produces when it stores an
 * int sum into uint32_t.
 */
static void float_to_int32(uint32_t *int32_data, float *float_data, size_t size) {
  for (size_t i = 0; i < size; i++) {
    int32_data[i] = (uint32_t)(int64_t)float_data[i];
  }
}
|
||||
#endif
|
||||
|
||||
// int8
|
||||
static int _test_bmblas_gemm_bm1880v2(size_t M, size_t N, size_t K, cvk_fmt_t fmt) {
|
||||
long elapsed;
|
||||
struct timeval t0, t1;
|
||||
int ret = 0;
|
||||
|
||||
uint8_t *i8_A = new uint8_t[M * K];
|
||||
uint8_t *i8_B = new uint8_t[N * K];
|
||||
uint8_t *i8_C = new uint8_t[4 * M * N]; // 32 bit output
|
||||
uint32_t *i32bit_ref = new uint32_t[M * N];
|
||||
|
||||
assign_i8_values_to_matrix(i8_A, M * K);
|
||||
assign_i8_values_to_matrix(i8_B, N * K);
|
||||
|
||||
float *float_A = new float[M * K];
|
||||
float *float_B = new float[N * K];
|
||||
float *float_C_ref = new float[M * N];
|
||||
uint8_to_float(float_A, i8_A, M * K, fmt);
|
||||
uint8_to_float(float_B, i8_B, N * K, fmt);
|
||||
|
||||
#if 0
|
||||
printf("\nA:");
|
||||
for (int i = 0; i < M; i++) {
|
||||
printf("\n");
|
||||
for (int j = 0; j < K; j++) {
|
||||
printf("%e(0x%x) ", float_A[i * K + j], i8_A[i * K + j]);
|
||||
}
|
||||
}
|
||||
printf("\nB:");
|
||||
for (int i = 0; i < K; i++) {
|
||||
printf("\n");
|
||||
for (int j = 0; j < N; j++) {
|
||||
printf("%e(0x%x) ", float_B[i * N + j], i8_B[i * N + j]);
|
||||
}
|
||||
}
|
||||
printf("\nR:");
|
||||
for (int i = 0; i < M; i++) {
|
||||
printf("\n");
|
||||
for (int j = 0; j < N; j++) {
|
||||
printf("%e ", convert_i8_fp32(i32bit_ref[i * N + j]));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
gettimeofday(&t0, NULL);
|
||||
|
||||
#ifdef USE_CBLAS_VERITY
|
||||
float alpha = 0;
|
||||
float beta = 0;
|
||||
|
||||
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, float_A, K, float_B, N,
|
||||
beta, float_C_ref, N);
|
||||
float_to_int32(i32bit_ref, float_C_ref, M * N);
|
||||
#else /* ! ifdef USE_CBLAS_VERITY */
|
||||
multiply_i32(i8_A, M, K, i8_B, K, N, i32bit_ref, fmt);
|
||||
#endif /* ifdef USE_CBLAS_VERITY */
|
||||
|
||||
gettimeofday(&t1, NULL);
|
||||
elapsed = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec;
|
||||
#ifdef USE_CBLAS_VERITY
|
||||
printf("cblas GEMM takes %ld us\n", elapsed);
|
||||
#else /* ! ifdef USE_CBLAS_VERITY */
|
||||
printf("CPU GEMM takes %ld us\n", elapsed);
|
||||
#endif /* ifdef USE_CBLAS_VERITY */
|
||||
|
||||
CVI_RT_HANDLE ctx;
|
||||
cvk_context_t *bk_ctx;
|
||||
|
||||
test_init(&ctx, &bk_ctx);
|
||||
|
||||
// alloc device memory
|
||||
cvk_mg_shape_t s_a = {(uint32_t)M, (uint32_t)K};
|
||||
cvk_mg_shape_t s_b = {(uint32_t)K, (uint32_t)N};
|
||||
cvk_mg_shape_t s_r = {2 * (uint32_t)M, 2 * (uint32_t)N};
|
||||
|
||||
size_t s_size_a = mg_shape_size(&s_a) * bytesize_of_fmt(fmt);
|
||||
size_t s_size_b = mg_shape_size(&s_b) * bytesize_of_fmt(fmt);
|
||||
size_t s_size_r = mg_shape_size(&s_r) * bytesize_of_fmt(fmt);
|
||||
|
||||
CVI_RT_MEM devmem_a = CVI_RT_MemAlloc(ctx, s_size_a);
|
||||
CVI_RT_MEM devmem_b = CVI_RT_MemAlloc(ctx, s_size_b);
|
||||
CVI_RT_MEM devmem_r = CVI_RT_MemAlloc(ctx, s_size_r);
|
||||
|
||||
gaddr_t gaddr_a = CVI_RT_MemGetPAddr(devmem_a);
|
||||
gaddr_t gaddr_b = CVI_RT_MemGetPAddr(devmem_b);
|
||||
gaddr_t gaddr_r = CVI_RT_MemGetPAddr(devmem_r);
|
||||
|
||||
// copy to device memory
|
||||
CVI_RT_MemCopyS2D(ctx, devmem_a, (uint8_t *)i8_A);
|
||||
CVI_RT_MemCopyS2D(ctx, devmem_b, (uint8_t *)i8_B);
|
||||
CVI_RT_MemCopyS2D(ctx, devmem_r, (uint8_t *)i8_C);
|
||||
|
||||
// do computation with bmkernel
|
||||
// bmruntime_bmkernel_create(ctx, (void**)&bk_ctx);
|
||||
|
||||
// printf("gaddr_a/gaddr_b/gaddr_r at %zx %zx %zx\n", gaddr_a, gaddr_b, gaddr_r);
|
||||
size_t *slice_num = cvm_gemm((cvk_context_t *)bk_ctx, gaddr_a, gaddr_b, gaddr_r, M, K, N, fmt);
|
||||
|
||||
gettimeofday(&t0, NULL);
|
||||
test_submit_comp(&ctx, bk_ctx);
|
||||
gettimeofday(&t1, NULL);
|
||||
elapsed = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec;
|
||||
printf("TPU GEMM takes %ld us\n", elapsed);
|
||||
|
||||
CVI_RT_MemCopyD2S(ctx, (uint8_t *)i8_C, devmem_r);
|
||||
|
||||
CVI_RT_MemFree(ctx, devmem_a);
|
||||
CVI_RT_MemFree(ctx, devmem_b);
|
||||
CVI_RT_MemFree(ctx, devmem_r);
|
||||
|
||||
test_exit(&ctx, bk_ctx);
|
||||
|
||||
uint32_t *i32_C = new uint32_t[M * N]; // 32 bit output with stirded
|
||||
|
||||
cvm_combin_gemm_i8(slice_num, i8_C, i32_C, M, N);
|
||||
|
||||
free(slice_num);
|
||||
|
||||
int cmp_res = array_cmp_int32("gemm", i32bit_ref, i32_C, M * N);
|
||||
if (cmp_res != 0) {
|
||||
ret = -1;
|
||||
printf("Comparison failed for cblas_sgemm and bmblas_gemm!\n");
|
||||
#if 0
|
||||
printf("\nref/cmd is:");
|
||||
for (int i = 0; i < M; i++) {
|
||||
printf(">\n");
|
||||
for (int j = 0; j < N; j++) {
|
||||
printf("%f(0x%x)/%f(0x%x) ",
|
||||
convert_i8_fp32(i32bit_ref[i * N + j]), i32bit_ref[i * N + j],
|
||||
convert_i8_fp32(i8_C[i * N + j]), i8_C[i * N + j]
|
||||
);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
// printf("Comparison done for cblas_sgemm and bmblas_gemm!\n\n");
|
||||
}
|
||||
|
||||
delete[] float_A;
|
||||
delete[] float_B;
|
||||
delete[] float_C_ref;
|
||||
delete[] i8_A;
|
||||
delete[] i8_B;
|
||||
delete[] i8_C;
|
||||
delete[] i32bit_ref;
|
||||
delete[] i32_C;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int test_bmblas_gemm_bm1880v2(size_t M, size_t N, size_t K, cvk_fmt_t fmt) {
|
||||
printf("%s: M=%zu, N=%zu, K=%zu, fmt_sz: %d\n", __func__, M, N, K, cvm_bytesize_of_fmt(fmt));
|
||||
|
||||
// FIXME: not duplicate
|
||||
if (fmt != CVK_FMT_BF16) {
|
||||
return _test_bmblas_gemm_bm1880v2(M, N, K, fmt);
|
||||
}
|
||||
|
||||
long elapsed;
|
||||
struct timeval t0, t1;
|
||||
int ret = 0;
|
||||
|
||||
uint16_t *bf16_A = new uint16_t[M * K];
|
||||
uint16_t *bf16_B = new uint16_t[N * K];
|
||||
uint16_t *bf16_C = new uint16_t[2 * M * N];
|
||||
uint16_t *int16_C_ref = new uint16_t[M * N];
|
||||
|
||||
assign_bf16_values_to_matrix(bf16_A, M * K);
|
||||
assign_bf16_values_to_matrix(bf16_B, N * K);
|
||||
|
||||
float *float_A = new float[M * K];
|
||||
float *float_B = new float[N * K];
|
||||
float *float_C_ref = new float[M * N];
|
||||
uint16_to_float(float_A, bf16_A, M * K);
|
||||
uint16_to_float(float_B, bf16_B, N * K);
|
||||
|
||||
#if 0
|
||||
printf("\nA:");
|
||||
for (int i = 0; i < M; i++) {
|
||||
printf("\n");
|
||||
for (int j = 0; j < K; j++) {
|
||||
printf("%e(0x%x) ", float_A[i * K + j], bf16_A[i * K + j]);
|
||||
}
|
||||
}
|
||||
printf("\nB:");
|
||||
for (int i = 0; i < K; i++) {
|
||||
printf("\n");
|
||||
for (int j = 0; j < N; j++) {
|
||||
printf("%e(0x%x) ", float_B[i * N + j], bf16_B[i * N + j]);
|
||||
}
|
||||
}
|
||||
printf("\nR:");
|
||||
for (int i = 0; i < M; i++) {
|
||||
printf("\n");
|
||||
for (int j = 0; j < N; j++) {
|
||||
printf("%e ", convert_bf16_fp32(int16_C_ref[i * N + j]));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
gettimeofday(&t0, NULL);
|
||||
|
||||
#ifdef USE_CBLAS_VERITY
|
||||
float alpha = 0;
|
||||
float beta = 0;
|
||||
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, float_A, K, float_B, N,
|
||||
beta, float_C_ref, N);
|
||||
float_to_int16(int16_C_ref, float_C_ref, M * N);
|
||||
#else /* ! ifdef USE_CBLAS_VERITY */
|
||||
multiply(bf16_A, M, K, bf16_B, K, N, int16_C_ref);
|
||||
#endif /* ifdef USE_CBLAS_VERITY */
|
||||
|
||||
delete[] float_A;
|
||||
delete[] float_B;
|
||||
delete[] float_C_ref;
|
||||
|
||||
gettimeofday(&t1, NULL);
|
||||
elapsed = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec;
|
||||
#ifdef USE_CBLAS_VERITY
|
||||
printf("cblas GEMM takes %ld us\n", elapsed);
|
||||
#else
|
||||
printf("CPU GEMM takes %ld us\n", elapsed);
|
||||
#endif
|
||||
|
||||
CVI_RT_HANDLE ctx;
|
||||
cvk_context_t *bk_ctx;
|
||||
|
||||
test_init(&ctx, &bk_ctx);
|
||||
|
||||
// alloc device memory
|
||||
cvk_mg_shape_t s_a = {(uint32_t)M, (uint32_t)K};
|
||||
cvk_mg_shape_t s_b = {(uint32_t)K, (uint32_t)N};
|
||||
cvk_mg_shape_t s_r = {(uint32_t)M, (uint32_t)N};
|
||||
|
||||
size_t s_size_a = mg_shape_size(&s_a) * bytesize_of_fmt(fmt);
|
||||
size_t s_size_b = mg_shape_size(&s_b) * bytesize_of_fmt(fmt);
|
||||
size_t s_size_r = mg_shape_size(&s_r) * bytesize_of_fmt(fmt) * bytesize_of_fmt(fmt);
|
||||
|
||||
CVI_RT_MEM devmem_a = CVI_RT_MemAlloc(ctx, s_size_a);
|
||||
CVI_RT_MEM devmem_b = CVI_RT_MemAlloc(ctx, s_size_b);
|
||||
CVI_RT_MEM devmem_r = CVI_RT_MemAlloc(ctx, s_size_r);
|
||||
|
||||
gaddr_t gaddr_a = CVI_RT_MemGetPAddr(devmem_a);
|
||||
gaddr_t gaddr_b = CVI_RT_MemGetPAddr(devmem_b);
|
||||
gaddr_t gaddr_r = CVI_RT_MemGetPAddr(devmem_r);
|
||||
|
||||
// copy to device memory
|
||||
CVI_RT_MemCopyS2D(ctx, devmem_a, (uint8_t *)bf16_A);
|
||||
CVI_RT_MemCopyS2D(ctx, devmem_b, (uint8_t *)bf16_B);
|
||||
CVI_RT_MemCopyS2D(ctx, devmem_r, (uint8_t *)bf16_C);
|
||||
// do computation with bmkernel
|
||||
// bmruntime_bmkernel_create(ctx, (void**)&bk_ctx);
|
||||
|
||||
size_t *slice_num =
|
||||
cvm_gemm((cvk_context_t *)bk_ctx, gaddr_a, gaddr_b, gaddr_r, M, K, N, CVK_FMT_BF16);
|
||||
free(slice_num); // no use slice_num infomation in BF16
|
||||
|
||||
gettimeofday(&t0, NULL);
|
||||
test_submit_comp(&ctx, bk_ctx);
|
||||
gettimeofday(&t1, NULL);
|
||||
elapsed = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec;
|
||||
printf("TPU GEMM takes %ld us\n", elapsed);
|
||||
|
||||
CVI_RT_MemCopyD2S(ctx, (uint8_t *)bf16_C, devmem_r);
|
||||
|
||||
// bmruntime_bmkernel_destroy(ctx);
|
||||
|
||||
CVI_RT_MemFree(ctx, devmem_a);
|
||||
CVI_RT_MemFree(ctx, devmem_b);
|
||||
CVI_RT_MemFree(ctx, devmem_r);
|
||||
|
||||
test_exit(&ctx, bk_ctx);
|
||||
|
||||
int cmp_res = array_cmp_int16("gemm", int16_C_ref, bf16_C, M * N);
|
||||
if (cmp_res != 0) {
|
||||
ret = -1;
|
||||
printf("Comparison failed for cblas_sgemm and bmblas_gemm!\n");
|
||||
#if 0
|
||||
printf("\nref/cmd is:");
|
||||
for (int i = 0; i < M; i++) {
|
||||
printf(">\n");
|
||||
for (int j = 0; j < N; j++) {
|
||||
printf("%f(0x%x)/%f(0x%x) ",
|
||||
convert_bf16_fp32(int16_C_ref[i * N + j]), int16_C_ref[i * N + j],
|
||||
convert_bf16_fp32(bf16_C[i * N + j]), bf16_C[i * N + j]
|
||||
);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
// printf("Comparison done for cblas_sgemm and bmblas_gemm!\n\n");
|
||||
}
|
||||
|
||||
delete[] bf16_A;
|
||||
delete[] bf16_B;
|
||||
delete[] bf16_C;
|
||||
delete[] int16_C_ref;
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Build parameter set `n` via param_<n>(), run it through test_param(), then
// release it. Expects `ctx` and `bk_ctx` to be in scope at the expansion site.
#define test_one_param(n)                   \
  do {                                      \
    param_t case_param = param_##n(bk_ctx); \
    test_param(&ctx, bk_ctx, &case_param);  \
    destroy_param(bk_ctx, &case_param);     \
  } while (0)
|
||||
|
||||
int main() {
|
||||
int round_mode;
|
||||
round_mode = set_store_feround();
|
||||
CVI_RT_HANDLE ctx;
|
||||
cvk_context_t *bk_ctx;
|
||||
|
||||
test_init(&ctx, &bk_ctx);
|
||||
|
||||
// int8 example
|
||||
if (0 != test_bmblas_gemm_bm1880v2(1, 100, 512, CVK_FMT_I8)) exit(-1);
|
||||
if (0 != test_bmblas_gemm_bm1880v2(2, 100, 512, CVK_FMT_I8)) exit(-1);
|
||||
if (0 != test_bmblas_gemm_bm1880v2(4, 100, 512, CVK_FMT_I8)) exit(-1);
|
||||
if (0 != test_bmblas_gemm_bm1880v2(8, 100, 512, CVK_FMT_I8)) exit(-1);
|
||||
|
||||
if (0 != test_bmblas_gemm_bm1880v2(1, 20000, 512, CVK_FMT_I8)) exit(-1);
|
||||
if (0 != test_bmblas_gemm_bm1880v2(10, 200, 10, CVK_FMT_I8)) exit(-1);
|
||||
if (0 != test_bmblas_gemm_bm1880v2(1, 200, 500, CVK_FMT_I8)) exit(-1);
|
||||
if (0 != test_bmblas_gemm_bm1880v2(1, 20, 50, CVK_FMT_I8)) exit(-1);
|
||||
if (0 != test_bmblas_gemm_bm1880v2(2, 10, 100, CVK_FMT_I8)) exit(-1);
|
||||
if (0 != test_bmblas_gemm_bm1880v2(2, 1000, 5, CVK_FMT_I8)) exit(-1);
|
||||
if (0 != test_bmblas_gemm_bm1880v2(20, 5, 5, CVK_FMT_I8)) exit(-1);
|
||||
if (0 != test_bmblas_gemm_bm1880v2(2, 5, 5, CVK_FMT_I8)) exit(-1);
|
||||
cvk_fmt_t fmts[2] = {CVK_FMT_BF16, CVK_FMT_I8};
|
||||
// cvk_fmt_t fmts[1] = {CVK_FMT_BF16};
|
||||
int fmts_sz = sizeof(fmts) / sizeof(fmts[0]);
|
||||
|
||||
for (int i = 0; i < fmts_sz; i++) {
|
||||
cvk_fmt_t fmt = fmts[i];
|
||||
if (0) {
|
||||
// backend implement
|
||||
for (int i = 0; i < 30; i++) test_one_param(0);
|
||||
|
||||
} else {
|
||||
// gemm, plz refer bmtap2/libbmblas
|
||||
int M = 10000;
|
||||
int N = 10000;
|
||||
int K = 1024;
|
||||
M = 2000;
|
||||
N = 2000;
|
||||
int m, k, n;
|
||||
|
||||
if (0) {
|
||||
for (m = 1; m <= M; m *= 10) {
|
||||
for (n = 1; n <= N; n += 200) {
|
||||
for (k = 1; k <= K; k *= 2) {
|
||||
if (0 != test_bmblas_gemm_bm1880v2(m, n, k, fmt)) {
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (1) {
|
||||
if (0 != test_bmblas_gemm_bm1880v2(1, 500, 512, fmt)) exit(-1);
|
||||
if (0 != test_bmblas_gemm_bm1880v2(1, 750, 512, fmt)) exit(-1);
|
||||
if (0 != test_bmblas_gemm_bm1880v2(1, 100, 512, fmt)) exit(-1);
|
||||
if (0 != test_bmblas_gemm_bm1880v2(2, 100, 512, fmt)) exit(-1);
|
||||
if (0 != test_bmblas_gemm_bm1880v2(4, 100, 512, fmt)) exit(-1);
|
||||
if (0 != test_bmblas_gemm_bm1880v2(8, 100, 512, fmt)) exit(-1);
|
||||
// if (0 != test_bmblas_gemm_bm1880v2(1, 50000, 512, fmt)) exit(-1);
|
||||
// if (0 != test_bmblas_gemm_bm1880v2(1, 75000, 512, fmt)) exit(-1);
|
||||
// if (0 != test_bmblas_gemm_bm1880v2(1, 10000, 512, fmt)) exit(-1);
|
||||
// if (0 != test_bmblas_gemm_bm1880v2(2, 10000, 512, fmt)) exit(-1);
|
||||
// if (0 != test_bmblas_gemm_bm1880v2(4, 10000, 512, fmt)) exit(-1);
|
||||
// if (0 != test_bmblas_gemm_bm1880v2(8, 10000, 512, fmt)) exit(-1);
|
||||
}
|
||||
|
||||
printf("Comparison done for cblas_sgemm and bmblas_gemm!\n\n");
|
||||
}
|
||||
}
|
||||
|
||||
test_exit(&ctx, bk_ctx);
|
||||
restore_feround(round_mode);
|
||||
return 0;
|
||||
}
|
||||
158
cvimath/tests/cvi1835/mask.cpp
Normal file
158
cvimath/tests/cvi1835/mask.cpp
Normal file
@ -0,0 +1,158 @@
|
||||
#include <cvimath_internal.h>
|
||||
#include <test_cvikernel_util.h>
|
||||
|
||||
#define OUT
|
||||
#define IN
|
||||
#include <cfloat>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <random>
|
||||
#include <string>
|
||||
//#define DBG
|
||||
|
||||
using namespace std;
|
||||
|
||||
/**
|
||||
 * pre_data means we test a fixed pattern; it should be the same as lut
|
||||
*/
|
||||
// enum TEST_MODE {
|
||||
// CVM_MASK_TYPE_GT_0 = 0, // remain > 0
|
||||
// //CVM_MASK_TYPE_GE_0, // remain >= 0
|
||||
// //CVM_MASK_TYPE_EQ_0, // remain = 0
|
||||
// //CVM_MASK_TYPE_LT_0, // remain < 0
|
||||
// //CVM_MASK_TYPE_LE_0, // remain <= 0
|
||||
// CVM_MASK_MAX
|
||||
//};
|
||||
|
||||
enum CVM_MASK_TYPE mode;
|
||||
|
||||
/* One fixed test vector: `len` input values and the outputs expected for them. */
struct pattern {
  float *input;  // input values
  float *ref;    // expected output, one per input value
  int len;       // number of entries in input / ref
};
|
||||
// Element count of a true array (not a pointer). The argument is parenthesised
// before indexing so an expression argument is indexed as a whole.
#define SIZEOF(x) (sizeof(x) / sizeof((x)[0]))
|
||||
float cvm_mask_type_gt_0_input[] = {-1 * pow(2, -62), -0.003, -1.0, -100000, 0.000001, 1, 1000,
|
||||
pow(2, 62), 0};
|
||||
|
||||
float cvm_mask_type_gt_0_output[] = {0, 0, 0, 0, 1, 1, 1, 1, 0};
|
||||
float cvm_mask_type_ge_0_output[] = {0, 0, 0, 0, 1, 1, 1, 1, 1};
|
||||
float cvm_mask_type_eq_0_output[] = {0, 0, 0, 0, 0, 0, 0, 0, 1};
|
||||
float cvm_mask_type_lt_0_output[] = {1, 1, 1, 1, 0, 0, 0, 0, 0};
|
||||
float cvm_mask_type_le_0_output[] = {1, 1, 1, 1, 0, 0, 0, 0, 1};
|
||||
|
||||
int input_sz = sizeof(cvm_mask_type_gt_0_input) / sizeof(cvm_mask_type_gt_0_input[0]);
|
||||
|
||||
static struct pattern patterns[] = {
|
||||
{cvm_mask_type_gt_0_input, cvm_mask_type_gt_0_output, input_sz},
|
||||
{cvm_mask_type_gt_0_input, cvm_mask_type_ge_0_output, input_sz},
|
||||
{cvm_mask_type_gt_0_input, cvm_mask_type_eq_0_output, input_sz},
|
||||
{cvm_mask_type_gt_0_input, cvm_mask_type_lt_0_output, input_sz},
|
||||
{cvm_mask_type_gt_0_input, cvm_mask_type_le_0_output, input_sz},
|
||||
};
|
||||
|
||||
/**
 * Run the mask kernel once for the currently selected global `mode` and
 * compare the device output bit-for-bit with the expected pattern.
 *
 * The input/expected data come from `patterns[mode]` and are laid out as a
 * 1 x 1 x 1 x len BF16 tensor. On the first mismatch a diagnostic is printed
 * to stderr and the process exits with -1.
 *
 * @param ctx  runtime handle used for the host <-> device transfers
 * @param bmk  kernel context used for local-memory allocation and emission
 */
static void testbench(CVI_RT_HANDLE *ctx, cvk_context_t *bmk) {
  cvk_fmt_t fmt = CVK_FMT_BF16;
  struct pattern *pat = &patterns[mode];

  // Shape: everything in the W dimension.
  uint32_t n = 1;
  uint32_t c = 1;
  uint32_t h = 1;
  uint32_t w = pat->len;
  cvk_tl_shape_t ifmap_shape = {n, c, h, w};
  cvk_tl_shape_t ofmap_shape = ifmap_shape;

  uint64_t in_elems = tl_shape_size(&ifmap_shape);
  uint64_t out_elems = tl_shape_size(&ofmap_shape);

  int elem_bytes = bytesize_of_fmt(fmt);
  uint64_t in_bytes = in_elems * elem_bytes;
  uint64_t out_bytes = out_elems * elem_bytes;

  // cvm_lut_tbl_bytesize() also fills in table_shape, which the table
  // allocations below depend on.
  cvk_tl_shape_t table_shape;
  uint64_t table_bytes = cvm_lut_tbl_bytesize(bmk, &table_shape, fmt);

  // Local-memory tensors. They are released in exactly the reverse order at
  // the end of this function, so keep the two lists in sync.
  cvk_tl_t *tl_ifmap = test_alloc_tl(bmk, ifmap_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_ofmap_bf16 = test_alloc_tl(bmk, ofmap_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_pos_neg_buf = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_0_idx_table = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);

  // scratch buffers for the kernel
  cvk_tl_t *tl_buf = test_alloc_tl(bmk, ofmap_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_buf2 = test_alloc_tl(bmk, tl_ofmap_bf16->shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_buf4 = test_alloc_tl(bmk, tl_ofmap_bf16->shape, fmt, /*align*/ 1);

  // Host-side buffers.
  uint16_t *host_input = (uint16_t *)xmalloc(in_bytes);
  uint16_t *host_ref = (uint16_t *)xmalloc(out_bytes);
  uint16_t *host_pos_neg_tbl = (uint16_t *)xmalloc(table_bytes);
  uint16_t *host_zero_tbl = (uint16_t *)xmalloc(table_bytes);

  cvm_gen_0_tbl(host_zero_tbl, &table_shape);
  cvm_pos_neg_tbl(host_pos_neg_tbl, &table_shape);

  // Convert the fp32 pattern and its expected result to BF16.
  for (uint32_t k = 0; k < in_elems; k++) {
    host_input[k] = convert_fp32_bf16(pat->input[k]);
    host_ref[k] = convert_fp32_bf16(pat->ref[k]);
  }

  // host -> local memory
  test_put_tensor_g2l_comp(ctx, bmk, tl_ifmap, (uint8_t *)host_input);
  test_put_tensor_g2l_comp(ctx, bmk, tl_pos_neg_buf, (uint8_t *)host_pos_neg_tbl);
  test_put_tensor_g2l_comp(ctx, bmk, tl_0_idx_table, (uint8_t *)host_zero_tbl);

  cvm_emit_mask(bmk, tl_ifmap, tl_buf, tl_buf2, tl_buf4, tl_pos_neg_buf, tl_0_idx_table,
                tl_ofmap_bf16, fmt, mode);

  test_submit_comp(ctx, bmk);

  // local memory -> host
  uint16_t *host_out = (uint16_t *)test_get_tensor_l2g_comp(ctx, bmk, tl_ofmap_bf16);

  for (uint32_t k = 0; k < in_elems; k++) {
    if (host_ref[k] == host_out[k]) {
      continue;
    }
    fprintf(stderr,
            "comparing failed at mode %d ofmap_data[%u] got %f(0x%x), ref "
            "%f(0x%x)\n",
            mode, k, convert_bf16_fp32(host_out[k]), host_out[k], convert_bf16_fp32(host_ref[k]),
            host_ref[k]);
    exit(-1);
  }

  // Release local memory in reverse allocation order.
  free_tl(bmk, tl_buf4);
  free_tl(bmk, tl_buf2);
  free_tl(bmk, tl_buf);
  free_tl(bmk, tl_0_idx_table);
  free_tl(bmk, tl_pos_neg_buf);
  free_tl(bmk, tl_ofmap_bf16);
  free_tl(bmk, tl_ifmap);

  free(host_input);
  free(host_ref);
  free(host_out);
  free(host_pos_neg_tbl);
  free(host_zero_tbl);
}
|
||||
|
||||
int main() {
|
||||
CVI_RT_HANDLE ctx;
|
||||
cvk_context_t *bmk;
|
||||
int round_mode;
|
||||
|
||||
round_mode = set_store_feround();
|
||||
|
||||
test_init(&ctx, &bmk);
|
||||
|
||||
for (int i = CVM_MASK_TYPE_GT_0; i < CVM_MASK_MAX; i++) {
|
||||
mode = static_cast<enum CVM_MASK_TYPE>(i);
|
||||
printf("test mode %d...\n", mode);
|
||||
testbench(&ctx, bmk);
|
||||
}
|
||||
|
||||
test_exit(&ctx, bmk);
|
||||
restore_feround(round_mode);
|
||||
return 0;
|
||||
}
|
||||
376
cvimath/tests/cvi1835/reciprocal.cpp
Normal file
376
cvimath/tests/cvi1835/reciprocal.cpp
Normal file
@ -0,0 +1,376 @@
|
||||
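/**
 * Test for cvm_emit_reciprocal(): runs the BF16 reciprocal kernel on the
 * device and compares its output with a host-side reference.
 */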
|
||||
#include <cvimath_internal.h>
|
||||
#include <test_cvikernel_util.h>
|
||||
|
||||
#include <cfloat>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <random>
|
||||
#include <string>
|
||||
//#define DBG
|
||||
|
||||
using namespace std;
|
||||
|
||||
/**
 * Test modes for the reciprocal test.
 *
 * "pre_data" means a fixed input pattern is tested; the result should be the
 * same as the LUT.
 */
enum TEST_MODE {
  // Fixed (pre-data) input pattern, output must match the reference exactly.
  PRE_DATA_COMPARE_FIX = 0,

  // Generated input values, output checked against an error bound.
  // NOTE(review): the name and the original comment say 2^-20 ~ 2^20, but
  // gen_input() in this file draws from [2^-10, 2^10] -- confirm which is meant.
  GEN_POW_20_DATA_MAX_ERROR,

  // Number of modes; used as the loop bound in main().
  TEST_MODE_MAX,
};
|
||||
|
||||
static TEST_MODE mode;
|
||||
|
||||
static uint16_t test_pattern[] = {
|
||||
0x0000, 0x38D2, 0x3952, 0x399D, 0x39D2, 0x3A03, 0x3A1D, 0x3A38, 0x3A52, 0x3A6C, 0x3A83, 0x3A90,
|
||||
0x3A9D, 0x3AAA, 0x3AB8, 0x3AC5, 0x3AD2, 0x3ADF, 0x3AEC, 0x3AF9, 0x3B03, 0x3B0A, 0x3B10, 0x3B17,
|
||||
0x3B1D, 0x3B24, 0x3B2A, 0x3B31, 0x3B38, 0x3B3E, 0x3B45, 0x3B4B, 0x3B52, 0x3B58, 0x3B5F, 0x3B65,
|
||||
0x3B6C, 0x3B72, 0x3B79, 0x3B80, 0x3B83, 0x3B86, 0x3B8A, 0x3B8D, 0x3B90, 0x3B93, 0x3B97, 0x3B9A,
|
||||
0x3B9D, 0x3BA1, 0x3BA4, 0x3BA7, 0x3BAA, 0x3BAE, 0x3BB1, 0x3BB4, 0x3BB8, 0x3BBB, 0x3BBE, 0x3BC1,
|
||||
0x3BC5, 0x3BC8, 0x3BCB, 0x3BCE, 0x3BD2, 0x3BD5, 0x3BD8, 0x3BDC, 0x3BDF, 0x3BE2, 0x3BE5, 0x3BE9,
|
||||
0x3BEC, 0x3BEF, 0x3BF2, 0x3BF6, 0x3BF9, 0x3BFC, 0x3C00, 0x3C01, 0x3C03, 0x3C05, 0x3C06, 0x3C08,
|
||||
0x3C0A, 0x3C0B, 0x3C0D, 0x3C0F, 0x3C10, 0x3C12, 0x3C13, 0x3C15, 0x3C17, 0x3C18, 0x3C1A, 0x3C1C,
|
||||
0x3C1D, 0x3C1F, 0x3C21, 0x3C22, 0x3C24, 0x3C25, 0x3C27, 0x3C29, 0x3C2A, 0x3C2C, 0x3C2E, 0x3C2F,
|
||||
0x3C31, 0x3C33, 0x3C34, 0x3C36, 0x3C38, 0x3C39, 0x3C3B, 0x3C3C, 0x3C3E, 0x3C40, 0x3C41, 0x3C43,
|
||||
0x3C45, 0x3C46, 0x3C48, 0x3C4A, 0x3C4B, 0x3C4D, 0x3C4E, 0x3C50, 0x3C52, 0x3C53, 0x3C55, 0x3C57,
|
||||
0x3C58, 0x3C5A, 0x3C5C, 0x3C5D, 0x3C5F, 0x3C60, 0x3C62, 0x3C64, 0x3C65, 0x3C67, 0x3C69, 0x3C6A,
|
||||
0x3C6C, 0x3C6E, 0x3C6F, 0x3C71, 0x3C72, 0x3C74, 0x3C76, 0x3C77, 0x3C79, 0x3C7B, 0x3C7C, 0x3C7E,
|
||||
0x3C80, 0x3C81, 0x3C81, 0x3C82, 0x3C83, 0x3C84, 0x3C85, 0x3C86, 0x3C86, 0x3C87, 0x3C88, 0x3C89,
|
||||
0x3C8A, 0x3C8A, 0x3C8B, 0x3C8C, 0x3C8D, 0x3C8E, 0x3C8F, 0x3C8F, 0x3C90, 0x3C91, 0x3C92, 0x3C93,
|
||||
0x3C93, 0x3C94, 0x3C95, 0x3C96, 0x3C97, 0x3C98, 0x3C98, 0x3C99, 0x3C9A, 0x3C9B, 0x3C9C, 0x3C9C,
|
||||
0x3C9D, 0x3C9E, 0x3C9F, 0x3CA0, 0x3CA1, 0x3CA1, 0x3CA2, 0x3CA3, 0x3CA4, 0x3CA5, 0x3CA5, 0x3CA6,
|
||||
0x3CA7, 0x3CA8, 0x3CA9, 0x3CAA, 0x3CAA, 0x3CAB, 0x3CAC, 0x3CAD, 0x3CAE, 0x3CAE, 0x3CAF, 0x3CB0,
|
||||
0x3CB1, 0x3CB2, 0x3CB3, 0x3CB3, 0x3CB4, 0x3CB5, 0x3CB6, 0x3CB7, 0x3CB8, 0x3CB8, 0x3CB9, 0x3CBA,
|
||||
0x3CBB, 0x3CBC, 0x3CBC, 0x3CBD, 0x3CBE, 0x3CBF, 0x3CC0, 0x3CC1, 0x3CC1, 0x3CC2, 0x3CC3, 0x3CC4,
|
||||
0x3CC5, 0x3CC5, 0x3CC6, 0x3CC7, 0x3CC8, 0x3CC9, 0x3CCA, 0x3CCA, 0x3CCB, 0x3CCC, 0x3CCD, 0x3CCE,
|
||||
0x3CCE, 0x3CCF, 0x3CD0, 0x3CD1, 0x3CD2, 0x3CD3, 0x3CD3, 0x3CD4, 0x3CD5, 0x3CD6, 0x3CD7, 0x3CD7,
|
||||
0x3CD8, 0x3CD9, 0x3CDA, 0x3CDB, 0x3CDC, 0x3CDC, 0x3CDD, 0x3CDE, 0x3CDF, 0x3CE0, 0x3CE0, 0x3CE1,
|
||||
0x3CE2, 0x3CE3, 0x3CE4, 0x3CE5, 0x3CE5, 0x3CE6, 0x3CE7, 0x3CE8, 0x3CE9, 0x3CE9, 0x3CEA, 0x3CEB,
|
||||
0x3CEC, 0x3CED, 0x3CEE, 0x3CEE, 0x3CEF, 0x3CF0, 0x3CF1, 0x3CF2, 0x3CF2, 0x3CF3, 0x3CF4, 0x3CF5,
|
||||
0x3CF6, 0x3CF7, 0x3CF7, 0x3CF8, 0x3CF9, 0x3CFA, 0x3CFB, 0x3CFB, 0x3CFC, 0x3CFD, 0x3CFE, 0x3CFF,
|
||||
0x3D00, 0x3D00, 0x3D01, 0x3D01, 0x3D01, 0x3D02, 0x3D02, 0x3D03, 0x3D03, 0x3D03, 0x3D04, 0x3D04,
|
||||
0x3D05, 0x3D05, 0x3D06, 0x3D06, 0x3D06, 0x3D07, 0x3D07, 0x3D08, 0x3D08, 0x3D08, 0x3D09, 0x3D09,
|
||||
0x3D0A, 0x3D0A, 0x3D0A, 0x3D0B, 0x3D0B, 0x3D0C, 0x3D0C, 0x3D0C, 0x3D0D, 0x3D0D, 0x3D0E, 0x3D0E,
|
||||
0x3D0F, 0x3D0F, 0x3D0F, 0x3D10, 0x3D10, 0x3D11, 0x3D11, 0x3D11, 0x3D12, 0x3D12, 0x3D13, 0x3D13,
|
||||
0x3D13, 0x3D14, 0x3D14, 0x3D15, 0x3D15, 0x3D16, 0x3D16, 0x3D16, 0x3D17, 0x3D17, 0x3D18, 0x3D18,
|
||||
0x3D18, 0x3D19, 0x3D19, 0x3D1A, 0x3D1A, 0x3D1A, 0x3D1B, 0x3D1B, 0x3D1C, 0x3D1C, 0x3D1C, 0x3D1D,
|
||||
0x3D1D, 0x3D1E, 0x3D1E, 0x3D1F, 0x3D1F, 0x3D1F, 0x3D20, 0x3D20, 0x3D21, 0x3D21, 0x3D21, 0x3D22,
|
||||
0x3D22, 0x3D23, 0x3D23, 0x3D23, 0x3D24, 0x3D24, 0x3D25, 0x3D25, 0x3D25, 0x3D26, 0x3D26, 0x3D27,
|
||||
0x3D27, 0x3D28, 0x3D28, 0x3D28, 0x3D29, 0x3D29, 0x3D2A, 0x3D2A, 0x3D2A, 0x3D2B, 0x3D2B, 0x3D2C,
|
||||
0x3D2C, 0x3D2C, 0x3D2D, 0x3D2D, 0x3D2E, 0x3D2E, 0x3D2E, 0x3D2F, 0x3D2F, 0x3D30, 0x3D30, 0x3D31,
|
||||
0x3D31, 0x3D31, 0x3D32, 0x3D32, 0x3D33, 0x3D33, 0x3D33, 0x3D34, 0x3D34, 0x3D35, 0x3D35, 0x3D35,
|
||||
0x3D36, 0x3D36, 0x3D37, 0x3D37, 0x3D38, 0x3D38, 0x3D38, 0x3D39, 0x3D39, 0x3D3A, 0x3D3A, 0x3D3A,
|
||||
0x3D3B, 0x3D3B, 0x3D3C, 0x3D3C, 0x3D3C, 0x3D3D, 0x3D3D, 0x3D3E, 0x3D3E, 0x3D3E, 0x3D3F, 0x3D3F,
|
||||
0x3D40, 0x3D40, 0x3D41, 0x3D41, 0x3D41, 0x3D42, 0x3D42, 0x3D43, 0x3D43, 0x3D43, 0x3D44, 0x3D44,
|
||||
0x3D45, 0x3D45, 0x3D45, 0x3D46, 0x3D46, 0x3D47, 0x3D47, 0x3D47, 0x3D48, 0x3D48, 0x3D49, 0x3D49,
|
||||
0x3D4A, 0x3D4A, 0x3D4A, 0x3D4B, 0x3D4B, 0x3D4C, 0x3D4C, 0x3D4C, 0x3D4D, 0x3D4D, 0x3D4E, 0x3D4E,
|
||||
0x3D4E, 0x3D4F, 0x3D4F, 0x3D50, 0x3D50, 0x3D50, 0x3D51, 0x3D51, 0x3D52, 0x3D52, 0x3D53, 0x3D53,
|
||||
0x3D53, 0x3D54, 0x3D54, 0x3D55, 0x3D55, 0x3D55, 0x3D56, 0x3D56, 0x3D57, 0x3D57, 0x3D57, 0x3D58,
|
||||
0x3D58, 0x3D59, 0x3D59, 0x3D59, 0x3D5A, 0x3D5A, 0x3D5B, 0x3D5B, 0x3D5C, 0x3D5C, 0x3D5C, 0x3D5D,
|
||||
0x3D5D, 0x3D5E, 0x3D5E, 0x3D5E, 0x3D5F, 0x3D5F, 0x3D60, 0x3D60, 0x3D60, 0x3D61, 0x3D61, 0x3D62,
|
||||
0x3D62, 0x3D63, 0x3D63, 0x3D63, 0x3D64, 0x3D64, 0x3D65, 0x3D65, 0x3D65, 0x3D66, 0x3D66, 0x3D67,
|
||||
0x3D67, 0x3D67, 0x3D68, 0x3D68, 0x3D69, 0x3D69, 0x3D69, 0x3D6A, 0x3D6A, 0x3D6B, 0x3D6B, 0x3D6C,
|
||||
0x3D6C, 0x3D6C, 0x3D6D, 0x3D6D, 0x3D6E, 0x3D6E, 0x3D6E, 0x3D6F, 0x3D6F, 0x3D70, 0x3D70, 0x3D70,
|
||||
0x3D71, 0x3D71, 0x3D72, 0x3D72, 0x3D72, 0x3D73, 0x3D73, 0x3D74, 0x3D74, 0x3D75, 0x3D75, 0x3D75,
|
||||
0x3D76, 0x3D76, 0x3D77, 0x3D77, 0x3D77, 0x3D78, 0x3D78, 0x3D79, 0x3D79, 0x3D79, 0x3D7A, 0x3D7A,
|
||||
0x3D7B, 0x3D7B, 0x3D7B, 0x3D7C, 0x3D7C, 0x3D7D, 0x3D7D, 0x3D7E, 0x3D7E, 0x3D7E, 0x3D7F, 0x3D7F,
|
||||
0x3D80, 0x3D80, 0x3D80, 0x3D80, 0x3D81, 0x3D81, 0x3D81, 0x3D81, 0x3D81, 0x3D82, 0x3D82, 0x3D82,
|
||||
0x3D82, 0x3D82, 0x3D83, 0x3D83, 0x3D83, 0x3D83, 0x3D83, 0x3D84, 0x3D84, 0x3D84, 0x3D84, 0x3D85,
|
||||
0x3D85, 0x3D85, 0x3D85, 0x3D85, 0x3D86, 0x3D86, 0x3D86, 0x3D86, 0x3D86, 0x3D87, 0x3D87, 0x3D87,
|
||||
0x3D87, 0x3D87, 0x3D88, 0x3D88, 0x3D88, 0x3D88, 0x3D88, 0x3D89, 0x3D89, 0x3D89, 0x3D89, 0x3D89,
|
||||
0x3D8A, 0x3D8A, 0x3D8A, 0x3D8A, 0x3D8A, 0x3D8B, 0x3D8B, 0x3D8B, 0x3D8B, 0x3D8B, 0x3D8C, 0x3D8C,
|
||||
0x3D8C, 0x3D8C, 0x3D8C, 0x3D8D, 0x3D8D, 0x3D8D, 0x3D8D, 0x3D8E, 0x3D8E, 0x3D8E, 0x3D8E, 0x3D8E,
|
||||
0x3D8F, 0x3D8F, 0x3D8F, 0x3D8F, 0x3D8F, 0x3D90, 0x3D90, 0x3D90, 0x3D90, 0x3D90, 0x3D91, 0x3D91,
|
||||
0x3D91, 0x3D91, 0x3D91, 0x3D92, 0x3D92, 0x3D92, 0x3D92, 0x3D92, 0x3D93, 0x3D93, 0x3D93, 0x3D93,
|
||||
0x3D93, 0x3D94, 0x3D94, 0x3D94, 0x3D94, 0x3D94, 0x3D95, 0x3D95, 0x3D95, 0x3D95, 0x3D96, 0x3D96,
|
||||
0x3D96, 0x3D96, 0x3D96, 0x3D97, 0x3D97, 0x3D97, 0x3D97, 0x3D97, 0x3D98, 0x3D98, 0x3D98, 0x3D98,
|
||||
0x3D98, 0x3D99, 0x3D99, 0x3D99, 0x3D99, 0x3D99, 0x3D9A, 0x3D9A, 0x3D9A, 0x3D9A, 0x3D9A, 0x3D9B,
|
||||
0x3D9B, 0x3D9B, 0x3D9B, 0x3D9B, 0x3D9C, 0x3D9C, 0x3D9C, 0x3D9C, 0x3D9C, 0x3D9D, 0x3D9D, 0x3D9D,
|
||||
0x3D9D, 0x3D9D, 0x3D9E, 0x3D9E, 0x3D9E, 0x3D9E, 0x3D9F, 0x3D9F, 0x3D9F, 0x3D9F, 0x3D9F, 0x3DA0,
|
||||
0x3DA0, 0x3DA0, 0x3DA0, 0x3DA0, 0x3DA1, 0x3DA1, 0x3DA1, 0x3DA1, 0x3DA1, 0x3DA2, 0x3DA2, 0x3DA2,
|
||||
0x3DA2, 0x3DA2, 0x3DA3, 0x3DA3, 0x3DA3, 0x3DA3, 0x3DA3, 0x3DA4, 0x3DA4, 0x3DA4, 0x3DA4, 0x3DA4,
|
||||
0x3DA5, 0x3DA5, 0x3DA5, 0x3DA5, 0x3DA5, 0x3DA6, 0x3DA6, 0x3DA6, 0x3DA6, 0x3DA7, 0x3DA7, 0x3DA7,
|
||||
0x3DA7, 0x3DA7, 0x3DA8, 0x3DA8, 0x3DA8, 0x3DA8, 0x3DA8, 0x3DA9, 0x3DA9, 0x3DA9, 0x3DA9, 0x3DA9,
|
||||
0x3DAA, 0x3DAA, 0x3DAA, 0x3DAA, 0x3DAA, 0x3DAB, 0x3DAB, 0x3DAB, 0x3DAB, 0x3DAB, 0x3DAC, 0x3DAC,
|
||||
0x3DAC, 0x3DAC, 0x3DAC, 0x3DAD, 0x3DAD, 0x3DAD, 0x3DAD, 0x3DAD, 0x3DAE, 0x3DAE, 0x3DAE, 0x3DAE,
|
||||
0x3DAE, 0x3DAF, 0x3DAF, 0x3DAF, 0x3DAF, 0x3DB0, 0x3DB0, 0x3DB0, 0x3DB0, 0x3DB0, 0x3DB1, 0x3DB1,
|
||||
0x3DB1, 0x3DB1, 0x3DB1, 0x3DB2, 0x3DB2, 0x3DB2, 0x3DB2, 0x3DB2, 0x3DB3, 0x3DB3, 0x3DB3, 0x3DB3,
|
||||
0x3DB3, 0x3DB4, 0x3DB4, 0x3DB4, 0x3DB4, 0x3DB4, 0x3DB5, 0x3DB5, 0x3DB5, 0x3DB5, 0x3DB5, 0x3DB6,
|
||||
0x3DB6, 0x3DB6, 0x3DB6, 0x3DB6, 0x3DB7, 0x3DB7, 0x3DB7, 0x3DB7, 0x3DB8, 0x3DB8, 0x3DB8, 0x3DB8,
|
||||
0x3DB8, 0x3DB9, 0x3DB9, 0x3DB9, 0x3DB9, 0x3DB9, 0x3DBA, 0x3DBA, 0x3DBA, 0x3DBA, 0x3DBA, 0x3DBB,
|
||||
0x3DBB, 0x3DBB, 0x3DBB, 0x3DBB, 0x3DBC, 0x3DBC, 0x3DBC, 0x3DBC, 0x3DBC, 0x3DBD, 0x3DBD, 0x3DBD,
|
||||
0x3DBD, 0x3DBD, 0x3DBE, 0x3DBE, 0x3DBE, 0x3DBE, 0x3DBE, 0x3DBF, 0x3DBF, 0x3DBF, 0x3DBF, 0x3DBF,
|
||||
0x3DC0, 0x3DC0, 0x3DC0, 0x3DC0, 0x3DC1, 0x3DC1, 0x3DC1, 0x3DC1, 0x3DC1, 0x3DC2, 0x3DC2, 0x3DC2,
|
||||
0x3DC2, 0x3DC2, 0x3DC3, 0x3DC3, 0x3DC3, 0x3DC3, 0x3DC3, 0x3DC4, 0x3DC4, 0x3DC4, 0x3DC4, 0x3DC4,
|
||||
0x3DC5, 0x3DC5, 0x3DC5, 0x3DC5, 0x3DC5, 0x3DC6, 0x3DC6, 0x3DC6, 0x3DC6, 0x3DC6, 0x3DC7, 0x3DC7,
|
||||
0x3DC7, 0x3DC7, 0x3DC7, 0x3DC8, 0x3DC8, 0x3DC8, 0x3DC8, 0x3DC8, 0x3DC9, 0x3DC9, 0x3DC9, 0x3DC9,
|
||||
0x3DCA, 0x3DCA, 0x3DCA, 0x3DCA, 0x3DCA, 0x3DCB, 0x3DCB, 0x3DCB, 0x3DCB, 0x3DCB, 0x3DCC, 0x3DCC,
|
||||
0x3DCC, 0x3DCC, 0x3DCC, 0x3DCD, 0x3DCE, 0x3DCF, 0x3DD0, 0x3DD1, 0x3DD2, 0x3DD3, 0x3DD4, 0x3DD5,
|
||||
0x3DD6, 0x3DD7, 0x3DD8, 0x3DD9, 0x3DDA, 0x3DDB, 0x3DDC, 0x3DDD, 0x3DDE, 0x3DDF, 0x3DE0, 0x3DE1,
|
||||
0x3DE2, 0x3DE3, 0x3DE4, 0x3DE5,
|
||||
};
|
||||
|
||||
static uint16_t test_pattern_ref[] = {
|
||||
0x7f7f, 0x461c, 0x459c, 0x4551, 0x451c, 0x44fa, 0x44d1, 0x44b2, 0x449c, 0x448b, 0x447a, 0x4464,
|
||||
0x4451, 0x4441, 0x4432, 0x4426, 0x441c, 0x4413, 0x440b, 0x4404, 0x43fa, 0x43ed, 0x43e4, 0x43d9,
|
||||
0x43d1, 0x43c8, 0x43c1, 0x43b9, 0x43b2, 0x43ac, 0x43a6, 0x43a1, 0x439c, 0x4398, 0x4393, 0x438f,
|
||||
0x438b, 0x4387, 0x4384, 0x4380, 0x437a, 0x4375, 0x436d, 0x4368, 0x4364, 0x435f, 0x4359, 0x4355,
|
||||
0x4351, 0x434c, 0x4348, 0x4344, 0x4341, 0x433c, 0x4339, 0x4336, 0x4332, 0x432f, 0x432c, 0x432a,
|
||||
0x4326, 0x4324, 0x4321, 0x431f, 0x431c, 0x431a, 0x4318, 0x4315, 0x4313, 0x4311, 0x430f, 0x430d,
|
||||
0x430b, 0x4309, 0x4307, 0x4305, 0x4304, 0x4302, 0x4300, 0x42fe, 0x42fa, 0x42f6, 0x42f5, 0x42f1,
|
||||
0x42ed, 0x42ec, 0x42e8, 0x42e5, 0x42e4, 0x42e0, 0x42df, 0x42dc, 0x42d9, 0x42d8, 0x42d5, 0x42d2,
|
||||
0x42d1, 0x42ce, 0x42cc, 0x42ca, 0x42c8, 0x42c7, 0x42c4, 0x42c2, 0x42c1, 0x42bf, 0x42bc, 0x42bb,
|
||||
0x42b9, 0x42b7, 0x42b6, 0x42b4, 0x42b2, 0x42b1, 0x42af, 0x42ae, 0x42ac, 0x42ab, 0x42aa, 0x42a8,
|
||||
0x42a6, 0x42a5, 0x42a4, 0x42a2, 0x42a1, 0x42a0, 0x429f, 0x429e, 0x429c, 0x429b, 0x429a, 0x4298,
|
||||
0x4298, 0x4296, 0x4295, 0x4294, 0x4293, 0x4292, 0x4291, 0x4290, 0x428f, 0x428e, 0x428d, 0x428c,
|
||||
0x428b, 0x428a, 0x4289, 0x4288, 0x4287, 0x4286, 0x4285, 0x4285, 0x4284, 0x4283, 0x4282, 0x4281,
|
||||
0x4280, 0x427e, 0x427e, 0x427c, 0x427a, 0x4278, 0x4276, 0x4275, 0x4275, 0x4273, 0x4271, 0x426f,
|
||||
0x426d, 0x426d, 0x426c, 0x426a, 0x4268, 0x4267, 0x4265, 0x4265, 0x4264, 0x4262, 0x4260, 0x425f,
|
||||
0x425f, 0x425d, 0x425c, 0x425a, 0x4259, 0x4258, 0x4258, 0x4256, 0x4255, 0x4253, 0x4252, 0x4252,
|
||||
0x4251, 0x424f, 0x424e, 0x424d, 0x424c, 0x424c, 0x424a, 0x4249, 0x4248, 0x4247, 0x4247, 0x4245,
|
||||
0x4244, 0x4243, 0x4242, 0x4241, 0x4241, 0x4240, 0x423f, 0x423d, 0x423c, 0x423c, 0x423b, 0x423a,
|
||||
0x4239, 0x4238, 0x4237, 0x4237, 0x4236, 0x4235, 0x4234, 0x4233, 0x4232, 0x4232, 0x4231, 0x4230,
|
||||
0x422f, 0x422e, 0x422e, 0x422d, 0x422c, 0x422c, 0x422b, 0x422a, 0x422a, 0x4229, 0x4228, 0x4227,
|
||||
0x4226, 0x4226, 0x4225, 0x4225, 0x4224, 0x4223, 0x4222, 0x4222, 0x4221, 0x4221, 0x4220, 0x421f,
|
||||
0x421f, 0x421e, 0x421e, 0x421d, 0x421c, 0x421b, 0x421b, 0x421b, 0x421a, 0x4219, 0x4218, 0x4218,
|
||||
0x4218, 0x4217, 0x4216, 0x4216, 0x4215, 0x4215, 0x4214, 0x4214, 0x4213, 0x4212, 0x4212, 0x4212,
|
||||
0x4211, 0x4210, 0x4210, 0x420f, 0x420f, 0x420e, 0x420e, 0x420d, 0x420d, 0x420d, 0x420c, 0x420b,
|
||||
0x420b, 0x420a, 0x420a, 0x420a, 0x4209, 0x4209, 0x4208, 0x4207, 0x4207, 0x4207, 0x4206, 0x4206,
|
||||
0x4205, 0x4205, 0x4205, 0x4204, 0x4204, 0x4203, 0x4203, 0x4203, 0x4202, 0x4202, 0x4201, 0x4201,
|
||||
0x4200, 0x4200, 0x41fe, 0x41fe, 0x41fe, 0x41fc, 0x41fc, 0x41fa, 0x41fa, 0x41fa, 0x41f8, 0x41f8,
|
||||
0x41f6, 0x41f6, 0x41f5, 0x41f5, 0x41f5, 0x41f3, 0x41f3, 0x41f1, 0x41f1, 0x41f1, 0x41ef, 0x41ef,
|
||||
0x41ed, 0x41ed, 0x41ed, 0x41ec, 0x41ec, 0x41ea, 0x41ea, 0x41ea, 0x41e8, 0x41e8, 0x41e7, 0x41e7,
|
||||
0x41e5, 0x41e5, 0x41e5, 0x41e4, 0x41e4, 0x41e2, 0x41e2, 0x41e2, 0x41e0, 0x41e0, 0x41df, 0x41df,
|
||||
0x41df, 0x41dd, 0x41dd, 0x41dc, 0x41dc, 0x41da, 0x41da, 0x41da, 0x41d9, 0x41d9, 0x41d8, 0x41d8,
|
||||
0x41d8, 0x41d6, 0x41d6, 0x41d5, 0x41d5, 0x41d5, 0x41d3, 0x41d3, 0x41d2, 0x41d2, 0x41d2, 0x41d1,
|
||||
0x41d1, 0x41cf, 0x41cf, 0x41ce, 0x41ce, 0x41ce, 0x41cd, 0x41cd, 0x41cc, 0x41cc, 0x41cc, 0x41ca,
|
||||
0x41ca, 0x41c9, 0x41c9, 0x41c9, 0x41c8, 0x41c8, 0x41c7, 0x41c7, 0x41c7, 0x41c5, 0x41c5, 0x41c4,
|
||||
0x41c4, 0x41c3, 0x41c3, 0x41c3, 0x41c2, 0x41c2, 0x41c1, 0x41c1, 0x41c1, 0x41c0, 0x41c0, 0x41bf,
|
||||
0x41bf, 0x41bf, 0x41bd, 0x41bd, 0x41bc, 0x41bc, 0x41bc, 0x41bb, 0x41bb, 0x41ba, 0x41ba, 0x41b9,
|
||||
0x41b9, 0x41b9, 0x41b8, 0x41b8, 0x41b7, 0x41b7, 0x41b7, 0x41b6, 0x41b6, 0x41b5, 0x41b5, 0x41b5,
|
||||
0x41b4, 0x41b4, 0x41b3, 0x41b3, 0x41b2, 0x41b2, 0x41b2, 0x41b1, 0x41b1, 0x41b0, 0x41b0, 0x41b0,
|
||||
0x41af, 0x41af, 0x41ae, 0x41ae, 0x41ae, 0x41ad, 0x41ad, 0x41ac, 0x41ac, 0x41ac, 0x41ac, 0x41ac,
|
||||
0x41ab, 0x41ab, 0x41aa, 0x41aa, 0x41aa, 0x41a9, 0x41a9, 0x41a8, 0x41a8, 0x41a8, 0x41a7, 0x41a7,
|
||||
0x41a6, 0x41a6, 0x41a6, 0x41a5, 0x41a5, 0x41a5, 0x41a5, 0x41a5, 0x41a4, 0x41a4, 0x41a3, 0x41a3,
|
||||
0x41a2, 0x41a2, 0x41a2, 0x41a1, 0x41a1, 0x41a1, 0x41a1, 0x41a1, 0x41a0, 0x41a0, 0x419f, 0x419f,
|
||||
0x419f, 0x419e, 0x419e, 0x419e, 0x419e, 0x419e, 0x419d, 0x419d, 0x419c, 0x419c, 0x419b, 0x419b,
|
||||
0x419b, 0x419b, 0x419b, 0x419a, 0x419a, 0x419a, 0x4199, 0x4199, 0x4198, 0x4198, 0x4198, 0x4198,
|
||||
0x4198, 0x4197, 0x4197, 0x4197, 0x4196, 0x4196, 0x4196, 0x4196, 0x4195, 0x4195, 0x4195, 0x4194,
|
||||
0x4194, 0x4194, 0x4194, 0x4194, 0x4193, 0x4193, 0x4192, 0x4192, 0x4192, 0x4192, 0x4192, 0x4191,
|
||||
0x4191, 0x4190, 0x4190, 0x4190, 0x4190, 0x4190, 0x418f, 0x418f, 0x418f, 0x418e, 0x418e, 0x418e,
|
||||
0x418e, 0x418e, 0x418d, 0x418d, 0x418d, 0x418d, 0x418d, 0x418c, 0x418c, 0x418b, 0x418b, 0x418b,
|
||||
0x418b, 0x418b, 0x418a, 0x418a, 0x418a, 0x418a, 0x418a, 0x4189, 0x4189, 0x4189, 0x4189, 0x4189,
|
||||
0x4188, 0x4188, 0x4187, 0x4187, 0x4187, 0x4187, 0x4187, 0x4186, 0x4186, 0x4186, 0x4186, 0x4186,
|
||||
0x4185, 0x4185, 0x4185, 0x4185, 0x4185, 0x4184, 0x4184, 0x4184, 0x4184, 0x4184, 0x4183, 0x4183,
|
||||
0x4183, 0x4183, 0x4183, 0x4182, 0x4182, 0x4182, 0x4182, 0x4181, 0x4181, 0x4181, 0x4181, 0x4181,
|
||||
0x4180, 0x4180, 0x4180, 0x4180, 0x417e, 0x417e, 0x417e, 0x417e, 0x417e, 0x417c, 0x417c, 0x417c,
|
||||
0x417c, 0x417c, 0x417a, 0x417a, 0x417a, 0x417a, 0x417a, 0x4178, 0x4178, 0x4178, 0x4178, 0x4176,
|
||||
0x4176, 0x4176, 0x4176, 0x4176, 0x4175, 0x4175, 0x4175, 0x4175, 0x4175, 0x4173, 0x4173, 0x4173,
|
||||
0x4173, 0x4173, 0x4171, 0x4171, 0x4171, 0x4171, 0x4171, 0x416f, 0x416f, 0x416f, 0x416f, 0x416f,
|
||||
0x416d, 0x416d, 0x416d, 0x416d, 0x416d, 0x416c, 0x416c, 0x416c, 0x416c, 0x416c, 0x416a, 0x416a,
|
||||
0x416a, 0x416a, 0x416a, 0x4168, 0x4168, 0x4168, 0x4168, 0x4167, 0x4167, 0x4167, 0x4167, 0x4167,
|
||||
0x4165, 0x4165, 0x4165, 0x4165, 0x4165, 0x4164, 0x4164, 0x4164, 0x4164, 0x4164, 0x4162, 0x4162,
|
||||
0x4162, 0x4162, 0x4162, 0x4160, 0x4160, 0x4160, 0x4160, 0x4160, 0x415f, 0x415f, 0x415f, 0x415f,
|
||||
0x415f, 0x415d, 0x415d, 0x415d, 0x415d, 0x415d, 0x415c, 0x415c, 0x415c, 0x415c, 0x415a, 0x415a,
|
||||
0x415a, 0x415a, 0x415a, 0x4159, 0x4159, 0x4159, 0x4159, 0x4159, 0x4158, 0x4158, 0x4158, 0x4158,
|
||||
0x4158, 0x4156, 0x4156, 0x4156, 0x4156, 0x4156, 0x4155, 0x4155, 0x4155, 0x4155, 0x4155, 0x4153,
|
||||
0x4153, 0x4153, 0x4153, 0x4153, 0x4152, 0x4152, 0x4152, 0x4152, 0x4152, 0x4151, 0x4151, 0x4151,
|
||||
0x4151, 0x4151, 0x414f, 0x414f, 0x414f, 0x414f, 0x414e, 0x414e, 0x414e, 0x414e, 0x414e, 0x414d,
|
||||
0x414d, 0x414d, 0x414d, 0x414d, 0x414c, 0x414c, 0x414c, 0x414c, 0x414c, 0x414a, 0x414a, 0x414a,
|
||||
0x414a, 0x414a, 0x4149, 0x4149, 0x4149, 0x4149, 0x4149, 0x4148, 0x4148, 0x4148, 0x4148, 0x4148,
|
||||
0x4147, 0x4147, 0x4147, 0x4147, 0x4147, 0x4145, 0x4145, 0x4145, 0x4145, 0x4144, 0x4144, 0x4144,
|
||||
0x4144, 0x4144, 0x4143, 0x4143, 0x4143, 0x4143, 0x4143, 0x4142, 0x4142, 0x4142, 0x4142, 0x4142,
|
||||
0x4141, 0x4141, 0x4141, 0x4141, 0x4141, 0x4140, 0x4140, 0x4140, 0x4140, 0x4140, 0x413f, 0x413f,
|
||||
0x413f, 0x413f, 0x413f, 0x413d, 0x413d, 0x413d, 0x413d, 0x413d, 0x413c, 0x413c, 0x413c, 0x413c,
|
||||
0x413c, 0x413b, 0x413b, 0x413b, 0x413b, 0x413a, 0x413a, 0x413a, 0x413a, 0x413a, 0x4139, 0x4139,
|
||||
0x4139, 0x4139, 0x4139, 0x4138, 0x4138, 0x4138, 0x4138, 0x4138, 0x4137, 0x4137, 0x4137, 0x4137,
|
||||
0x4137, 0x4136, 0x4136, 0x4136, 0x4136, 0x4136, 0x4135, 0x4135, 0x4135, 0x4135, 0x4135, 0x4134,
|
||||
0x4134, 0x4134, 0x4134, 0x4134, 0x4133, 0x4133, 0x4133, 0x4133, 0x4132, 0x4132, 0x4132, 0x4132,
|
||||
0x4132, 0x4131, 0x4131, 0x4131, 0x4131, 0x4131, 0x4130, 0x4130, 0x4130, 0x4130, 0x4130, 0x412f,
|
||||
0x412f, 0x412f, 0x412f, 0x412f, 0x412e, 0x412e, 0x412e, 0x412e, 0x412e, 0x412d, 0x412d, 0x412d,
|
||||
0x412d, 0x412d, 0x412c, 0x412c, 0x412c, 0x412c, 0x412c, 0x412c, 0x412c, 0x412c, 0x412c, 0x412c,
|
||||
0x412b, 0x412b, 0x412b, 0x412b, 0x412a, 0x412a, 0x412a, 0x412a, 0x412a, 0x4129, 0x4129, 0x4129,
|
||||
0x4129, 0x4129, 0x4128, 0x4128, 0x4128, 0x4128, 0x4128, 0x4127, 0x4127, 0x4127, 0x4127, 0x4127,
|
||||
0x4126, 0x4126, 0x4126, 0x4126, 0x4126, 0x4125, 0x4125, 0x4125, 0x4125, 0x4125, 0x4125, 0x4125,
|
||||
0x4125, 0x4125, 0x4125, 0x4124, 0x4124, 0x4124, 0x4124, 0x4124, 0x4123, 0x4123, 0x4123, 0x4123,
|
||||
0x4122, 0x4122, 0x4122, 0x4122, 0x4122, 0x4121, 0x4121, 0x4121, 0x4121, 0x4121, 0x4121, 0x4121,
|
||||
0x4121, 0x4121, 0x4121, 0x4120, 0x411f, 0x411e, 0x411e, 0x411d, 0x411c, 0x411b, 0x411b, 0x411a,
|
||||
0x4119, 0x4118, 0x4118, 0x4117, 0x4116, 0x4116, 0x4115, 0x4114, 0x4114, 0x4113, 0x4112, 0x4112,
|
||||
0x4111, 0x4110, 0x4110, 0x410f,
|
||||
};
|
||||
|
||||
static void tl_lut_ref(uint16_t *ofmap, uint16_t *ifmap, cvk_tl_shape_t ifmap_shape) {
|
||||
for (uint32_t i = 0; i < tl_shape_size(&ifmap_shape); i++) {
|
||||
if (mode == PRE_DATA_COMPARE_FIX) {
|
||||
ofmap[i] = test_pattern_ref[i];
|
||||
} else {
|
||||
uint16_t v = convert_fp32_bf16(1 / (1.0 * (convert_bf16_fp32(ifmap[i]))));
|
||||
ofmap[i] = v;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool verify(uint16_t *ofmap_data, uint16_t *ref_data, uint16_t *ifmap,
|
||||
uint64_t ifmap_shape_size, TEST_MODE mode) {
|
||||
uint64_t size = ifmap_shape_size;
|
||||
|
||||
for (uint64_t i = 0; i < size; i++) {
|
||||
bool is_close;
|
||||
uint16_t ref;
|
||||
uint16_t ofmap_data_bf16;
|
||||
float ref_f;
|
||||
float ofmap_data_f;
|
||||
|
||||
ref = ref_data[i];
|
||||
ref_f = convert_bf16_fp32(ref);
|
||||
ofmap_data_f = convert_bf16_fp32(ofmap_data[i]);
|
||||
ofmap_data_bf16 = ofmap_data[i];
|
||||
|
||||
if (mode == PRE_DATA_COMPARE_FIX) {
|
||||
is_close = ofmap_data[i] == ref;
|
||||
} else {
|
||||
is_close = fabs(ref_f - ofmap_data_f) < 0.001;
|
||||
}
|
||||
|
||||
if (!is_close) {
|
||||
fprintf(stderr,
|
||||
"comparing failed at ofmap_data[%lu](input:%e), got %x, exp %x, "
|
||||
"fp32: got %e exp %e\n",
|
||||
i, convert_bf16_fp32(ifmap[i]), ofmap_data_bf16, ref, ofmap_data_f, ref_f);
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void gen_input(uint16_t *ifmap, uint64_t ifmap_shape_size) {
|
||||
if (mode == PRE_DATA_COMPARE_FIX) {
|
||||
memcpy(ifmap, &test_pattern, sizeof(test_pattern));
|
||||
} else {
|
||||
for (uint64_t i = 0; i < ifmap_shape_size; i++) {
|
||||
srand(static_cast<unsigned>(time(0)));
|
||||
std::random_device rd;
|
||||
std::mt19937 e2(rd());
|
||||
float LO = pow(2, -10);
|
||||
float HI = pow(2, 10);
|
||||
// std::uniform_real_distribution<> dist(pow(2,-62), pow(2,63));
|
||||
for (uint64_t i = 0; i < ifmap_shape_size; i++) {
|
||||
// float r3 = dist(e2);
|
||||
float r3 = LO + static_cast<float>(rand()) / (static_cast<float>(RAND_MAX / (HI - LO)));
|
||||
ifmap[i] = convert_fp32_bf16(r3);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DBG
|
||||
for (uint64_t i = 0; i < ifmap_shape_size; i++) {
|
||||
printf("source if[%lu] bf16 %f 0x%x, log2f is %f\n", i, convert_bf16_fp32(ifmap[i]), ifmap[i],
|
||||
floor(log2((convert_bf16_fp32(ifmap[i])))));
|
||||
}
|
||||
#endif /* ifdef DBG */
|
||||
}
|
||||
|
||||
/**
 * Run the reciprocal kernel on one BF16 tensor and check the result.
 *
 * Input data and the host reference are generated according to the global
 * `mode`; verify() terminates the process on a mismatch.
 *
 * @param ctx      runtime handle used for host <-> device transfers
 * @param bmk      kernel context used for local-memory allocation and emission
 * @param input_n  tensor N dimension
 * @param input_c  tensor C dimension
 * @param input_h  tensor H dimension
 * @param input_w  tensor W dimension
 */
static void testbench(CVI_RT_HANDLE *ctx, cvk_context_t *bmk, uint32_t input_n, uint32_t input_c,
                      uint32_t input_h, uint32_t input_w) {
  cvk_fmt_t fmt = CVK_FMT_BF16;

  // TODO: check more shape / align
  cvk_tl_shape_t ifmap_shape = {input_n, input_c, input_h, input_w};
  cvk_tl_shape_t ofmap_shape = ifmap_shape;
  cvk_tl_shape_t table_shape;
  cvm_table_shape(bmk, &table_shape);

  // Element counts and the matching byte sizes for the host buffers.
  uint64_t in_elems = tl_shape_size(&ifmap_shape);
  uint64_t out_elems = tl_shape_size(&ofmap_shape);
  uint64_t table_elems = tl_shape_size(&table_shape);

  int elem_bytes = bytesize_of_fmt(fmt);
  uint64_t in_bytes = in_elems * elem_bytes;
  uint64_t out_bytes = out_elems * elem_bytes;
  uint64_t table_bytes = table_elems * elem_bytes;

  uint16_t *host_in = (uint16_t *)xmalloc(in_bytes);
  uint16_t *host_ref = (uint16_t *)xmalloc(out_bytes);
  uint16_t *host_table = (uint16_t *)xmalloc(table_bytes);
  uint16_t *host_table_mantissa = (uint16_t *)xmalloc(table_bytes);

  // Local-memory tensors; freed below in exactly the reverse order.
  cvk_tl_t *tl_ifmap = test_alloc_tl(bmk, ifmap_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_ofmap_bf16 = test_alloc_tl(bmk, ofmap_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_buf = test_alloc_tl(bmk, tl_ifmap->shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_table = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_table_mantissa = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);

  // Input data and the reference derived from it.
  gen_input(host_in, in_elems);
  tl_lut_ref(host_ref, host_in, ifmap_shape);

  // Lookup tables consumed by the kernel.
  cvm_reciprocal_tbl(host_table, host_table_mantissa, &table_shape);

  // host -> local memory
  test_put_tensor_g2l_comp(ctx, bmk, tl_ifmap, (uint8_t *)host_in);
  test_put_tensor_g2l_comp(ctx, bmk, tl_table, (uint8_t *)host_table);
  test_put_tensor_g2l_comp(ctx, bmk, tl_table_mantissa, (uint8_t *)host_table_mantissa);

  cvm_emit_reciprocal(bmk, tl_ifmap, tl_buf, tl_table, tl_table_mantissa, tl_ofmap_bf16);

  // run the emitted commands
  test_submit_comp(ctx, bmk);

  // local memory -> host
  uint16_t *host_out = (uint16_t *)test_get_tensor_l2g_comp(ctx, bmk, tl_ofmap_bf16);

  verify(host_out, host_ref, host_in, in_elems, mode);

  free_tl(bmk, tl_table_mantissa);
  free_tl(bmk, tl_table);
  free_tl(bmk, tl_buf);
  free_tl(bmk, tl_ofmap_bf16);
  free_tl(bmk, tl_ifmap);

  free(host_in);
  free(host_ref);
  free(host_out);
  free(host_table);
  free(host_table_mantissa);
}
|
||||
|
||||
int main() {
|
||||
CVI_RT_HANDLE ctx;
|
||||
cvk_context_t *bmk;
|
||||
int round_mode;
|
||||
|
||||
round_mode = set_store_feround();
|
||||
|
||||
test_init(&ctx, &bmk);
|
||||
|
||||
for (int i = GEN_POW_20_DATA_MAX_ERROR; i < TEST_MODE_MAX; i++) {
|
||||
mode = static_cast<TEST_MODE>(i);
|
||||
printf("test mode %d...\n", mode);
|
||||
|
||||
int input_n = 1;
|
||||
int input_c = 32;
|
||||
int input_h = 1;
|
||||
int input_w = 1;
|
||||
|
||||
if (mode == PRE_DATA_COMPARE_FIX) {
|
||||
input_h = 4;
|
||||
input_w = 8;
|
||||
} else {
|
||||
input_h = input_w = 16;
|
||||
}
|
||||
|
||||
testbench(&ctx, bmk, input_n, input_c, input_h, input_w);
|
||||
}
|
||||
|
||||
test_exit(&ctx, bmk);
|
||||
restore_feround(round_mode);
|
||||
return 0;
|
||||
}
|
||||
907
cvimath/tests/cvi1835/sigmoid_linear_interp.cpp
Normal file
907
cvimath/tests/cvi1835/sigmoid_linear_interp.cpp
Normal file
@ -0,0 +1,907 @@
|
||||
//* TODO: you could rerange any value to -127~127
|
||||
#include <cvimath_internal.h>
|
||||
#include <test_cvikernel_util.h>
|
||||
|
||||
#define OUT
|
||||
#define IN
|
||||
//#define DBG
|
||||
|
||||
/**
 * Test modes.
 *
 * "pre_data" means a fixed input pattern is tested; the result should be the
 * same as the LUT. "compare fix" means the output values must equal the
 * golden values; the *_MAX_ERROR modes instead only require the difference to
 * stay below `MAX_ERROR`.
 */
enum TEST_MODE {
  // pre-data + fix compare
  PRE_DATA_COMPARE_FIX = 0,

  // pre-data + compare only diff < MAX_ERROR
  PRE_DATA_MAX_ERROR,

  // gen data + compare only diff < MAX_ERROR
  GEN_DATA_MAX_ERROR,

  // number of modes
  TEST_MODE_MAX,
};
|
||||
|
||||
static TEST_MODE mode;
|
||||
#define MAX_ERROR (0.004)
|
||||
|
||||
using namespace std;
|
||||
// Fixed table of raw 16-bit input words. It starts at 0x0000 and climbs to
// 0x4100, then continues with the same progression with the top bit set
// (0xBC03 .. 0xC100).
// NOTE(review): the values look like bf16 bit patterns (0x3F80 = 1.0,
// 0x4100 = 8.0, 0xC100 = -8.0), presumably the "pre-data" input used by the
// PRE_DATA_* modes -- confirm against the code that reads this table.
static uint16_t test_pattern[] = {
|
||||
0x0000, 0x3C03, 0x3C83, 0x3CC5, 0x3D03, 0x3D24, 0x3D45, 0x3D65, 0x3D83, 0x3D93, 0x3DA4, 0x3DB4,
|
||||
0x3DC5, 0x3DD5, 0x3DE5, 0x3DF6, 0x3E03, 0x3E0B, 0x3E13, 0x3E1C, 0x3E24, 0x3E2C, 0x3E34, 0x3E3C,
|
||||
0x3E45, 0x3E4D, 0x3E55, 0x3E5D, 0x3E65, 0x3E6E, 0x3E76, 0x3E7E, 0x3E83, 0x3E87, 0x3E8B, 0x3E8F,
|
||||
0x3E93, 0x3E98, 0x3E9C, 0x3EA0, 0x3EA4, 0x3EA8, 0x3EAC, 0x3EB0, 0x3EB4, 0x3EB8, 0x3EBC, 0x3EC1,
|
||||
0x3EC5, 0x3EC9, 0x3ECD, 0x3ED1, 0x3ED5, 0x3ED9, 0x3EDD, 0x3EE1, 0x3EE5, 0x3EE9, 0x3EEE, 0x3EF2,
|
||||
0x3EF6, 0x3EFA, 0x3EFE, 0x3F01, 0x3F03, 0x3F05, 0x3F07, 0x3F09, 0x3F0B, 0x3F0D, 0x3F0F, 0x3F11,
|
||||
0x3F13, 0x3F16, 0x3F18, 0x3F1A, 0x3F1C, 0x3F1E, 0x3F20, 0x3F22, 0x3F24, 0x3F26, 0x3F28, 0x3F2A,
|
||||
0x3F2C, 0x3F2E, 0x3F30, 0x3F32, 0x3F34, 0x3F36, 0x3F38, 0x3F3A, 0x3F3C, 0x3F3E, 0x3F41, 0x3F43,
|
||||
0x3F45, 0x3F47, 0x3F49, 0x3F4B, 0x3F4D, 0x3F4F, 0x3F51, 0x3F53, 0x3F55, 0x3F57, 0x3F59, 0x3F5B,
|
||||
0x3F5D, 0x3F5F, 0x3F61, 0x3F63, 0x3F65, 0x3F67, 0x3F69, 0x3F6C, 0x3F6E, 0x3F70, 0x3F72, 0x3F74,
|
||||
0x3F76, 0x3F78, 0x3F7A, 0x3F7C, 0x3F7E, 0x3F80, 0x3F81, 0x3F82, 0x3F83, 0x3F84, 0x3F85, 0x3F86,
|
||||
0x3F87, 0x3F88, 0x3F89, 0x3F8A, 0x3F8B, 0x3F8C, 0x3F8D, 0x3F8E, 0x3F8F, 0x3F90, 0x3F91, 0x3F92,
|
||||
0x3F93, 0x3F94, 0x3F96, 0x3F97, 0x3F98, 0x3F99, 0x3F9A, 0x3F9B, 0x3F9C, 0x3F9D, 0x3F9E, 0x3F9F,
|
||||
0x3FA0, 0x3FA1, 0x3FA2, 0x3FA3, 0x3FA4, 0x3FA5, 0x3FA6, 0x3FA7, 0x3FA8, 0x3FA9, 0x3FAA, 0x3FAB,
|
||||
0x3FAC, 0x3FAD, 0x3FAE, 0x3FAF, 0x3FB0, 0x3FB1, 0x3FB2, 0x3FB3, 0x3FB4, 0x3FB5, 0x3FB6, 0x3FB7,
|
||||
0x3FB8, 0x3FB9, 0x3FBA, 0x3FBB, 0x3FBC, 0x3FBD, 0x3FBE, 0x3FBF, 0x3FC1, 0x3FC2, 0x3FC3, 0x3FC4,
|
||||
0x3FC5, 0x3FC6, 0x3FC7, 0x3FC8, 0x3FC9, 0x3FCA, 0x3FCB, 0x3FCC, 0x3FCD, 0x3FCE, 0x3FCF, 0x3FD0,
|
||||
0x3FD1, 0x3FD2, 0x3FD3, 0x3FD4, 0x3FD5, 0x3FD6, 0x3FD7, 0x3FD8, 0x3FD9, 0x3FDA, 0x3FDB, 0x3FDC,
|
||||
0x3FDD, 0x3FDE, 0x3FDF, 0x3FE0, 0x3FE1, 0x3FE2, 0x3FE3, 0x3FE4, 0x3FE5, 0x3FE6, 0x3FE7, 0x3FE8,
|
||||
0x3FE9, 0x3FEA, 0x3FEC, 0x3FED, 0x3FEE, 0x3FEF, 0x3FF0, 0x3FF1, 0x3FF2, 0x3FF3, 0x3FF4, 0x3FF5,
|
||||
0x3FF6, 0x3FF7, 0x3FF8, 0x3FF9, 0x3FFA, 0x3FFB, 0x3FFC, 0x3FFD, 0x3FFE, 0x3FFF, 0x4000, 0x4001,
|
||||
0x4001, 0x4002, 0x4002, 0x4003, 0x4003, 0x4004, 0x4004, 0x4005, 0x4005, 0x4006, 0x4006, 0x4007,
|
||||
0x4007, 0x4008, 0x4008, 0x4009, 0x4009, 0x400A, 0x400A, 0x400B, 0x400B, 0x400C, 0x400C, 0x400D,
|
||||
0x400D, 0x400E, 0x400E, 0x400F, 0x400F, 0x4010, 0x4010, 0x4011, 0x4011, 0x4012, 0x4012, 0x4013,
|
||||
0x4013, 0x4014, 0x4014, 0x4015, 0x4016, 0x4016, 0x4017, 0x4017, 0x4018, 0x4018, 0x4019, 0x4019,
|
||||
0x401A, 0x401A, 0x401B, 0x401B, 0x401C, 0x401C, 0x401D, 0x401D, 0x401E, 0x401E, 0x401F, 0x401F,
|
||||
0x4020, 0x4020, 0x4021, 0x4021, 0x4022, 0x4022, 0x4023, 0x4023, 0x4024, 0x4024, 0x4025, 0x4025,
|
||||
0x4026, 0x4026, 0x4027, 0x4027, 0x4028, 0x4028, 0x4029, 0x4029, 0x402A, 0x402A, 0x402B, 0x402C,
|
||||
0x402C, 0x402D, 0x402D, 0x402E, 0x402E, 0x402F, 0x402F, 0x4030, 0x4030, 0x4031, 0x4031, 0x4032,
|
||||
0x4032, 0x4033, 0x4033, 0x4034, 0x4034, 0x4035, 0x4035, 0x4036, 0x4036, 0x4037, 0x4037, 0x4038,
|
||||
0x4038, 0x4039, 0x4039, 0x403A, 0x403A, 0x403B, 0x403B, 0x403C, 0x403C, 0x403D, 0x403D, 0x403E,
|
||||
0x403E, 0x403F, 0x403F, 0x4040, 0x4041, 0x4041, 0x4042, 0x4042, 0x4043, 0x4043, 0x4044, 0x4044,
|
||||
0x4045, 0x4045, 0x4046, 0x4046, 0x4047, 0x4047, 0x4048, 0x4048, 0x4049, 0x4049, 0x404A, 0x404A,
|
||||
0x404B, 0x404B, 0x404C, 0x404C, 0x404D, 0x404D, 0x404E, 0x404E, 0x404F, 0x404F, 0x4050, 0x4050,
|
||||
0x4051, 0x4051, 0x4052, 0x4052, 0x4053, 0x4053, 0x4054, 0x4054, 0x4055, 0x4056, 0x4056, 0x4057,
|
||||
0x4057, 0x4058, 0x4058, 0x4059, 0x4059, 0x405A, 0x405A, 0x405B, 0x405B, 0x405C, 0x405C, 0x405D,
|
||||
0x405D, 0x405E, 0x405E, 0x405F, 0x405F, 0x4060, 0x4060, 0x4061, 0x4061, 0x4062, 0x4062, 0x4063,
|
||||
0x4063, 0x4064, 0x4064, 0x4065, 0x4065, 0x4066, 0x4066, 0x4067, 0x4067, 0x4068, 0x4068, 0x4069,
|
||||
0x4069, 0x406A, 0x406A, 0x406B, 0x406C, 0x406C, 0x406D, 0x406D, 0x406E, 0x406E, 0x406F, 0x406F,
|
||||
0x4070, 0x4070, 0x4071, 0x4071, 0x4072, 0x4072, 0x4073, 0x4073, 0x4074, 0x4074, 0x4075, 0x4075,
|
||||
0x4076, 0x4076, 0x4077, 0x4077, 0x4078, 0x4078, 0x4079, 0x4079, 0x407A, 0x407A, 0x407B, 0x407B,
|
||||
0x407C, 0x407C, 0x407D, 0x407D, 0x407E, 0x407E, 0x407F, 0x407F, 0x4080, 0x4080, 0x4081, 0x4081,
|
||||
0x4081, 0x4081, 0x4082, 0x4082, 0x4082, 0x4082, 0x4083, 0x4083, 0x4083, 0x4083, 0x4084, 0x4084,
|
||||
0x4084, 0x4084, 0x4085, 0x4085, 0x4085, 0x4085, 0x4086, 0x4086, 0x4086, 0x4086, 0x4087, 0x4087,
|
||||
0x4087, 0x4087, 0x4088, 0x4088, 0x4088, 0x4088, 0x4089, 0x4089, 0x4089, 0x4089, 0x408A, 0x408A,
|
||||
0x408A, 0x408A, 0x408B, 0x408B, 0x408B, 0x408C, 0x408C, 0x408C, 0x408C, 0x408D, 0x408D, 0x408D,
|
||||
0x408D, 0x408E, 0x408E, 0x408E, 0x408E, 0x408F, 0x408F, 0x408F, 0x408F, 0x4090, 0x4090, 0x4090,
|
||||
0x4090, 0x4091, 0x4091, 0x4091, 0x4091, 0x4092, 0x4092, 0x4092, 0x4092, 0x4093, 0x4093, 0x4093,
|
||||
0x4093, 0x4094, 0x4094, 0x4094, 0x4094, 0x4095, 0x4095, 0x4095, 0x4096, 0x4096, 0x4096, 0x4096,
|
||||
0x4097, 0x4097, 0x4097, 0x4097, 0x4098, 0x4098, 0x4098, 0x4098, 0x4099, 0x4099, 0x4099, 0x4099,
|
||||
0x409A, 0x409A, 0x409A, 0x409A, 0x409B, 0x409B, 0x409B, 0x409B, 0x409C, 0x409C, 0x409C, 0x409C,
|
||||
0x409D, 0x409D, 0x409D, 0x409D, 0x409E, 0x409E, 0x409E, 0x409E, 0x409F, 0x409F, 0x409F, 0x409F,
|
||||
0x40A0, 0x40A0, 0x40A0, 0x40A1, 0x40A1, 0x40A1, 0x40A1, 0x40A2, 0x40A2, 0x40A2, 0x40A2, 0x40A3,
|
||||
0x40A3, 0x40A3, 0x40A3, 0x40A4, 0x40A4, 0x40A4, 0x40A4, 0x40A5, 0x40A5, 0x40A5, 0x40A5, 0x40A6,
|
||||
0x40A6, 0x40A6, 0x40A6, 0x40A7, 0x40A7, 0x40A7, 0x40A7, 0x40A8, 0x40A8, 0x40A8, 0x40A8, 0x40A9,
|
||||
0x40A9, 0x40A9, 0x40A9, 0x40AA, 0x40AA, 0x40AA, 0x40AA, 0x40AB, 0x40AB, 0x40AB, 0x40AC, 0x40AC,
|
||||
0x40AC, 0x40AC, 0x40AD, 0x40AD, 0x40AD, 0x40AD, 0x40AE, 0x40AE, 0x40AE, 0x40AE, 0x40AF, 0x40AF,
|
||||
0x40AF, 0x40AF, 0x40B0, 0x40B0, 0x40B0, 0x40B0, 0x40B1, 0x40B1, 0x40B1, 0x40B1, 0x40B2, 0x40B2,
|
||||
0x40B2, 0x40B2, 0x40B3, 0x40B3, 0x40B3, 0x40B3, 0x40B4, 0x40B4, 0x40B4, 0x40B4, 0x40B5, 0x40B5,
|
||||
0x40B5, 0x40B6, 0x40B6, 0x40B6, 0x40B6, 0x40B7, 0x40B7, 0x40B7, 0x40B7, 0x40B8, 0x40B8, 0x40B8,
|
||||
0x40B8, 0x40B9, 0x40B9, 0x40B9, 0x40B9, 0x40BA, 0x40BA, 0x40BA, 0x40BA, 0x40BB, 0x40BB, 0x40BB,
|
||||
0x40BB, 0x40BC, 0x40BC, 0x40BC, 0x40BC, 0x40BD, 0x40BD, 0x40BD, 0x40BD, 0x40BE, 0x40BE, 0x40BE,
|
||||
0x40BE, 0x40BF, 0x40BF, 0x40BF, 0x40BF, 0x40C0, 0x40C0, 0x40C0, 0x40C1, 0x40C1, 0x40C1, 0x40C1,
|
||||
0x40C2, 0x40C2, 0x40C2, 0x40C2, 0x40C3, 0x40C3, 0x40C3, 0x40C3, 0x40C4, 0x40C4, 0x40C4, 0x40C4,
|
||||
0x40C5, 0x40C5, 0x40C5, 0x40C5, 0x40C6, 0x40C6, 0x40C6, 0x40C6, 0x40C7, 0x40C7, 0x40C7, 0x40C7,
|
||||
0x40C8, 0x40C8, 0x40C8, 0x40C8, 0x40C9, 0x40C9, 0x40C9, 0x40C9, 0x40CA, 0x40CA, 0x40CA, 0x40CA,
|
||||
0x40CB, 0x40CB, 0x40CB, 0x40CC, 0x40CC, 0x40CC, 0x40CC, 0x40CD, 0x40CD, 0x40CD, 0x40CD, 0x40CE,
|
||||
0x40CE, 0x40CE, 0x40CE, 0x40CF, 0x40CF, 0x40CF, 0x40CF, 0x40D0, 0x40D0, 0x40D0, 0x40D0, 0x40D1,
|
||||
0x40D1, 0x40D1, 0x40D1, 0x40D2, 0x40D2, 0x40D2, 0x40D2, 0x40D3, 0x40D3, 0x40D3, 0x40D3, 0x40D4,
|
||||
0x40D4, 0x40D4, 0x40D4, 0x40D5, 0x40D5, 0x40D5, 0x40D6, 0x40D6, 0x40D6, 0x40D6, 0x40D7, 0x40D7,
|
||||
0x40D7, 0x40D7, 0x40D8, 0x40D8, 0x40D8, 0x40D8, 0x40D9, 0x40D9, 0x40D9, 0x40D9, 0x40DA, 0x40DA,
|
||||
0x40DA, 0x40DA, 0x40DB, 0x40DB, 0x40DB, 0x40DB, 0x40DC, 0x40DC, 0x40DC, 0x40DC, 0x40DD, 0x40DD,
|
||||
0x40DD, 0x40DD, 0x40DE, 0x40DE, 0x40DE, 0x40DE, 0x40DF, 0x40DF, 0x40DF, 0x40DF, 0x40E0, 0x40E0,
|
||||
0x40E0, 0x40E1, 0x40E1, 0x40E1, 0x40E1, 0x40E2, 0x40E2, 0x40E2, 0x40E2, 0x40E3, 0x40E3, 0x40E3,
|
||||
0x40E3, 0x40E4, 0x40E4, 0x40E4, 0x40E4, 0x40E5, 0x40E5, 0x40E5, 0x40E5, 0x40E6, 0x40E6, 0x40E6,
|
||||
0x40E6, 0x40E7, 0x40E7, 0x40E7, 0x40E7, 0x40E8, 0x40E8, 0x40E8, 0x40E8, 0x40E9, 0x40E9, 0x40E9,
|
||||
0x40E9, 0x40EA, 0x40EA, 0x40EA, 0x40EA, 0x40EB, 0x40EB, 0x40EB, 0x40EC, 0x40EC, 0x40EC, 0x40EC,
|
||||
0x40ED, 0x40ED, 0x40ED, 0x40ED, 0x40EE, 0x40EE, 0x40EE, 0x40EE, 0x40EF, 0x40EF, 0x40EF, 0x40EF,
|
||||
0x40F0, 0x40F0, 0x40F0, 0x40F0, 0x40F1, 0x40F1, 0x40F1, 0x40F1, 0x40F2, 0x40F2, 0x40F2, 0x40F2,
|
||||
0x40F3, 0x40F3, 0x40F3, 0x40F3, 0x40F4, 0x40F4, 0x40F4, 0x40F4, 0x40F5, 0x40F5, 0x40F5, 0x40F6,
|
||||
0x40F6, 0x40F6, 0x40F6, 0x40F7, 0x40F7, 0x40F7, 0x40F7, 0x40F8, 0x40F8, 0x40F8, 0x40F8, 0x40F9,
|
||||
0x40F9, 0x40F9, 0x40F9, 0x40FA, 0x40FA, 0x40FA, 0x40FA, 0x40FB, 0x40FB, 0x40FB, 0x40FB, 0x40FC,
|
||||
0x40FC, 0x40FC, 0x40FC, 0x40FD, 0x40FD, 0x40FD, 0x40FD, 0x40FE, 0x40FE, 0x40FE, 0x40FE, 0x40FF,
|
||||
0x40FF, 0x40FF, 0x40FF, 0x4100, 0xBC03, 0xBC83, 0xBCC5, 0xBD03, 0xBD24, 0xBD45, 0xBD65, 0xBD83,
|
||||
0xBD93, 0xBDA4, 0xBDB4, 0xBDC5, 0xBDD5, 0xBDE5, 0xBDF6, 0xBE03, 0xBE0B, 0xBE13, 0xBE1C, 0xBE24,
|
||||
0xBE2C, 0xBE34, 0xBE3C, 0xBE45, 0xBE4D, 0xBE55, 0xBE5D, 0xBE65, 0xBE6E, 0xBE76, 0xBE7E, 0xBE83,
|
||||
0xBE87, 0xBE8B, 0xBE8F, 0xBE93, 0xBE98, 0xBE9C, 0xBEA0, 0xBEA4, 0xBEA8, 0xBEAC, 0xBEB0, 0xBEB4,
|
||||
0xBEB8, 0xBEBC, 0xBEC1, 0xBEC5, 0xBEC9, 0xBECD, 0xBED1, 0xBED5, 0xBED9, 0xBEDD, 0xBEE1, 0xBEE5,
|
||||
0xBEE9, 0xBEEE, 0xBEF2, 0xBEF6, 0xBEFA, 0xBEFE, 0xBF01, 0xBF03, 0xBF05, 0xBF07, 0xBF09, 0xBF0B,
|
||||
0xBF0D, 0xBF0F, 0xBF11, 0xBF13, 0xBF16, 0xBF18, 0xBF1A, 0xBF1C, 0xBF1E, 0xBF20, 0xBF22, 0xBF24,
|
||||
0xBF26, 0xBF28, 0xBF2A, 0xBF2C, 0xBF2E, 0xBF30, 0xBF32, 0xBF34, 0xBF36, 0xBF38, 0xBF3A, 0xBF3C,
|
||||
0xBF3E, 0xBF41, 0xBF43, 0xBF45, 0xBF47, 0xBF49, 0xBF4B, 0xBF4D, 0xBF4F, 0xBF51, 0xBF53, 0xBF55,
|
||||
0xBF57, 0xBF59, 0xBF5B, 0xBF5D, 0xBF5F, 0xBF61, 0xBF63, 0xBF65, 0xBF67, 0xBF69, 0xBF6C, 0xBF6E,
|
||||
0xBF70, 0xBF72, 0xBF74, 0xBF76, 0xBF78, 0xBF7A, 0xBF7C, 0xBF7E, 0xBF80, 0xBF81, 0xBF82, 0xBF83,
|
||||
0xBF84, 0xBF85, 0xBF86, 0xBF87, 0xBF88, 0xBF89, 0xBF8A, 0xBF8B, 0xBF8C, 0xBF8D, 0xBF8E, 0xBF8F,
|
||||
0xBF90, 0xBF91, 0xBF92, 0xBF93, 0xBF94, 0xBF96, 0xBF97, 0xBF98, 0xBF99, 0xBF9A, 0xBF9B, 0xBF9C,
|
||||
0xBF9D, 0xBF9E, 0xBF9F, 0xBFA0, 0xBFA1, 0xBFA2, 0xBFA3, 0xBFA4, 0xBFA5, 0xBFA6, 0xBFA7, 0xBFA8,
|
||||
0xBFA9, 0xBFAA, 0xBFAB, 0xBFAC, 0xBFAD, 0xBFAE, 0xBFAF, 0xBFB0, 0xBFB1, 0xBFB2, 0xBFB3, 0xBFB4,
|
||||
0xBFB5, 0xBFB6, 0xBFB7, 0xBFB8, 0xBFB9, 0xBFBA, 0xBFBB, 0xBFBC, 0xBFBD, 0xBFBE, 0xBFBF, 0xBFC1,
|
||||
0xBFC2, 0xBFC3, 0xBFC4, 0xBFC5, 0xBFC6, 0xBFC7, 0xBFC8, 0xBFC9, 0xBFCA, 0xBFCB, 0xBFCC, 0xBFCD,
|
||||
0xBFCE, 0xBFCF, 0xBFD0, 0xBFD1, 0xBFD2, 0xBFD3, 0xBFD4, 0xBFD5, 0xBFD6, 0xBFD7, 0xBFD8, 0xBFD9,
|
||||
0xBFDA, 0xBFDB, 0xBFDC, 0xBFDD, 0xBFDE, 0xBFDF, 0xBFE0, 0xBFE1, 0xBFE2, 0xBFE3, 0xBFE4, 0xBFE5,
|
||||
0xBFE6, 0xBFE7, 0xBFE8, 0xBFE9, 0xBFEA, 0xBFEC, 0xBFED, 0xBFEE, 0xBFEF, 0xBFF0, 0xBFF1, 0xBFF2,
|
||||
0xBFF3, 0xBFF4, 0xBFF5, 0xBFF6, 0xBFF7, 0xBFF8, 0xBFF9, 0xBFFA, 0xBFFB, 0xBFFC, 0xBFFD, 0xBFFE,
|
||||
0xBFFF, 0xC000, 0xC001, 0xC001, 0xC002, 0xC002, 0xC003, 0xC003, 0xC004, 0xC004, 0xC005, 0xC005,
|
||||
0xC006, 0xC006, 0xC007, 0xC007, 0xC008, 0xC008, 0xC009, 0xC009, 0xC00A, 0xC00A, 0xC00B, 0xC00B,
|
||||
0xC00C, 0xC00C, 0xC00D, 0xC00D, 0xC00E, 0xC00E, 0xC00F, 0xC00F, 0xC010, 0xC010, 0xC011, 0xC011,
|
||||
0xC012, 0xC012, 0xC013, 0xC013, 0xC014, 0xC014, 0xC015, 0xC016, 0xC016, 0xC017, 0xC017, 0xC018,
|
||||
0xC018, 0xC019, 0xC019, 0xC01A, 0xC01A, 0xC01B, 0xC01B, 0xC01C, 0xC01C, 0xC01D, 0xC01D, 0xC01E,
|
||||
0xC01E, 0xC01F, 0xC01F, 0xC020, 0xC020, 0xC021, 0xC021, 0xC022, 0xC022, 0xC023, 0xC023, 0xC024,
|
||||
0xC024, 0xC025, 0xC025, 0xC026, 0xC026, 0xC027, 0xC027, 0xC028, 0xC028, 0xC029, 0xC029, 0xC02A,
|
||||
0xC02A, 0xC02B, 0xC02C, 0xC02C, 0xC02D, 0xC02D, 0xC02E, 0xC02E, 0xC02F, 0xC02F, 0xC030, 0xC030,
|
||||
0xC031, 0xC031, 0xC032, 0xC032, 0xC033, 0xC033, 0xC034, 0xC034, 0xC035, 0xC035, 0xC036, 0xC036,
|
||||
0xC037, 0xC037, 0xC038, 0xC038, 0xC039, 0xC039, 0xC03A, 0xC03A, 0xC03B, 0xC03B, 0xC03C, 0xC03C,
|
||||
0xC03D, 0xC03D, 0xC03E, 0xC03E, 0xC03F, 0xC03F, 0xC040, 0xC041, 0xC041, 0xC042, 0xC042, 0xC043,
|
||||
0xC043, 0xC044, 0xC044, 0xC045, 0xC045, 0xC046, 0xC046, 0xC047, 0xC047, 0xC048, 0xC048, 0xC049,
|
||||
0xC049, 0xC04A, 0xC04A, 0xC04B, 0xC04B, 0xC04C, 0xC04C, 0xC04D, 0xC04D, 0xC04E, 0xC04E, 0xC04F,
|
||||
0xC04F, 0xC050, 0xC050, 0xC051, 0xC051, 0xC052, 0xC052, 0xC053, 0xC053, 0xC054, 0xC054, 0xC055,
|
||||
0xC056, 0xC056, 0xC057, 0xC057, 0xC058, 0xC058, 0xC059, 0xC059, 0xC05A, 0xC05A, 0xC05B, 0xC05B,
|
||||
0xC05C, 0xC05C, 0xC05D, 0xC05D, 0xC05E, 0xC05E, 0xC05F, 0xC05F, 0xC060, 0xC060, 0xC061, 0xC061,
|
||||
0xC062, 0xC062, 0xC063, 0xC063, 0xC064, 0xC064, 0xC065, 0xC065, 0xC066, 0xC066, 0xC067, 0xC067,
|
||||
0xC068, 0xC068, 0xC069, 0xC069, 0xC06A, 0xC06A, 0xC06B, 0xC06C, 0xC06C, 0xC06D, 0xC06D, 0xC06E,
|
||||
0xC06E, 0xC06F, 0xC06F, 0xC070, 0xC070, 0xC071, 0xC071, 0xC072, 0xC072, 0xC073, 0xC073, 0xC074,
|
||||
0xC074, 0xC075, 0xC075, 0xC076, 0xC076, 0xC077, 0xC077, 0xC078, 0xC078, 0xC079, 0xC079, 0xC07A,
|
||||
0xC07A, 0xC07B, 0xC07B, 0xC07C, 0xC07C, 0xC07D, 0xC07D, 0xC07E, 0xC07E, 0xC07F, 0xC07F, 0xC080,
|
||||
0xC080, 0xC081, 0xC081, 0xC081, 0xC081, 0xC082, 0xC082, 0xC082, 0xC082, 0xC083, 0xC083, 0xC083,
|
||||
0xC083, 0xC084, 0xC084, 0xC084, 0xC084, 0xC085, 0xC085, 0xC085, 0xC085, 0xC086, 0xC086, 0xC086,
|
||||
0xC086, 0xC087, 0xC087, 0xC087, 0xC087, 0xC088, 0xC088, 0xC088, 0xC088, 0xC089, 0xC089, 0xC089,
|
||||
0xC089, 0xC08A, 0xC08A, 0xC08A, 0xC08A, 0xC08B, 0xC08B, 0xC08B, 0xC08C, 0xC08C, 0xC08C, 0xC08C,
|
||||
0xC08D, 0xC08D, 0xC08D, 0xC08D, 0xC08E, 0xC08E, 0xC08E, 0xC08E, 0xC08F, 0xC08F, 0xC08F, 0xC08F,
|
||||
0xC090, 0xC090, 0xC090, 0xC090, 0xC091, 0xC091, 0xC091, 0xC091, 0xC092, 0xC092, 0xC092, 0xC092,
|
||||
0xC093, 0xC093, 0xC093, 0xC093, 0xC094, 0xC094, 0xC094, 0xC094, 0xC095, 0xC095, 0xC095, 0xC096,
|
||||
0xC096, 0xC096, 0xC096, 0xC097, 0xC097, 0xC097, 0xC097, 0xC098, 0xC098, 0xC098, 0xC098, 0xC099,
|
||||
0xC099, 0xC099, 0xC099, 0xC09A, 0xC09A, 0xC09A, 0xC09A, 0xC09B, 0xC09B, 0xC09B, 0xC09B, 0xC09C,
|
||||
0xC09C, 0xC09C, 0xC09C, 0xC09D, 0xC09D, 0xC09D, 0xC09D, 0xC09E, 0xC09E, 0xC09E, 0xC09E, 0xC09F,
|
||||
0xC09F, 0xC09F, 0xC09F, 0xC0A0, 0xC0A0, 0xC0A0, 0xC0A1, 0xC0A1, 0xC0A1, 0xC0A1, 0xC0A2, 0xC0A2,
|
||||
0xC0A2, 0xC0A2, 0xC0A3, 0xC0A3, 0xC0A3, 0xC0A3, 0xC0A4, 0xC0A4, 0xC0A4, 0xC0A4, 0xC0A5, 0xC0A5,
|
||||
0xC0A5, 0xC0A5, 0xC0A6, 0xC0A6, 0xC0A6, 0xC0A6, 0xC0A7, 0xC0A7, 0xC0A7, 0xC0A7, 0xC0A8, 0xC0A8,
|
||||
0xC0A8, 0xC0A8, 0xC0A9, 0xC0A9, 0xC0A9, 0xC0A9, 0xC0AA, 0xC0AA, 0xC0AA, 0xC0AA, 0xC0AB, 0xC0AB,
|
||||
0xC0AB, 0xC0AC, 0xC0AC, 0xC0AC, 0xC0AC, 0xC0AD, 0xC0AD, 0xC0AD, 0xC0AD, 0xC0AE, 0xC0AE, 0xC0AE,
|
||||
0xC0AE, 0xC0AF, 0xC0AF, 0xC0AF, 0xC0AF, 0xC0B0, 0xC0B0, 0xC0B0, 0xC0B0, 0xC0B1, 0xC0B1, 0xC0B1,
|
||||
0xC0B1, 0xC0B2, 0xC0B2, 0xC0B2, 0xC0B2, 0xC0B3, 0xC0B3, 0xC0B3, 0xC0B3, 0xC0B4, 0xC0B4, 0xC0B4,
|
||||
0xC0B4, 0xC0B5, 0xC0B5, 0xC0B5, 0xC0B6, 0xC0B6, 0xC0B6, 0xC0B6, 0xC0B7, 0xC0B7, 0xC0B7, 0xC0B7,
|
||||
0xC0B8, 0xC0B8, 0xC0B8, 0xC0B8, 0xC0B9, 0xC0B9, 0xC0B9, 0xC0B9, 0xC0BA, 0xC0BA, 0xC0BA, 0xC0BA,
|
||||
0xC0BB, 0xC0BB, 0xC0BB, 0xC0BB, 0xC0BC, 0xC0BC, 0xC0BC, 0xC0BC, 0xC0BD, 0xC0BD, 0xC0BD, 0xC0BD,
|
||||
0xC0BE, 0xC0BE, 0xC0BE, 0xC0BE, 0xC0BF, 0xC0BF, 0xC0BF, 0xC0BF, 0xC0C0, 0xC0C0, 0xC0C0, 0xC0C1,
|
||||
0xC0C1, 0xC0C1, 0xC0C1, 0xC0C2, 0xC0C2, 0xC0C2, 0xC0C2, 0xC0C3, 0xC0C3, 0xC0C3, 0xC0C3, 0xC0C4,
|
||||
0xC0C4, 0xC0C4, 0xC0C4, 0xC0C5, 0xC0C5, 0xC0C5, 0xC0C5, 0xC0C6, 0xC0C6, 0xC0C6, 0xC0C6, 0xC0C7,
|
||||
0xC0C7, 0xC0C7, 0xC0C7, 0xC0C8, 0xC0C8, 0xC0C8, 0xC0C8, 0xC0C9, 0xC0C9, 0xC0C9, 0xC0C9, 0xC0CA,
|
||||
0xC0CA, 0xC0CA, 0xC0CA, 0xC0CB, 0xC0CB, 0xC0CB, 0xC0CC, 0xC0CC, 0xC0CC, 0xC0CC, 0xC0CD, 0xC0CD,
|
||||
0xC0CD, 0xC0CD, 0xC0CE, 0xC0CE, 0xC0CE, 0xC0CE, 0xC0CF, 0xC0CF, 0xC0CF, 0xC0CF, 0xC0D0, 0xC0D0,
|
||||
0xC0D0, 0xC0D0, 0xC0D1, 0xC0D1, 0xC0D1, 0xC0D1, 0xC0D2, 0xC0D2, 0xC0D2, 0xC0D2, 0xC0D3, 0xC0D3,
|
||||
0xC0D3, 0xC0D3, 0xC0D4, 0xC0D4, 0xC0D4, 0xC0D4, 0xC0D5, 0xC0D5, 0xC0D5, 0xC0D6, 0xC0D6, 0xC0D6,
|
||||
0xC0D6, 0xC0D7, 0xC0D7, 0xC0D7, 0xC0D7, 0xC0D8, 0xC0D8, 0xC0D8, 0xC0D8, 0xC0D9, 0xC0D9, 0xC0D9,
|
||||
0xC0D9, 0xC0DA, 0xC0DA, 0xC0DA, 0xC0DA, 0xC0DB, 0xC0DB, 0xC0DB, 0xC0DB, 0xC0DC, 0xC0DC, 0xC0DC,
|
||||
0xC0DC, 0xC0DD, 0xC0DD, 0xC0DD, 0xC0DD, 0xC0DE, 0xC0DE, 0xC0DE, 0xC0DE, 0xC0DF, 0xC0DF, 0xC0DF,
|
||||
0xC0DF, 0xC0E0, 0xC0E0, 0xC0E0, 0xC0E1, 0xC0E1, 0xC0E1, 0xC0E1, 0xC0E2, 0xC0E2, 0xC0E2, 0xC0E2,
|
||||
0xC0E3, 0xC0E3, 0xC0E3, 0xC0E3, 0xC0E4, 0xC0E4, 0xC0E4, 0xC0E4, 0xC0E5, 0xC0E5, 0xC0E5, 0xC0E5,
|
||||
0xC0E6, 0xC0E6, 0xC0E6, 0xC0E6, 0xC0E7, 0xC0E7, 0xC0E7, 0xC0E7, 0xC0E8, 0xC0E8, 0xC0E8, 0xC0E8,
|
||||
0xC0E9, 0xC0E9, 0xC0E9, 0xC0E9, 0xC0EA, 0xC0EA, 0xC0EA, 0xC0EA, 0xC0EB, 0xC0EB, 0xC0EB, 0xC0EC,
|
||||
0xC0EC, 0xC0EC, 0xC0EC, 0xC0ED, 0xC0ED, 0xC0ED, 0xC0ED, 0xC0EE, 0xC0EE, 0xC0EE, 0xC0EE, 0xC0EF,
|
||||
0xC0EF, 0xC0EF, 0xC0EF, 0xC0F0, 0xC0F0, 0xC0F0, 0xC0F0, 0xC0F1, 0xC0F1, 0xC0F1, 0xC0F1, 0xC0F2,
|
||||
0xC0F2, 0xC0F2, 0xC0F2, 0xC0F3, 0xC0F3, 0xC0F3, 0xC0F3, 0xC0F4, 0xC0F4, 0xC0F4, 0xC0F4, 0xC0F5,
|
||||
0xC0F5, 0xC0F5, 0xC0F6, 0xC0F6, 0xC0F6, 0xC0F6, 0xC0F7, 0xC0F7, 0xC0F7, 0xC0F7, 0xC0F8, 0xC0F8,
|
||||
0xC0F8, 0xC0F8, 0xC0F9, 0xC0F9, 0xC0F9, 0xC0F9, 0xC0FA, 0xC0FA, 0xC0FA, 0xC0FA, 0xC0FB, 0xC0FB,
|
||||
0xC0FB, 0xC0FB, 0xC0FC, 0xC0FC, 0xC0FC, 0xC0FC, 0xC0FD, 0xC0FD, 0xC0FD, 0xC0FD, 0xC0FE, 0xC0FE,
|
||||
0xC0FE, 0xC0FE, 0xC0FF, 0xC0FF, 0xC0FF, 0xC0FF, 0xC100, 0xC100,
|
||||
};
|
||||
|
||||
// Fixed table of raw 16-bit golden words. The first entry is 0x3f00 and the
// first run saturates at 0x3f80; the second run descends from 0x3eff.
// NOTE(review): presumably the expected bf16 sigmoid output for each entry of
// test_pattern (0x3f00 would be 0.5 = sigmoid(0)) -- confirm the element-wise
// correspondence against the comparison code.
static uint16_t sigmode_golden_bf16[] = {
|
||||
0x3f00, 0x3f01, 0x3f01, 0x3f02, 0x3f02, 0x3f03, 0x3f03, 0x3f04, 0x3f04, 0x3f05, 0x3f05, 0x3f06,
|
||||
0x3f06, 0x3f07, 0x3f07, 0x3f08, 0x3f08, 0x3f09, 0x3f09, 0x3f0a, 0x3f0a, 0x3f0b, 0x3f0b, 0x3f0c,
|
||||
0x3f0c, 0x3f0d, 0x3f0d, 0x3f0e, 0x3f0e, 0x3f0f, 0x3f0f, 0x3f10, 0x3f10, 0x3f11, 0x3f11, 0x3f12,
|
||||
0x3f12, 0x3f13, 0x3f13, 0x3f14, 0x3f14, 0x3f15, 0x3f15, 0x3f16, 0x3f16, 0x3f17, 0x3f17, 0x3f18,
|
||||
0x3f19, 0x3f19, 0x3f1a, 0x3f1a, 0x3f1b, 0x3f1b, 0x3f1b, 0x3f1c, 0x3f1d, 0x3f1d, 0x3f1e, 0x3f1e,
|
||||
0x3f1f, 0x3f1f, 0x3f20, 0x3f1f, 0x3f20, 0x3f20, 0x3f21, 0x3f21, 0x3f22, 0x3f22, 0x3f23, 0x3f23,
|
||||
0x3f24, 0x3f24, 0x3f25, 0x3f25, 0x3f26, 0x3f26, 0x3f27, 0x3f27, 0x3f28, 0x3f28, 0x3f29, 0x3f29,
|
||||
0x3f2a, 0x3f2a, 0x3f2a, 0x3f2a, 0x3f2b, 0x3f2b, 0x3f2c, 0x3f2c, 0x3f2d, 0x3f2d, 0x3f2e, 0x3f2f,
|
||||
0x3f2f, 0x3f30, 0x3f30, 0x3f30, 0x3f31, 0x3f31, 0x3f31, 0x3f32, 0x3f32, 0x3f32, 0x3f33, 0x3f33,
|
||||
0x3f34, 0x3f34, 0x3f35, 0x3f36, 0x3f36, 0x3f36, 0x3f37, 0x3f37, 0x3f38, 0x3f38, 0x3f38, 0x3f39,
|
||||
0x3f39, 0x3f3a, 0x3f3a, 0x3f3a, 0x3f3b, 0x3f3b, 0x3f3b, 0x3f3c, 0x3f3c, 0x3f3d, 0x3f3d, 0x3f3d,
|
||||
0x3f3e, 0x3f3e, 0x3f3e, 0x3f3f, 0x3f3f, 0x3f40, 0x3f40, 0x3f40, 0x3f41, 0x3f41, 0x3f41, 0x3f42,
|
||||
0x3f42, 0x3f42, 0x3f43, 0x3f44, 0x3f44, 0x3f44, 0x3f45, 0x3f45, 0x3f45, 0x3f46, 0x3f46, 0x3f46,
|
||||
0x3f47, 0x3f47, 0x3f48, 0x3f48, 0x3f48, 0x3f49, 0x3f49, 0x3f49, 0x3f4a, 0x3f4a, 0x3f4b, 0x3f4b,
|
||||
0x3f4b, 0x3f4c, 0x3f4c, 0x3f4c, 0x3f4c, 0x3f4c, 0x3f4d, 0x3f4d, 0x3f4d, 0x3f4e, 0x3f4e, 0x3f4e,
|
||||
0x3f4f, 0x3f4f, 0x3f50, 0x3f50, 0x3f50, 0x3f51, 0x3f51, 0x3f51, 0x3f51, 0x3f52, 0x3f52, 0x3f52,
|
||||
0x3f52, 0x3f53, 0x3f53, 0x3f54, 0x3f54, 0x3f55, 0x3f55, 0x3f55, 0x3f55, 0x3f56, 0x3f56, 0x3f56,
|
||||
0x3f56, 0x3f57, 0x3f57, 0x3f57, 0x3f57, 0x3f58, 0x3f58, 0x3f58, 0x3f58, 0x3f59, 0x3f59, 0x3f59,
|
||||
0x3f59, 0x3f5a, 0x3f5a, 0x3f5a, 0x3f5a, 0x3f5a, 0x3f5b, 0x3f5b, 0x3f5b, 0x3f5b, 0x3f5c, 0x3f5c,
|
||||
0x3f5c, 0x3f5c, 0x3f5d, 0x3f5d, 0x3f5d, 0x3f5e, 0x3f5e, 0x3f5e, 0x3f5e, 0x3f5f, 0x3f5f, 0x3f5f,
|
||||
0x3f5f, 0x3f60, 0x3f60, 0x3f60, 0x3f60, 0x3f61, 0x3f61, 0x3f61, 0x3f61, 0x3f62, 0x3f61, 0x3f61,
|
||||
0x3f61, 0x3f62, 0x3f62, 0x3f62, 0x3f62, 0x3f63, 0x3f63, 0x3f63, 0x3f63, 0x3f64, 0x3f64, 0x3f64,
|
||||
0x3f64, 0x3f65, 0x3f65, 0x3f65, 0x3f65, 0x3f66, 0x3f66, 0x3f66, 0x3f66, 0x3f66, 0x3f66, 0x3f66,
|
||||
0x3f66, 0x3f67, 0x3f67, 0x3f67, 0x3f67, 0x3f68, 0x3f68, 0x3f68, 0x3f68, 0x3f69, 0x3f69, 0x3f69,
|
||||
0x3f69, 0x3f69, 0x3f69, 0x3f69, 0x3f6a, 0x3f6a, 0x3f6a, 0x3f6a, 0x3f6a, 0x3f6a, 0x3f6a, 0x3f6a,
|
||||
0x3f6b, 0x3f6b, 0x3f6b, 0x3f6b, 0x3f6b, 0x3f6b, 0x3f6b, 0x3f6b, 0x3f6c, 0x3f6c, 0x3f6c, 0x3f6c,
|
||||
0x3f6d, 0x3f6d, 0x3f6d, 0x3f6d, 0x3f6e, 0x3f6e, 0x3f6e, 0x3f6e, 0x3f6e, 0x3f6e, 0x3f6e, 0x3f6e,
|
||||
0x3f6f, 0x3f6f, 0x3f6f, 0x3f6f, 0x3f6f, 0x3f6f, 0x3f6f, 0x3f6f, 0x3f6f, 0x3f6f, 0x3f70, 0x3f70,
|
||||
0x3f70, 0x3f70, 0x3f70, 0x3f70, 0x3f70, 0x3f71, 0x3f71, 0x3f71, 0x3f71, 0x3f71, 0x3f71, 0x3f71,
|
||||
0x3f71, 0x3f72, 0x3f72, 0x3f71, 0x3f71, 0x3f71, 0x3f71, 0x3f71, 0x3f71, 0x3f72, 0x3f72, 0x3f72,
|
||||
0x3f72, 0x3f72, 0x3f72, 0x3f72, 0x3f72, 0x3f73, 0x3f73, 0x3f73, 0x3f73, 0x3f73, 0x3f73, 0x3f73,
|
||||
0x3f73, 0x3f74, 0x3f74, 0x3f74, 0x3f74, 0x3f74, 0x3f74, 0x3f74, 0x3f75, 0x3f75, 0x3f75, 0x3f75,
|
||||
0x3f75, 0x3f75, 0x3f75, 0x3f75, 0x3f75, 0x3f75, 0x3f75, 0x3f75, 0x3f75, 0x3f75, 0x3f75, 0x3f75,
|
||||
0x3f75, 0x3f75, 0x3f76, 0x3f76, 0x3f76, 0x3f76, 0x3f76, 0x3f76, 0x3f76, 0x3f76, 0x3f76, 0x3f76,
|
||||
0x3f76, 0x3f76, 0x3f76, 0x3f76, 0x3f76, 0x3f76, 0x3f77, 0x3f77, 0x3f77, 0x3f77, 0x3f77, 0x3f77,
|
||||
0x3f77, 0x3f78, 0x3f78, 0x3f78, 0x3f78, 0x3f78, 0x3f78, 0x3f78, 0x3f78, 0x3f78, 0x3f78, 0x3f78,
|
||||
0x3f78, 0x3f78, 0x3f78, 0x3f78, 0x3f78, 0x3f78, 0x3f78, 0x3f78, 0x3f78, 0x3f78, 0x3f78, 0x3f78,
|
||||
0x3f78, 0x3f79, 0x3f79, 0x3f79, 0x3f79, 0x3f79, 0x3f79, 0x3f79, 0x3f79, 0x3f79, 0x3f79, 0x3f79,
|
||||
0x3f79, 0x3f79, 0x3f79, 0x3f79, 0x3f7a, 0x3f7a, 0x3f7a, 0x3f7a, 0x3f7a, 0x3f7a, 0x3f7a, 0x3f7a,
|
||||
0x3f7a, 0x3f7a, 0x3f7a, 0x3f7a, 0x3f7a, 0x3f7a, 0x3f7a, 0x3f7a, 0x3f7a, 0x3f7a, 0x3f7a, 0x3f7a,
|
||||
0x3f7a, 0x3f7a, 0x3f7a, 0x3f7a, 0x3f7b, 0x3f7b, 0x3f7b, 0x3f7b, 0x3f7b, 0x3f7b, 0x3f7b, 0x3f7b,
|
||||
0x3f7b, 0x3f7b, 0x3f7b, 0x3f7b, 0x3f7b, 0x3f7b, 0x3f7b, 0x3f7b, 0x3f7b, 0x3f7b, 0x3f7b, 0x3f7b,
|
||||
0x3f7b, 0x3f7b, 0x3f7c, 0x3f7c, 0x3f7c, 0x3f7c, 0x3f7c, 0x3f7c, 0x3f7c, 0x3f7c, 0x3f7c, 0x3f7c,
|
||||
0x3f7c, 0x3f7c, 0x3f7c, 0x3f7c, 0x3f7c, 0x3f7c, 0x3f7c, 0x3f7c, 0x3f7c, 0x3f7c, 0x3f7c, 0x3f7c,
|
||||
0x3f7c, 0x3f7c, 0x3f7c, 0x3f7c, 0x3f7c, 0x3f7c, 0x3f7c, 0x3f7c, 0x3f7c, 0x3f7c, 0x3f7d, 0x3f7d,
|
||||
0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d,
|
||||
0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d,
|
||||
0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d, 0x3f7d,
|
||||
0x3f7d, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e,
|
||||
0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e,
|
||||
0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e,
|
||||
0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e,
|
||||
0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e,
|
||||
0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7e, 0x3f7f,
|
||||
0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f,
|
||||
0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f,
|
||||
0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f,
|
||||
0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f,
|
||||
0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f,
|
||||
0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f,
|
||||
0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f,
|
||||
0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f,
|
||||
0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f,
|
||||
0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f,
|
||||
0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f, 0x3f7f,
|
||||
0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80,
|
||||
0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80,
|
||||
0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80,
|
||||
0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80,
|
||||
0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80,
|
||||
0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80,
|
||||
0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80,
|
||||
0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80,
|
||||
0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80,
|
||||
0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80,
|
||||
0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80,
|
||||
0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80,
|
||||
0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80,
|
||||
0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80,
|
||||
0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80,
|
||||
0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80,
|
||||
0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80,
|
||||
0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3f80,
|
||||
0x3f80, 0x3f80, 0x3f80, 0x3f80, 0x3eff, 0x3efe, 0x3efd, 0x3efc, 0x3efb, 0x3efa, 0x3ef9, 0x3ef8,
|
||||
0x3ef7, 0x3ef6, 0x3ef5, 0x3ef4, 0x3ef3, 0x3ef2, 0x3ef1, 0x3ef0, 0x3eef, 0x3eee, 0x3eed, 0x3eec,
|
||||
0x3eeb, 0x3eea, 0x3ee9, 0x3ee7, 0x3ee6, 0x3ee5, 0x3ee4, 0x3ee3, 0x3ee2, 0x3ee1, 0x3ee0, 0x3edf,
|
||||
0x3ede, 0x3edd, 0x3edc, 0x3edb, 0x3eda, 0x3ed9, 0x3ed8, 0x3ed7, 0x3ed6, 0x3ed5, 0x3ed4, 0x3ed3,
|
||||
0x3ed2, 0x3ed1, 0x3ed1, 0x3ed0, 0x3ecf, 0x3ece, 0x3ecd, 0x3ecc, 0x3ecb, 0x3eca, 0x3ec9, 0x3ec8,
|
||||
0x3ec7, 0x3ec6, 0x3ec5, 0x3ec4, 0x3ec3, 0x3ec2, 0x3ec1, 0x3ec0, 0x3ebf, 0x3ebe, 0x3ebd, 0x3ebc,
|
||||
0x3ebb, 0x3eba, 0x3eba, 0x3eb9, 0x3eb7, 0x3eb6, 0x3eb5, 0x3eb4, 0x3eb4, 0x3eb3, 0x3eb2, 0x3eb1,
|
||||
0x3eb0, 0x3eaf, 0x3eaf, 0x3eae, 0x3ead, 0x3eab, 0x3eaa, 0x3ea9, 0x3ea8, 0x3ea7, 0x3ea7, 0x3ea6,
|
||||
0x3ea5, 0x3ea4, 0x3ea3, 0x3ea2, 0x3ea1, 0x3ea0, 0x3e9f, 0x3e9e, 0x3e9e, 0x3e9d, 0x3e9c, 0x3e9b,
|
||||
0x3e9a, 0x3e99, 0x3e98, 0x3e98, 0x3e97, 0x3e97, 0x3e96, 0x3e95, 0x3e94, 0x3e93, 0x3e92, 0x3e91,
|
||||
0x3e90, 0x3e8f, 0x3e8e, 0x3e8e, 0x3e8d, 0x3e8c, 0x3e8b, 0x3e8a, 0x3e8a, 0x3e89, 0x3e88, 0x3e88,
|
||||
0x3e87, 0x3e86, 0x3e85, 0x3e85, 0x3e83, 0x3e82, 0x3e82, 0x3e81, 0x3e80, 0x3e7e, 0x3e7d, 0x3e7c,
|
||||
0x3e7b, 0x3e7a, 0x3e78, 0x3e77, 0x3e75, 0x3e72, 0x3e71, 0x3e6f, 0x3e6e, 0x3e6c, 0x3e6b, 0x3e69,
|
||||
0x3e68, 0x3e67, 0x3e65, 0x3e64, 0x3e63, 0x3e61, 0x3e60, 0x3e5f, 0x3e5d, 0x3e5c, 0x3e5a, 0x3e59,
|
||||
0x3e58, 0x3e56, 0x3e55, 0x3e54, 0x3e52, 0x3e51, 0x3e50, 0x3e4f, 0x3e4e, 0x3e4c, 0x3e4b, 0x3e4a,
|
||||
0x3e49, 0x3e47, 0x3e46, 0x3e45, 0x3e44, 0x3e43, 0x3e41, 0x3e40, 0x3e3f, 0x3e3e, 0x3e3c, 0x3e3a,
|
||||
0x3e39, 0x3e37, 0x3e36, 0x3e35, 0x3e34, 0x3e33, 0x3e31, 0x3e30, 0x3e2f, 0x3e2e, 0x3e2c, 0x3e2b,
|
||||
0x3e2a, 0x3e29, 0x3e28, 0x3e27, 0x3e26, 0x3e25, 0x3e24, 0x3e23, 0x3e22, 0x3e20, 0x3e20, 0x3e1f,
|
||||
0x3e1e, 0x3e1d, 0x3e1c, 0x3e1b, 0x3e1a, 0x3e19, 0x3e18, 0x3e17, 0x3e16, 0x3e15, 0x3e14, 0x3e13,
|
||||
0x3e12, 0x3e11, 0x3e10, 0x3e0f, 0x3e0e, 0x3e0c, 0x3e0b, 0x3e0a, 0x3e09, 0x3e08, 0x3e07, 0x3e06,
|
||||
0x3e05, 0x3e04, 0x3e03, 0x3e03, 0x3e02, 0x3e01, 0x3e00, 0x3dff, 0x3dfd, 0x3dfb, 0x3df9, 0x3df8,
|
||||
0x3df6, 0x3df4, 0x3df1, 0x3df1, 0x3ded, 0x3ded, 0x3dea, 0x3dea, 0x3de7, 0x3de7, 0x3de4, 0x3de4,
|
||||
0x3de1, 0x3de1, 0x3dde, 0x3dde, 0x3ddb, 0x3ddb, 0x3dd8, 0x3dd8, 0x3dd5, 0x3dd5, 0x3dd2, 0x3dd2,
|
||||
0x3dcf, 0x3dcf, 0x3dcc, 0x3dcc, 0x3dc9, 0x3dc9, 0x3dc7, 0x3dc7, 0x3dc3, 0x3dc3, 0x3dc0, 0x3dc0,
|
||||
0x3dbe, 0x3dbe, 0x3dbb, 0x3dbb, 0x3db9, 0x3db9, 0x3db6, 0x3db4, 0x3db4, 0x3db1, 0x3db1, 0x3dae,
|
||||
0x3dae, 0x3dac, 0x3dac, 0x3da9, 0x3da9, 0x3da7, 0x3da7, 0x3da5, 0x3da5, 0x3da3, 0x3da3, 0x3da0,
|
||||
0x3da0, 0x3d9e, 0x3d9e, 0x3d9b, 0x3d9b, 0x3d99, 0x3d99, 0x3d97, 0x3d97, 0x3d94, 0x3d94, 0x3d93,
|
||||
0x3d93, 0x3d91, 0x3d91, 0x3d8f, 0x3d8f, 0x3d8d, 0x3d8d, 0x3d8a, 0x3d8a, 0x3d88, 0x3d88, 0x3d86,
|
||||
0x3d86, 0x3d84, 0x3d82, 0x3d82, 0x3d80, 0x3d80, 0x3d7d, 0x3d7d, 0x3d79, 0x3d79, 0x3d76, 0x3d76,
|
||||
0x3d72, 0x3d72, 0x3d6f, 0x3d6f, 0x3d6b, 0x3d6b, 0x3d68, 0x3d68, 0x3d65, 0x3d65, 0x3d61, 0x3d61,
|
||||
0x3d5e, 0x3d5e, 0x3d5b, 0x3d5b, 0x3d58, 0x3d58, 0x3d55, 0x3d55, 0x3d52, 0x3d52, 0x3d4e, 0x3d4e,
|
||||
0x3d4b, 0x3d4b, 0x3d48, 0x3d48, 0x3d45, 0x3d45, 0x3d42, 0x3d3f, 0x3d3f, 0x3d3c, 0x3d3c, 0x3d3a,
|
||||
0x3d3a, 0x3d37, 0x3d37, 0x3d34, 0x3d34, 0x3d32, 0x3d32, 0x3d2f, 0x3d2f, 0x3d2c, 0x3d2c, 0x3d2a,
|
||||
0x3d2a, 0x3d27, 0x3d27, 0x3d24, 0x3d24, 0x3d22, 0x3d22, 0x3d20, 0x3d20, 0x3d1d, 0x3d1d, 0x3d1b,
|
||||
0x3d1b, 0x3d19, 0x3d19, 0x3d17, 0x3d17, 0x3d15, 0x3d15, 0x3d12, 0x3d12, 0x3d10, 0x3d10, 0x3d0e,
|
||||
0x3d0c, 0x3d0c, 0x3d0a, 0x3d0a, 0x3d08, 0x3d08, 0x3d06, 0x3d06, 0x3d04, 0x3d04, 0x3d02, 0x3d02,
|
||||
0x3cff, 0x3cff, 0x3cfb, 0x3cfb, 0x3cf8, 0x3cf8, 0x3cf4, 0x3cf4, 0x3cf0, 0x3cf0, 0x3cec, 0x3cec,
|
||||
0x3ce9, 0x3ce9, 0x3ce5, 0x3ce5, 0x3ce2, 0x3ce2, 0x3cdf, 0x3cdf, 0x3cdb, 0x3cdb, 0x3cd8, 0x3cd8,
|
||||
0x3cd5, 0x3cd5, 0x3cd2, 0x3cd2, 0x3ccf, 0x3ccf, 0x3ccc, 0x3cc8, 0x3cc8, 0x3cc5, 0x3cc5, 0x3cc2,
|
||||
0x3cc2, 0x3cbf, 0x3cbf, 0x3cbc, 0x3cbc, 0x3cb9, 0x3cb9, 0x3cb6, 0x3cb6, 0x3cb4, 0x3cb4, 0x3cb1,
|
||||
0x3cb1, 0x3cae, 0x3cae, 0x3cac, 0x3cac, 0x3ca9, 0x3ca9, 0x3ca7, 0x3ca7, 0x3ca5, 0x3ca5, 0x3ca2,
|
||||
0x3ca2, 0x3ca0, 0x3ca0, 0x3c9d, 0x3c9d, 0x3c9b, 0x3c9b, 0x3c98, 0x3c98, 0x3c96, 0x3c96, 0x3c93,
|
||||
0x3c93, 0x3c8f, 0x3c8f, 0x3c8f, 0x3c8f, 0x3c8b, 0x3c8b, 0x3c8b, 0x3c8b, 0x3c87, 0x3c87, 0x3c87,
|
||||
0x3c87, 0x3c82, 0x3c82, 0x3c82, 0x3c82, 0x3c7c, 0x3c7c, 0x3c7c, 0x3c7c, 0x3c75, 0x3c75, 0x3c75,
|
||||
0x3c75, 0x3c6e, 0x3c6e, 0x3c6e, 0x3c6e, 0x3c66, 0x3c66, 0x3c66, 0x3c66, 0x3c5f, 0x3c5f, 0x3c5f,
|
||||
0x3c5f, 0x3c59, 0x3c59, 0x3c59, 0x3c59, 0x3c53, 0x3c53, 0x3c53, 0x3c4c, 0x3c4c, 0x3c4c, 0x3c4c,
|
||||
0x3c46, 0x3c46, 0x3c46, 0x3c46, 0x3c3f, 0x3c3f, 0x3c3f, 0x3c3f, 0x3c39, 0x3c39, 0x3c39, 0x3c39,
|
||||
0x3c34, 0x3c34, 0x3c34, 0x3c34, 0x3c2f, 0x3c2f, 0x3c2f, 0x3c2f, 0x3c29, 0x3c29, 0x3c29, 0x3c29,
|
||||
0x3c24, 0x3c24, 0x3c24, 0x3c24, 0x3c1f, 0x3c1f, 0x3c1f, 0x3c1f, 0x3c1a, 0x3c1a, 0x3c1a, 0x3c16,
|
||||
0x3c16, 0x3c16, 0x3c16, 0x3c12, 0x3c12, 0x3c12, 0x3c12, 0x3c0d, 0x3c0d, 0x3c0d, 0x3c0d, 0x3c09,
|
||||
0x3c09, 0x3c09, 0x3c09, 0x3c04, 0x3c04, 0x3c04, 0x3c04, 0x3c00, 0x3c00, 0x3c00, 0x3c00, 0x3bf8,
|
||||
0x3bf8, 0x3bf8, 0x3bf8, 0x3bf1, 0x3bf1, 0x3bf1, 0x3bf1, 0x3be9, 0x3be9, 0x3be9, 0x3be9, 0x3be2,
|
||||
0x3be2, 0x3be2, 0x3be2, 0x3bdb, 0x3bdb, 0x3bdb, 0x3bd4, 0x3bd4, 0x3bd4, 0x3bd4, 0x3bce, 0x3bce,
|
||||
0x3bce, 0x3bce, 0x3bc8, 0x3bc8, 0x3bc8, 0x3bc8, 0x3bc2, 0x3bc2, 0x3bc2, 0x3bc2, 0x3bbc, 0x3bbc,
|
||||
0x3bbc, 0x3bbc, 0x3bb6, 0x3bb6, 0x3bb6, 0x3bb6, 0x3bb0, 0x3bb0, 0x3bb0, 0x3bb0, 0x3bab, 0x3bab,
|
||||
0x3bab, 0x3bab, 0x3ba6, 0x3ba6, 0x3ba6, 0x3ba6, 0x3ba1, 0x3ba1, 0x3ba1, 0x3ba1, 0x3b9c, 0x3b9c,
|
||||
0x3b9c, 0x3b97, 0x3b97, 0x3b97, 0x3b97, 0x3b92, 0x3b92, 0x3b92, 0x3b92, 0x3b8e, 0x3b8e, 0x3b8e,
|
||||
0x3b8e, 0x3b8a, 0x3b8a, 0x3b8a, 0x3b8a, 0x3b85, 0x3b85, 0x3b85, 0x3b85, 0x3b81, 0x3b81, 0x3b81,
|
||||
0x3b81, 0x3b7b, 0x3b7b, 0x3b7b, 0x3b7b, 0x3b73, 0x3b73, 0x3b73, 0x3b73, 0x3b6c, 0x3b6c, 0x3b6c,
|
||||
0x3b6c, 0x3b65, 0x3b65, 0x3b65, 0x3b5d, 0x3b5d, 0x3b5d, 0x3b5d, 0x3b56, 0x3b56, 0x3b56, 0x3b56,
|
||||
0x3b50, 0x3b50, 0x3b50, 0x3b50, 0x3b4a, 0x3b4a, 0x3b4a, 0x3b4a, 0x3b43, 0x3b43, 0x3b43, 0x3b43,
|
||||
0x3b3d, 0x3b3d, 0x3b3d, 0x3b3d, 0x3b38, 0x3b38, 0x3b38, 0x3b38, 0x3b32, 0x3b32, 0x3b32, 0x3b32,
|
||||
0x3b2c, 0x3b2c, 0x3b2c, 0x3b2c, 0x3b27, 0x3b27, 0x3b27, 0x3b27, 0x3b22, 0x3b22, 0x3b22, 0x3b1d,
|
||||
0x3b1d, 0x3b1d, 0x3b1d, 0x3b18, 0x3b18, 0x3b18, 0x3b18, 0x3b13, 0x3b13, 0x3b13, 0x3b13, 0x3b0f,
|
||||
0x3b0f, 0x3b0f, 0x3b0f, 0x3b0b, 0x3b0b, 0x3b0b, 0x3b0b, 0x3b06, 0x3b06, 0x3b06, 0x3b06, 0x3b02,
|
||||
0x3b02, 0x3b02, 0x3b02, 0x3afd, 0x3afd, 0x3afd, 0x3afd, 0x3af5, 0x3af5, 0x3af5, 0x3af5, 0x3aed,
|
||||
0x3aed, 0x3aed, 0x3aed, 0x3ae6, 0x3ae6, 0x3ae6, 0x3adf, 0x3adf, 0x3adf, 0x3adf, 0x3ad8, 0x3ad8,
|
||||
0x3ad8, 0x3ad8, 0x3ad1, 0x3ad1, 0x3ad1, 0x3ad1, 0x3acb, 0x3acb, 0x3acb, 0x3acb, 0x3ac5, 0x3ac5,
|
||||
0x3ac5, 0x3ac5, 0x3abf, 0x3abf, 0x3abf, 0x3abf, 0x3ab9, 0x3ab9, 0x3ab9, 0x3ab9, 0x3ab3, 0x3ab3,
|
||||
0x3ab3, 0x3ab3, 0x3aae, 0x3aae, 0x3aae, 0x3aae, 0x3aa9, 0x3aa9, 0x3aa9, 0x3aa3, 0x3aa3, 0x3aa3,
|
||||
0x3aa3, 0x3a9e, 0x3a9e, 0x3a9e, 0x3a9e, 0x3a99, 0x3a99, 0x3a99, 0x3a99, 0x3a94, 0x3a94, 0x3a94,
|
||||
0x3a94, 0x3a90, 0x3a90, 0x3a90, 0x3a90, 0x3a8c, 0x3a8c, 0x3a8c, 0x3a8c, 0x3a87, 0x3a87, 0x3a87,
|
||||
0x3a87, 0x3a83, 0x3a83, 0x3a83, 0x3a83, 0x3a7e, 0x3a7e, 0x3a7e, 0x3a7e, 0x3a76, 0x3a76, 0x3a76,
|
||||
0x3a76, 0x3a6f, 0x3a6f, 0x3a6f, 0x3a68, 0x3a68, 0x3a68, 0x3a68, 0x3a60, 0x3a60, 0x3a60, 0x3a60,
|
||||
0x3a59, 0x3a59, 0x3a59, 0x3a59, 0x3a53, 0x3a53, 0x3a53, 0x3a53, 0x3a4d, 0x3a4d, 0x3a4d, 0x3a4d,
|
||||
0x3a46, 0x3a46, 0x3a46, 0x3a46, 0x3a40, 0x3a40, 0x3a40, 0x3a40, 0x3a3a, 0x3a3a, 0x3a3a, 0x3a3a,
|
||||
0x3a34, 0x3a34, 0x3a34, 0x3a34, 0x3a2f, 0x3a2f, 0x3a2f, 0x3a2f, 0x3a2a, 0x3a2a, 0x3a2a, 0x3a24,
|
||||
0x3a24, 0x3a24, 0x3a24, 0x3a1f, 0x3a1f, 0x3a1f, 0x3a1f, 0x3a1a, 0x3a1a, 0x3a1a, 0x3a1a, 0x3a15,
|
||||
0x3a15, 0x3a15, 0x3a15, 0x3a11, 0x3a11, 0x3a11, 0x3a11, 0x3a0d, 0x3a0d, 0x3a0d, 0x3a0d, 0x3a08,
|
||||
0x3a08, 0x3a08, 0x3a08, 0x3a04, 0x3a04, 0x3a04, 0x3a04, 0x3a00, 0x3a00, 0x3a00, 0x3a00, 0x39f8,
|
||||
0x39f8, 0x39f8, 0x39f0, 0x39f0, 0x39f0, 0x39f0, 0x39e9, 0x39e9, 0x39e9, 0x39e9, 0x39e2, 0x39e2,
|
||||
0x39e2, 0x39e2, 0x39db, 0x39db, 0x39db, 0x39db, 0x39d4, 0x39d4, 0x39d4, 0x39d4, 0x39ce, 0x39ce,
|
||||
0x39ce, 0x39ce, 0x39c7, 0x39c7, 0x39c7, 0x39c7, 0x39c1, 0x39c1, 0x39c1, 0x39c1, 0x39bb, 0x39bb,
|
||||
0x39bb, 0x39bb, 0x39b5, 0x39b5, 0x39b5, 0x39b5, 0x39b0, 0x39b0,
|
||||
};
|
||||
|
||||
// FIXME: do not hard-code this golden table
|
||||
// Golden values contributed from hw; they pair with the fixed `PRE_DATA` input
|
||||
static double sigmode_golden[] = {
|
||||
0.5, 0.501999989, 0.503999915, 0.505999712, 0.507999317, 0.509998667, 0.511997697,
|
||||
0.513996342, 0.515994541, 0.517992228, 0.51998934, 0.521985814, 0.523981585, 0.525976591,
|
||||
0.527970767, 0.529964052, 0.531956381, 0.533947691, 0.535937921, 0.537927006, 0.539914885,
|
||||
0.541901494, 0.543886772, 0.545870657, 0.547853086, 0.549833997, 0.55181333, 0.553791023,
|
||||
0.555767014, 0.557741243, 0.559713649, 0.561684172, 0.56365275, 0.565619325, 0.567583836,
|
||||
0.569546224, 0.571506429, 0.573464394, 0.575420058, 0.577373363, 0.579324252, 0.581272667,
|
||||
0.583218549, 0.585161842, 0.58710249, 0.589040434, 0.59097562, 0.59290799, 0.594837491,
|
||||
0.596764066, 0.59868766, 0.60060822, 0.60252569, 0.604440017, 0.606351149, 0.608259031,
|
||||
0.610163611, 0.612064837, 0.613962657, 0.61585702, 0.617747875, 0.61963517, 0.621518857,
|
||||
0.623398885, 0.625275204, 0.627147766, 0.629016523, 0.630881426, 0.632742428, 0.634599482,
|
||||
0.63645254, 0.638301558, 0.640146488, 0.641987286, 0.643823907, 0.645656306, 0.64748444,
|
||||
0.649308265, 0.651127739, 0.652942818, 0.654753461, 0.656559626, 0.658361272, 0.66015836,
|
||||
0.661950848, 0.663738697, 0.665521869, 0.667300325, 0.669074026, 0.670842936, 0.672607017,
|
||||
0.674366233, 0.676120548, 0.677869926, 0.679614333, 0.681353734, 0.683088095, 0.684817383,
|
||||
0.686541565, 0.688260608, 0.689974481, 0.691683153, 0.693386592, 0.695084769, 0.696777653,
|
||||
0.698465216, 0.700147429, 0.701824263, 0.703495691, 0.705161686, 0.706822221, 0.70847727,
|
||||
0.710126808, 0.71177081, 0.71340925, 0.715042106, 0.716669353, 0.718290968, 0.71990693,
|
||||
0.721517216, 0.723121805, 0.724720676, 0.726313808, 0.727901182, 0.729482779, 0.731058579,
|
||||
0.732628564, 0.734192716, 0.735751018, 0.737303454, 0.738850006, 0.740390659, 0.741925398,
|
||||
0.743454208, 0.744977074, 0.746493983, 0.748004922, 0.749509876, 0.751008835, 0.752501785,
|
||||
0.753988716, 0.755469617, 0.756944477, 0.758413287, 0.759876035, 0.761332715, 0.762783316,
|
||||
0.764227831, 0.765666252, 0.767098572, 0.768524783, 0.769944881, 0.771358858, 0.772766709,
|
||||
0.774168429, 0.775564014, 0.77695346, 0.778336762, 0.779713917, 0.781084923, 0.782449776,
|
||||
0.783808476, 0.78516102, 0.786507407, 0.787847636, 0.789181707, 0.790509619, 0.791831373,
|
||||
0.79314697, 0.794456411, 0.795759698, 0.797056831, 0.798347814, 0.79963265, 0.80091134,
|
||||
0.802183889, 0.803450299, 0.804710577, 0.805964724, 0.807212748, 0.808454651, 0.809690441,
|
||||
0.810920123, 0.812143702, 0.813361186, 0.814572581, 0.815777894, 0.816977132, 0.818170304,
|
||||
0.819357418, 0.820538481, 0.821713502, 0.82288249, 0.824045455, 0.825202406, 0.826353353,
|
||||
0.827498306, 0.828637274, 0.82977027, 0.830897303, 0.832018385, 0.833133528, 0.834242742,
|
||||
0.83534604, 0.836443435, 0.837534937, 0.838620561, 0.83970032, 0.840774225, 0.841842291,
|
||||
0.842904531, 0.843960959, 0.84501159, 0.846056436, 0.847095514, 0.848128836, 0.84915642,
|
||||
0.850178278, 0.851194427, 0.852204883, 0.85320966, 0.854208775, 0.855202244, 0.856190082,
|
||||
0.857172307, 0.858148935, 0.859119982, 0.860085466, 0.861045403, 0.861999811, 0.862948707,
|
||||
0.863892109, 0.864830034, 0.8657625, 0.866689525, 0.867611126, 0.868527324, 0.869438134,
|
||||
0.870343577, 0.871243671, 0.872138434, 0.873027885, 0.873912043, 0.874790928, 0.875664558,
|
||||
0.876532952, 0.877396131, 0.878254114, 0.879106919, 0.879954567, 0.880797078, 0.881634471,
|
||||
0.882466767, 0.883293985, 0.884116145, 0.884933268, 0.885745374, 0.886552483, 0.887354615,
|
||||
0.888151792, 0.888944033, 0.88973136, 0.890513792, 0.89129135, 0.892064056, 0.89283193,
|
||||
0.893594992, 0.894353264, 0.895106767, 0.895855521, 0.896599549, 0.897338869, 0.898073505,
|
||||
0.898803476, 0.899528804, 0.900249511, 0.900965617, 0.901677143, 0.902384111, 0.903086543,
|
||||
0.903784458, 0.90447788, 0.905166828, 0.905851324, 0.90653139, 0.907207047, 0.907878316,
|
||||
0.908545218, 0.909207776, 0.90986601, 0.910519941, 0.911169591, 0.911814981, 0.912456133,
|
||||
0.913093067, 0.913725806, 0.914354369, 0.91497878, 0.915599058, 0.916215226, 0.916827304,
|
||||
0.917435313, 0.918039275, 0.91863921, 0.919235141, 0.919827088, 0.920415072, 0.920999114,
|
||||
0.921579235, 0.922155456, 0.922727798, 0.923296282, 0.923860929, 0.92442176, 0.924978795,
|
||||
0.925532055, 0.926081561, 0.926627334, 0.927169394, 0.927707762, 0.928242458, 0.928773503,
|
||||
0.929300917, 0.929824721, 0.930344935, 0.93086158, 0.931374675, 0.931884241, 0.932390297,
|
||||
0.932892865, 0.933391964, 0.933887615, 0.934379836, 0.934868648, 0.93535407, 0.935836124,
|
||||
0.936314827, 0.9367902, 0.937262263, 0.937731034, 0.938196534, 0.938658781, 0.939117796,
|
||||
0.939573597, 0.940026203, 0.940475634, 0.940921909, 0.941365046, 0.941805065, 0.942241985,
|
||||
0.942675824, 0.943106601, 0.943534335, 0.943959044, 0.944380747, 0.944799462, 0.945215208,
|
||||
0.945628003, 0.946037865, 0.946444813, 0.946848864, 0.947250036, 0.947648348, 0.948043817,
|
||||
0.948436462, 0.948826299, 0.949213347, 0.949597623, 0.949979144, 0.950357929, 0.950733994,
|
||||
0.951107357, 0.951478034, 0.951846044, 0.952211402, 0.952574127, 0.952934234, 0.953291742,
|
||||
0.953646665, 0.953999022, 0.954348829, 0.954696102, 0.955040858, 0.955383113, 0.955722883,
|
||||
0.956060185, 0.956395034, 0.956727447, 0.95705744, 0.957385028, 0.957710228, 0.958033055,
|
||||
0.958353525, 0.958671653, 0.958987455, 0.959300946, 0.959612142, 0.959921058, 0.960227709,
|
||||
0.960532111, 0.960834277, 0.961134224, 0.961431966, 0.961727518, 0.962020894, 0.962312109,
|
||||
0.962601179, 0.962888117, 0.963172937, 0.963455655, 0.963736284, 0.964014838, 0.964291332,
|
||||
0.96456578, 0.964838195, 0.965108591, 0.965376983, 0.965643384, 0.965907808, 0.966170267,
|
||||
0.966430777, 0.966689349, 0.966945998, 0.967200737, 0.967453578, 0.967704535, 0.967953622,
|
||||
0.96820085, 0.968446233, 0.968689784, 0.968931516, 0.96917144, 0.969409571, 0.969645919,
|
||||
0.969880498, 0.97011332, 0.970344398, 0.970573743, 0.970801367, 0.971027284, 0.971251504,
|
||||
0.97147404, 0.971694904, 0.971914107, 0.972131661, 0.972347578, 0.972561869, 0.972774546,
|
||||
0.97298562, 0.973195103, 0.973403006, 0.973609341, 0.973814117, 0.974017347, 0.974219042,
|
||||
0.974419212, 0.974617868, 0.974815021, 0.975010683, 0.975204863, 0.975397572, 0.97558882,
|
||||
0.975778619, 0.975966979, 0.97615391, 0.976339422, 0.976523525, 0.97670623, 0.976887547,
|
||||
0.977067486, 0.977246057, 0.977423269, 0.977599132, 0.977773657, 0.977946853, 0.978118729,
|
||||
0.978289296, 0.978458562, 0.978626537, 0.978793231, 0.978958653, 0.979122812, 0.979285717,
|
||||
0.979447378, 0.979607804, 0.979767003, 0.979924985, 0.980081758, 0.980237332, 0.980391715,
|
||||
0.980544915, 0.980696943, 0.980847805, 0.980997512, 0.981146071, 0.98129349, 0.981439779,
|
||||
0.981584945, 0.981728996, 0.981871942, 0.98201379, 0.982154548, 0.982294225, 0.982432827,
|
||||
0.982570364, 0.982706843, 0.982842273, 0.982976659, 0.983110012, 0.983242337, 0.983373644,
|
||||
0.983503939, 0.983633229, 0.983761524, 0.983888829, 0.984015152, 0.9841405, 0.984264882,
|
||||
0.984388303, 0.984510772, 0.984632294, 0.984752879, 0.984872531, 0.984991259, 0.985109069,
|
||||
0.985225968, 0.985341963, 0.985457061, 0.985571269, 0.985684592, 0.985797039, 0.985908614,
|
||||
0.986019326, 0.98612918, 0.986238183, 0.986346341, 0.986453661, 0.986560148, 0.98666581,
|
||||
0.986770653, 0.986874682, 0.986977903, 0.987080324, 0.98718195, 0.987282786, 0.987382839,
|
||||
0.987482115, 0.98758062, 0.98767836, 0.987775339, 0.987871565, 0.987967043, 0.988061778,
|
||||
0.988155776, 0.988249042, 0.988341583, 0.988433404, 0.98852451, 0.988614907, 0.9887046,
|
||||
0.988793594, 0.988881895, 0.988969507, 0.989056437, 0.98914269, 0.98922827, 0.989313183,
|
||||
0.989397433, 0.989481027, 0.989563968, 0.989646262, 0.989727914, 0.989808929, 0.989889312,
|
||||
0.989969066, 0.990048198, 0.990126712, 0.990204613, 0.990281905, 0.990358593, 0.990434681,
|
||||
0.990510175, 0.990585079, 0.990659397, 0.990733134, 0.990806295, 0.990878883, 0.990950903,
|
||||
0.99102236, 0.991093257, 0.9911636, 0.991233391, 0.991302637, 0.99137134, 0.991439506,
|
||||
0.991507137, 0.991574239, 0.991640815, 0.991706869, 0.991772406, 0.991837429, 0.991901942,
|
||||
0.99196595, 0.992029456, 0.992092463, 0.992154977, 0.992217, 0.992278537, 0.992339591,
|
||||
0.992400166, 0.992460265, 0.992519893, 0.992579053, 0.992637749, 0.992695983, 0.99275376,
|
||||
0.992811084, 0.992867957, 0.992924384, 0.992980367, 0.993035911, 0.993091018, 0.993145692,
|
||||
0.993199936, 0.993253754, 0.993307149, 0.993360124, 0.993412683, 0.993464828, 0.993516563,
|
||||
0.993567892, 0.993618816, 0.99366934, 0.993719466, 0.993769198, 0.993818539, 0.993867491,
|
||||
0.993916059, 0.993964243, 0.994012049, 0.994059478, 0.994106533, 0.994153219, 0.994199536,
|
||||
0.994245489, 0.994291079, 0.994336311, 0.994381186, 0.994425708, 0.994469878, 0.994513701,
|
||||
0.994557178, 0.994600313, 0.994643108, 0.994685565, 0.994727688, 0.994769478, 0.994810939,
|
||||
0.994852073, 0.994892883, 0.994933371, 0.994973539, 0.995013391, 0.995052928, 0.995092153,
|
||||
0.995131069, 0.995169677, 0.995207981, 0.995245983, 0.995283685, 0.995321089, 0.995358198,
|
||||
0.995395014, 0.995431539, 0.995467776, 0.995503727, 0.995539394, 0.995574779, 0.995609885,
|
||||
0.995644713, 0.995679266, 0.995713547, 0.995747556, 0.995781297, 0.995814772, 0.995847981,
|
||||
0.995880929, 0.995913616, 0.995946044, 0.995978217, 0.996010135, 0.996041801, 0.996073216,
|
||||
0.996104383, 0.996135304, 0.99616598, 0.996196413, 0.996226606, 0.996256561, 0.996286278,
|
||||
0.99631576, 0.996345009, 0.996374027, 0.996402815, 0.996431375, 0.99645971, 0.99648782,
|
||||
0.996515708, 0.996543375, 0.996570823, 0.996598054, 0.99662507, 0.996651872, 0.996678461,
|
||||
0.99670484, 0.99673101, 0.996756974, 0.996782731, 0.996808285, 0.996833636, 0.996858787,
|
||||
0.996883738, 0.996908492, 0.99693305, 0.996957413, 0.996981584, 0.997005563, 0.997029352,
|
||||
0.997052952, 0.997076366, 0.997099594, 0.997122638, 0.9971455, 0.99716818, 0.997190681,
|
||||
0.997213004, 0.997235149, 0.99725712, 0.997278916, 0.997300539, 0.997321991, 0.997343273,
|
||||
0.997364386, 0.997385332, 0.997406112, 0.997426727, 0.997447179, 0.997467468, 0.997487597,
|
||||
0.997507566, 0.997527377, 0.997547031, 0.997566528, 0.997585872, 0.997605062, 0.997624099,
|
||||
0.997642986, 0.997661723, 0.997680312, 0.997698752, 0.997717047, 0.997735197, 0.997753202,
|
||||
0.997771065, 0.997788786, 0.997806367, 0.997823808, 0.99784111, 0.997858276, 0.997875305,
|
||||
0.997892199, 0.997908959, 0.997925586, 0.997942081, 0.997958445, 0.99797468, 0.997990785,
|
||||
0.998006763, 0.998022614, 0.998038339, 0.998053939, 0.998069415, 0.998084769, 0.998100001,
|
||||
0.998115112, 0.998130102, 0.998144974, 0.998159728, 0.998174365, 0.998188885, 0.99820329,
|
||||
0.998217581, 0.998231759, 0.998245823, 0.998259777, 0.998273619, 0.998287351, 0.998300975,
|
||||
0.99831449, 0.998327898, 0.998341199, 0.998354395, 0.998367486, 0.998380473, 0.998393356,
|
||||
0.998406138, 0.998418818, 0.998431397, 0.998443876, 0.998456256, 0.998468538, 0.998480723,
|
||||
0.99849281, 0.998504802, 0.998516698, 0.998528499, 0.998540207, 0.998551822, 0.998563345,
|
||||
0.998574776, 0.998586116, 0.998597366, 0.998608527, 0.998619599, 0.998630583, 0.99864148,
|
||||
0.99865229, 0.998663015, 0.998673654, 0.998684208, 0.998694679, 0.998705066, 0.998715371,
|
||||
0.998725594, 0.998735736, 0.998745797, 0.998755778, 0.99876568, 0.998775503, 0.998785248,
|
||||
0.998794916, 0.998804507, 0.998814021, 0.99882346, 0.998832824, 0.998842113, 0.998851329,
|
||||
0.998860471, 0.998869541, 0.998878538, 0.998887464, 0.998896319, 0.998905104, 0.998913818,
|
||||
0.998922464, 0.99893104, 0.998939549, 0.99894799, 0.998956364, 0.998964671, 0.998972912,
|
||||
0.998981088, 0.998989198, 0.998997244, 0.999005226, 0.999013145, 0.999021001, 0.999028794,
|
||||
0.999036525, 0.999044195, 0.999051803, 0.999059352, 0.99906684, 0.999074268, 0.999081638,
|
||||
0.999088949, 0.999096202, 0.999103397, 0.999110535, 0.999117616, 0.99912464, 0.999131609,
|
||||
0.999138523, 0.999145381, 0.999152185, 0.999158935, 0.999165631, 0.999172274, 0.999178864,
|
||||
0.999185401, 0.999191887, 0.999198321, 0.999204704, 0.999211036, 0.999217317, 0.999223549,
|
||||
0.999229731, 0.999235864, 0.999241948, 0.999247984, 0.999253971, 0.999259911, 0.999265804,
|
||||
0.99927165, 0.999277449, 0.999283202, 0.99928891, 0.999294572, 0.999300189, 0.999305761,
|
||||
0.999311289, 0.999316773, 0.999322213, 0.99932761, 0.999332964, 0.999338276, 0.999343545,
|
||||
0.999348772, 0.999353958, 0.999359103, 0.999364206, 0.999369269, 0.999374291, 0.999379274,
|
||||
0.999384217, 0.999389121, 0.999393985, 0.999398811, 0.999403599, 0.999408348, 0.99941306,
|
||||
0.999417734, 0.99942237, 0.99942697, 0.999431534, 0.999436061, 0.999440552, 0.999445007,
|
||||
0.999449427, 0.999453811, 0.999458161, 0.999462476, 0.999466757, 0.999471004, 0.999475217,
|
||||
0.999479396, 0.999483542, 0.999487655, 0.999491735, 0.999495783, 0.999499799, 0.999503783,
|
||||
0.999507735, 0.999511655, 0.999515544, 0.999519403, 0.99952323, 0.999527027, 0.999530794,
|
||||
0.999534531, 0.999538238, 0.999541916, 0.999545564, 0.999549184, 0.999552774, 0.999556336,
|
||||
0.99955987, 0.999563375, 0.999566853, 0.999570303, 0.999573725, 0.99957712, 0.999580488,
|
||||
0.99958383, 0.999587145, 0.999590433, 0.999593695, 0.999596931, 0.999600142, 0.999603326,
|
||||
0.999606486, 0.99960962, 0.99961273, 0.999615814, 0.999618874, 0.99962191, 0.999624921,
|
||||
0.999627909, 0.999630873, 0.999633813, 0.99963673, 0.999639623, 0.999642494, 0.999645341,
|
||||
0.999648166, 0.999650969, 0.999653749, 0.999656507, 0.999659243, 0.999661957, 0.498000011,
|
||||
0.496000085, 0.494000288, 0.492000683, 0.490001333, 0.488002303, 0.486003658, 0.484005459,
|
||||
0.482007772, 0.48001066, 0.478014186, 0.476018415, 0.474023409, 0.472029233, 0.470035948,
|
||||
0.468043619, 0.466052309, 0.464062079, 0.462072994, 0.460085115, 0.458098506, 0.456113228,
|
||||
0.454129343, 0.452146914, 0.450166003, 0.44818667, 0.446208977, 0.444232986, 0.442258757,
|
||||
0.440286351, 0.438315828, 0.43634725, 0.434380675, 0.432416164, 0.430453776, 0.428493571,
|
||||
0.426535606, 0.424579942, 0.422626637, 0.420675748, 0.418727333, 0.416781451, 0.414838158,
|
||||
0.41289751, 0.410959566, 0.40902438, 0.40709201, 0.405162509, 0.403235934, 0.40131234,
|
||||
0.39939178, 0.39747431, 0.395559983, 0.393648851, 0.391740969, 0.389836389, 0.387935163,
|
||||
0.386037343, 0.38414298, 0.382252125, 0.38036483, 0.378481143, 0.376601115, 0.374724796,
|
||||
0.372852234, 0.370983477, 0.369118574, 0.367257572, 0.365400518, 0.36354746, 0.361698442,
|
||||
0.359853512, 0.358012714, 0.356176093, 0.354343694, 0.35251556, 0.350691735, 0.348872261,
|
||||
0.347057182, 0.345246539, 0.343440374, 0.341638728, 0.33984164, 0.338049152, 0.336261303,
|
||||
0.334478131, 0.332699675, 0.330925974, 0.329157064, 0.327392983, 0.325633767, 0.323879452,
|
||||
0.322130074, 0.320385667, 0.318646266, 0.316911905, 0.315182617, 0.313458435, 0.311739392,
|
||||
0.310025519, 0.308316847, 0.306613408, 0.304915231, 0.303222347, 0.301534784, 0.299852571,
|
||||
0.298175737, 0.296504309, 0.294838314, 0.293177779, 0.29152273, 0.289873192, 0.28822919,
|
||||
0.28659075, 0.284957894, 0.283330647, 0.281709032, 0.28009307, 0.278482784, 0.276878195,
|
||||
0.275279324, 0.273686192, 0.272098818, 0.270517221, 0.268941421, 0.267371436, 0.265807284,
|
||||
0.264248982, 0.262696546, 0.261149994, 0.259609341, 0.258074602, 0.256545792, 0.255022926,
|
||||
0.253506017, 0.251995078, 0.250490124, 0.248991165, 0.247498215, 0.246011284, 0.244530383,
|
||||
0.243055523, 0.241586713, 0.240123965, 0.238667285, 0.237216684, 0.235772169, 0.234333748,
|
||||
0.232901428, 0.231475217, 0.230055119, 0.228641142, 0.227233291, 0.225831571, 0.224435986,
|
||||
0.22304654, 0.221663238, 0.220286083, 0.218915077, 0.217550224, 0.216191524, 0.21483898,
|
||||
0.213492593, 0.212152364, 0.210818293, 0.209490381, 0.208168627, 0.20685303, 0.205543589,
|
||||
0.204240302, 0.202943169, 0.201652186, 0.20036735, 0.19908866, 0.197816111, 0.196549701,
|
||||
0.195289423, 0.194035276, 0.192787252, 0.191545349, 0.190309559, 0.189079877, 0.187856298,
|
||||
0.186638814, 0.185427419, 0.184222106, 0.183022868, 0.181829696, 0.180642582, 0.179461519,
|
||||
0.178286498, 0.17711751, 0.175954545, 0.174797594, 0.173646647, 0.172501694, 0.171362726,
|
||||
0.17022973, 0.169102697, 0.167981615, 0.166866472, 0.165757258, 0.16465396, 0.163556565,
|
||||
0.162465063, 0.161379439, 0.16029968, 0.159225775, 0.158157709, 0.157095469, 0.156039041,
|
||||
0.15498841, 0.153943564, 0.152904486, 0.151871164, 0.15084358, 0.149821722, 0.148805573,
|
||||
0.147795117, 0.14679034, 0.145791225, 0.144797756, 0.143809918, 0.142827693, 0.141851065,
|
||||
0.140880018, 0.139914534, 0.138954597, 0.138000189, 0.137051293, 0.136107891, 0.135169966,
|
||||
0.1342375, 0.133310475, 0.132388874, 0.131472676, 0.130561866, 0.129656423, 0.128756329,
|
||||
0.127861566, 0.126972115, 0.126087957, 0.125209072, 0.124335442, 0.123467048, 0.122603869,
|
||||
0.121745886, 0.120893081, 0.120045433, 0.119202922, 0.118365529, 0.117533233, 0.116706015,
|
||||
0.115883855, 0.115066732, 0.114254626, 0.113447517, 0.112645385, 0.111848208, 0.111055967,
|
||||
0.11026864, 0.109486208, 0.10870865, 0.107935944, 0.10716807, 0.106405008, 0.105646736,
|
||||
0.104893233, 0.104144479, 0.103400451, 0.102661131, 0.101926495, 0.101196524, 0.100471196,
|
||||
0.099750489, 0.099034383, 0.098322857, 0.097615889, 0.096913457, 0.096215542, 0.09552212,
|
||||
0.094833172, 0.094148676, 0.09346861, 0.092792953, 0.092121684, 0.091454782, 0.090792224,
|
||||
0.09013399, 0.089480059, 0.088830409, 0.088185019, 0.087543867, 0.086906933, 0.086274194,
|
||||
0.085645631, 0.08502122, 0.084400942, 0.083784774, 0.083172696, 0.082564687, 0.081960725,
|
||||
0.08136079, 0.080764859, 0.080172912, 0.079584928, 0.079000886, 0.078420765, 0.077844544,
|
||||
0.077272202, 0.076703718, 0.076139071, 0.07557824, 0.075021205, 0.074467945, 0.073918439,
|
||||
0.073372666, 0.072830606, 0.072292238, 0.071757542, 0.071226497, 0.070699083, 0.070175279,
|
||||
0.069655065, 0.06913842, 0.068625325, 0.068115759, 0.067609703, 0.067107135, 0.066608036,
|
||||
0.066112385, 0.065620164, 0.065131352, 0.06464593, 0.064163876, 0.063685173, 0.0632098,
|
||||
0.062737737, 0.062268966, 0.061803466, 0.061341219, 0.060882204, 0.060426403, 0.059973797,
|
||||
0.059524366, 0.059078091, 0.058634954, 0.058194935, 0.057758015, 0.057324176, 0.056893399,
|
||||
0.056465665, 0.056040956, 0.055619253, 0.055200538, 0.054784792, 0.054371997, 0.053962135,
|
||||
0.053555187, 0.053151136, 0.052749964, 0.052351652, 0.051956183, 0.051563538, 0.051173701,
|
||||
0.050786653, 0.050402377, 0.050020856, 0.049642071, 0.049266006, 0.048892643, 0.048521966,
|
||||
0.048153956, 0.047788598, 0.047425873, 0.047065766, 0.046708258, 0.046353335, 0.046000978,
|
||||
0.045651171, 0.045303898, 0.044959142, 0.044616887, 0.044277117, 0.043939815, 0.043604966,
|
||||
0.043272553, 0.04294256, 0.042614972, 0.042289772, 0.041966945, 0.041646475, 0.041328347,
|
||||
0.041012545, 0.040699054, 0.040387858, 0.040078942, 0.039772291, 0.039467889, 0.039165723,
|
||||
0.038865776, 0.038568034, 0.038272482, 0.037979106, 0.037687891, 0.037398821, 0.037111883,
|
||||
0.036827063, 0.036544345, 0.036263716, 0.035985162, 0.035708668, 0.03543422, 0.035161805,
|
||||
0.034891409, 0.034623017, 0.034356616, 0.034092192, 0.033829733, 0.033569223, 0.033310651,
|
||||
0.033054002, 0.032799263, 0.032546422, 0.032295465, 0.032046378, 0.03179915, 0.031553767,
|
||||
0.031310216, 0.031068484, 0.03082856, 0.030590429, 0.030354081, 0.030119502, 0.02988668,
|
||||
0.029655602, 0.029426257, 0.029198633, 0.028972716, 0.028748496, 0.02852596, 0.028305096,
|
||||
0.028085893, 0.027868339, 0.027652422, 0.027438131, 0.027225454, 0.02701438, 0.026804897,
|
||||
0.026596994, 0.026390659, 0.026185883, 0.025982653, 0.025780958, 0.025580788, 0.025382132,
|
||||
0.025184979, 0.024989317, 0.024795137, 0.024602428, 0.02441118, 0.024221381, 0.024033021,
|
||||
0.02384609, 0.023660578, 0.023476475, 0.02329377, 0.023112453, 0.022932514, 0.022753943,
|
||||
0.022576731, 0.022400868, 0.022226343, 0.022053147, 0.021881271, 0.021710704, 0.021541438,
|
||||
0.021373463, 0.021206769, 0.021041347, 0.020877188, 0.020714283, 0.020552622, 0.020392196,
|
||||
0.020232997, 0.020075015, 0.019918242, 0.019762668, 0.019608285, 0.019455085, 0.019303057,
|
||||
0.019152195, 0.019002488, 0.018853929, 0.01870651, 0.018560221, 0.018415055, 0.018271004,
|
||||
0.018128058, 0.01798621, 0.017845452, 0.017705775, 0.017567173, 0.017429636, 0.017293157,
|
||||
0.017157727, 0.017023341, 0.016889988, 0.016757663, 0.016626356, 0.016496061, 0.016366771,
|
||||
0.016238476, 0.016111171, 0.015984848, 0.0158595, 0.015735118, 0.015611697, 0.015489228,
|
||||
0.015367706, 0.015247121, 0.015127469, 0.015008741, 0.014890931, 0.014774032, 0.014658037,
|
||||
0.014542939, 0.014428731, 0.014315408, 0.014202961, 0.014091386, 0.013980674, 0.01387082,
|
||||
0.013761817, 0.013653659, 0.013546339, 0.013439852, 0.01333419, 0.013229347, 0.013125318,
|
||||
0.013022097, 0.012919676, 0.01281805, 0.012717214, 0.012617161, 0.012517885, 0.01241938,
|
||||
0.01232164, 0.012224661, 0.012128435, 0.012032957, 0.011938222, 0.011844224, 0.011750958,
|
||||
0.011658417, 0.011566596, 0.01147549, 0.011385093, 0.0112954, 0.011206406, 0.011118105,
|
||||
0.011030493, 0.010943563, 0.01085731, 0.01077173, 0.010686817, 0.010602567, 0.010518973,
|
||||
0.010436032, 0.010353738, 0.010272086, 0.010191071, 0.010110688, 0.010030934, 0.009951802,
|
||||
0.009873288, 0.009795387, 0.009718095, 0.009641407, 0.009565319, 0.009489825, 0.009414921,
|
||||
0.009340603, 0.009266866, 0.009193705, 0.009121117, 0.009049097, 0.00897764, 0.008906743,
|
||||
0.0088364, 0.008766609, 0.008697363, 0.00862866, 0.008560494, 0.008492863, 0.008425761,
|
||||
0.008359185, 0.008293131, 0.008227594, 0.008162571, 0.008098058, 0.00803405, 0.007970544,
|
||||
0.007907537, 0.007845023, 0.007783, 0.007721463, 0.007660409, 0.007599834, 0.007539735,
|
||||
0.007480107, 0.007420947, 0.007362251, 0.007304017, 0.00724624, 0.007188916, 0.007132043,
|
||||
0.007075616, 0.007019633, 0.006964089, 0.006908982, 0.006854308, 0.006800064, 0.006746246,
|
||||
0.006692851, 0.006639876, 0.006587317, 0.006535172, 0.006483437, 0.006432108, 0.006381184,
|
||||
0.00633066, 0.006280534, 0.006230802, 0.006181461, 0.006132509, 0.006083941, 0.006035757,
|
||||
0.005987951, 0.005940522, 0.005893467, 0.005846781, 0.005800464, 0.005754511, 0.005708921,
|
||||
0.005663689, 0.005618814, 0.005574292, 0.005530122, 0.005486299, 0.005442822, 0.005399687,
|
||||
0.005356892, 0.005314435, 0.005272312, 0.005230522, 0.005189061, 0.005147927, 0.005107117,
|
||||
0.005066629, 0.005026461, 0.004986609, 0.004947072, 0.004907847, 0.004868931, 0.004830323,
|
||||
0.004792019, 0.004754017, 0.004716315, 0.004678911, 0.004641802, 0.004604986, 0.004568461,
|
||||
0.004532224, 0.004496273, 0.004460606, 0.004425221, 0.004390115, 0.004355287, 0.004320734,
|
||||
0.004286453, 0.004252444, 0.004218703, 0.004185228, 0.004152019, 0.004119071, 0.004086384,
|
||||
0.004053956, 0.004021783, 0.003989865, 0.003958199, 0.003926784, 0.003895617, 0.003864696,
|
||||
0.00383402, 0.003803587, 0.003773394, 0.003743439, 0.003713722, 0.00368424, 0.003654991,
|
||||
0.003625973, 0.003597185, 0.003568625, 0.00354029, 0.00351218, 0.003484292, 0.003456625,
|
||||
0.003429177, 0.003401946, 0.00337493, 0.003348128, 0.003321539, 0.00329516, 0.00326899,
|
||||
0.003243026, 0.003217269, 0.003191715, 0.003166364, 0.003141213, 0.003116262, 0.003091508,
|
||||
0.00306695, 0.003042587, 0.003018416, 0.002994437, 0.002970648, 0.002947048, 0.002923634,
|
||||
0.002900406, 0.002877362, 0.0028545, 0.00283182, 0.002809319, 0.002786996, 0.002764851,
|
||||
0.00274288, 0.002721084, 0.002699461, 0.002678009, 0.002656727, 0.002635614, 0.002614668,
|
||||
0.002593888, 0.002573273, 0.002552821, 0.002532532, 0.002512403, 0.002492434, 0.002472623,
|
||||
0.002452969, 0.002433472, 0.002414128, 0.002394938, 0.002375901, 0.002357014, 0.002338277,
|
||||
0.002319688, 0.002301248, 0.002282953, 0.002264803, 0.002246798, 0.002228935, 0.002211214,
|
||||
0.002193633, 0.002176192, 0.00215889, 0.002141724, 0.002124695, 0.002107801, 0.002091041,
|
||||
0.002074414, 0.002057919, 0.002041555, 0.00202532, 0.002009215, 0.001993237, 0.001977386,
|
||||
0.001961661, 0.001946061, 0.001930585, 0.001915231, 0.001899999, 0.001884888, 0.001869898,
|
||||
0.001855026, 0.001840272, 0.001825635, 0.001811115, 0.00179671, 0.001782419, 0.001768241,
|
||||
0.001754177, 0.001740223, 0.001726381, 0.001712649, 0.001699025, 0.00168551, 0.001672102,
|
||||
0.001658801, 0.001645605, 0.001632514, 0.001619527, 0.001606644, 0.001593862, 0.001581182,
|
||||
0.001568603, 0.001556124, 0.001543744, 0.001531462, 0.001519277, 0.00150719, 0.001495198,
|
||||
0.001483302, 0.001471501, 0.001459793, 0.001448178, 0.001436655, 0.001425224, 0.001413884,
|
||||
0.001402634, 0.001391473, 0.001380401, 0.001369417, 0.00135852, 0.00134771, 0.001336985,
|
||||
0.001326346, 0.001315792, 0.001305321, 0.001294934, 0.001284629, 0.001274406, 0.001264264,
|
||||
0.001254203, 0.001244222, 0.00123432, 0.001224497, 0.001214752, 0.001205084, 0.001195493,
|
||||
0.001185979, 0.00117654, 0.001167176, 0.001157887, 0.001148671, 0.001139529, 0.001130459,
|
||||
0.001121462, 0.001112536, 0.001103681, 0.001094896, 0.001086182, 0.001077536, 0.00106896,
|
||||
0.001060451, 0.00105201, 0.001043636, 0.001035329, 0.001027088, 0.001018912, 0.001010802,
|
||||
0.001002756, 0.000994774, 0.000986855, 0.000978999, 0.000971206, 0.000963475, 0.000955805,
|
||||
0.000948197, 0.000940648, 0.00093316, 0.000925732, 0.000918362, 0.000911051, 0.000903798,
|
||||
0.000896603, 0.000889465, 0.000882384, 0.00087536, 0.000868391, 0.000861477, 0.000854619,
|
||||
0.000847815, 0.000841065, 0.000834369, 0.000827726, 0.000821136, 0.000814599, 0.000808113,
|
||||
0.000801679, 0.000795296, 0.000788964, 0.000782683, 0.000776451, 0.000770269, 0.000764136,
|
||||
0.000758052, 0.000752016, 0.000746029, 0.000740089, 0.000734196, 0.00072835, 0.000722551,
|
||||
0.000716798, 0.00071109, 0.000705428, 0.000699811, 0.000694239, 0.000688711, 0.000683227,
|
||||
0.000677787, 0.00067239, 0.000667036, 0.000661724, 0.000656455, 0.000651228, 0.000646042,
|
||||
0.000640897, 0.000635794, 0.000630731, 0.000625709, 0.000620726, 0.000615783, 0.000610879,
|
||||
0.000606015, 0.000601189, 0.000596401, 0.000591652, 0.00058694, 0.000582266, 0.00057763,
|
||||
0.00057303, 0.000568466, 0.000563939, 0.000559448, 0.000554993, 0.000550573, 0.000546189,
|
||||
0.000541839, 0.000537524, 0.000533243, 0.000528996, 0.000524783, 0.000520604, 0.000516458,
|
||||
0.000512345, 0.000508265, 0.000504217, 0.000500201, 0.000496217, 0.000492265, 0.000488345,
|
||||
0.000484456, 0.000480597, 0.00047677, 0.000472973, 0.000469206, 0.000465469, 0.000461762,
|
||||
0.000458084, 0.000454436, 0.000450816, 0.000447226, 0.000443664, 0.00044013, 0.000436625,
|
||||
0.000433147, 0.000429697, 0.000426275, 0.00042288, 0.000419512, 0.00041617, 0.000412855,
|
||||
0.000409567, 0.000406305, 0.000403069, 0.000399858, 0.000396674, 0.000393514, 0.00039038,
|
||||
0.00038727, 0.000384186, 0.000381126, 0.00037809, 0.000375079, 0.000372091, 0.000369127,
|
||||
0.000366187, 0.00036327, 0.000360377, 0.000357506, 0.000354659, 0.000351834, 0.000349031,
|
||||
0.000346251, 0.000343493, 0.000340757, 0.000338043, 0.00033535};
|
||||
|
||||
// static bool check_input_int8_range(float input)
|
||||
//{
|
||||
// bool ret = input > -128.0 && input < 128.0;
|
||||
// if (!ret) {
|
||||
// printf("invalid int8 range, input is %f\n", input);
|
||||
// }
|
||||
// return ret;
|
||||
//}
|
||||
|
||||
// Logistic sigmoid of x: 1 / (1 + e^-x), returned as a double.
static double _gen_sigmoid(float x) {
  const double decay = exp(-(x));
  return 1.0 / (1.0 + decay);
}
|
||||
|
||||
static void tl_lut_ref(uint16_t *ofmap, uint16_t *ifmap, uint16_t *table, uint16_t *table_slope,
|
||||
cvk_tl_shape_t ifmap_shape, cvk_tl_shape_t table_shape, int range_start,
|
||||
int range_end) {
|
||||
int tn, th, tw;
|
||||
|
||||
tn = table_shape.n;
|
||||
th = table_shape.h;
|
||||
tw = table_shape.w;
|
||||
(void)tn;
|
||||
(void)th;
|
||||
(void)tw;
|
||||
(void)table;
|
||||
(void)table_slope;
|
||||
(void)range_start;
|
||||
(void)range_end;
|
||||
assert(tn == 1);
|
||||
assert(th * tw == 256);
|
||||
assert(table);
|
||||
assert(table_slope);
|
||||
assert(ifmap_shape.n);
|
||||
assert(ifmap);
|
||||
assert(ofmap);
|
||||
|
||||
// TODO: use c function
|
||||
// 1. dump all input as binary file
|
||||
#ifdef GDB
|
||||
#define INFP32FILE "infp32file.bin"
|
||||
#define OUTBF16FILE "lutbf16out.bin"
|
||||
FILE *pFile;
|
||||
pFile = fopen(INFP32FILE, "wb");
|
||||
int shape_sz = tl_shape_size(&ifmap_shape);
|
||||
float *f = new float[shape_sz];
|
||||
for (int i = 0; i < shape_sz; i++) {
|
||||
f[i] = convert_bf16_fp32(ifmap[i]);
|
||||
}
|
||||
fwrite(f, 1, shape_sz * sizeof(float), pFile);
|
||||
fclose(pFile);
|
||||
|
||||
// 2. read result from `eval_lut.py`
|
||||
char command[256];
|
||||
sprintf(command,
|
||||
"python eval_lut.py --lut_input_range_start %d --lut_input_range_end "
|
||||
"%d --inputfloat32 %s --outputbf16 %s 2>&1 > 2\n",
|
||||
range_start, range_end, INFP32FILE, OUTBF16FILE);
|
||||
|
||||
int r;
|
||||
r = system(command);
|
||||
printf("command is %s, return %d\n", command, r);
|
||||
assert(r != 0);
|
||||
|
||||
pFile = fopen(OUTBF16FILE, "rb");
|
||||
if (!pFile) {
|
||||
fprintf(stderr, "open golden %s fail\n", OUTBF16FILE);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
size_t file_length;
|
||||
file_length = fread(ofmap, sizeof(uint16_t), tl_shape_size(&ifmap_shape), pFile);
|
||||
printf("read from golden, file size %lu\n", file_length);
|
||||
fclose(pFile);
|
||||
#else
|
||||
assert(range_start);
|
||||
assert(range_end);
|
||||
for (uint64_t i = 0; i < tl_shape_size(&ifmap_shape); i++) {
|
||||
ofmap[i] = convert_fp32_bf16(_gen_sigmoid(convert_bf16_fp32(ifmap[i])));
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef GDB
|
||||
for (uint64_t i = 0; i < tl_shape_size(&ifmap_shape); i++) {
|
||||
printf("ref %lu input 0x%x(%f) golden 0x%x(%f)\n", i, ifmap[i], convert_bf16_fp32(ifmap[i]),
|
||||
ofmap[i], convert_bf16_fp32(ofmap[i]));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static bool verify(uint16_t *ofmap_data, uint16_t *ref_data, uint64_t ofmap_size) {
|
||||
int count = 0;
|
||||
uint64_t size = ofmap_size;
|
||||
if (mode == PRE_DATA_COMPARE_FIX) {
|
||||
size = sizeof(sigmode_golden_bf16) / sizeof(uint16_t);
|
||||
} else if (PRE_DATA_MAX_ERROR) {
|
||||
size = sizeof(sigmode_golden) / sizeof(double);
|
||||
}
|
||||
|
||||
for (uint64_t i = 0; i < size; i++) {
|
||||
if (mode == PRE_DATA_COMPARE_FIX) {
|
||||
if (ofmap_data[i] != sigmode_golden_bf16[i]) {
|
||||
fprintf(stderr, "[%d] comparing failed at ofmap_data[%lu], got %x, exp %x\n", count, i,
|
||||
ofmap_data[i], sigmode_golden_bf16[i]);
|
||||
exit(-1);
|
||||
}
|
||||
} else {
|
||||
float got = convert_bf16_fp32(ofmap_data[i]);
|
||||
float exp = convert_bf16_fp32(ref_data[i]);
|
||||
|
||||
if (mode == PRE_DATA_MAX_ERROR) {
|
||||
// cus we have better accuracy ~ 0.0039
|
||||
exp = sigmode_golden[i];
|
||||
}
|
||||
|
||||
if (fabs(got - exp) > MAX_ERROR) {
|
||||
fprintf(stderr,
|
||||
"[%d] comparing failed at ofmap_data[%lu], got %x, exp %x, "
|
||||
"diff(%f - %f) is %f\n",
|
||||
count, i, ofmap_data[i], ref_data[i], got, exp, fabs(got - exp));
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (count != 0) {
|
||||
printf("error count is %d\n", count);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void gen_input(uint16_t *ifmap, uint64_t ifmap_size) {
|
||||
if (mode == PRE_DATA_COMPARE_FIX || mode == PRE_DATA_MAX_ERROR) {
|
||||
memcpy(ifmap, &test_pattern, sizeof(test_pattern));
|
||||
|
||||
#ifdef GDB
|
||||
for (uint64_t i = 0; i < ifmap_size; i++) {
|
||||
printf("source if[%lu] is bf16 %f (bf16)with 0x%x\n", i, convert_bf16_fp32(ifmap[i]),
|
||||
ifmap[i]);
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
int table_hw = 256;
|
||||
for (uint64_t i = 0; i < ifmap_size; i++) {
|
||||
// input range is -8 ~ +8
|
||||
float input = ((int)i % 7) * (((int)i % 2) ? 1 : -1) + 0.03 + (i % table_hw) * 0.002;
|
||||
// float input = ((int)i % 10) * (((int)i % 2) ? 1 : -1) + 0.03 + (i %
|
||||
// table_hw) * 0.002;
|
||||
// assert(check_input_int8_range(input));
|
||||
ifmap[i] = convert_fp32_bf16(input);
|
||||
#ifdef GDB
|
||||
printf("source if[%lu] is bf16 %f, input is %f (bf16)with 0x%x\n", i,
|
||||
convert_bf16_fp32(ifmap[i]), input, ifmap[i]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Run one sigmoid test pass for the file-level test `mode`.
 *
 * Steps: pick the input shape for the mode, build the input and the sigmoid
 * lookup tables on the host, compute the host reference, upload input and
 * tables to local memory, emit the sigmoid kernel, read the result back and
 * hand it to verify(). All local-memory tensors and host buffers allocated
 * here are released before returning.
 *
 * @param ctx  runtime handle passed through to the test_* helpers
 * @param bmk  kernel context used for allocation and command emission
 */
static void testbench(CVI_RT_HANDLE *ctx, cvk_context_t *bmk) {
  // TODO: check more shape / align
  const bool fixed_pattern = (mode == PRE_DATA_COMPARE_FIX || mode == PRE_DATA_MAX_ERROR);
  const uint32_t plane_hw = fixed_pattern ? 8 : 16;
  cvk_tl_shape_t ifmap_shape = {1, 32, plane_hw, plane_hw};
  cvk_tl_shape_t ofmap_shape = ifmap_shape;

  cvk_fmt_t fmt = CVK_FMT_BF16;

  // the lookup-table shape is reported by the library
  cvk_tl_shape_t table_shape;
  cvm_table_shape(bmk, &table_shape);

  const uint64_t ifmap_size = tl_shape_size(&ifmap_shape);
  const uint64_t table_size = tl_shape_size(&table_shape);
  const uint64_t ofmap_size = tl_shape_size(&ofmap_shape);
  const int elem_bytes = bytesize_of_fmt(fmt);

  // host-side buffers
  uint16_t *ifmap = (uint16_t *)xmalloc(ifmap_size * elem_bytes);
  uint16_t *table_data = (uint16_t *)xmalloc(table_size * elem_bytes);
  uint16_t *table_data_slope = (uint16_t *)xmalloc(table_size * elem_bytes);
  uint16_t *ref_data = (uint16_t *)xmalloc(ofmap_size * elem_bytes);

  // the table range depends on the activation; sigmoid is tabulated over [-8, 8]
  const int range_start = -8;
  const int range_end = 8;
  float scale = cvm_sigmoid_scale(range_start, range_end);

  // fill host buffers: input, value/slope tables, reference output
  gen_input(ifmap, ifmap_size);
  cvm_sigmoid_tbl(table_data, table_data_slope, &table_shape, range_start, range_end);
  tl_lut_ref(ref_data, ifmap, table_data, table_data_slope, ifmap_shape, table_shape, range_start,
             range_end);

  // local-memory tensors (released below in reverse allocation order)
  cvk_tl_t *tl_ifmap = test_alloc_tl(bmk, ifmap_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_table = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_table_slope = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_buf = test_alloc_tl(bmk, ofmap_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_ofmap_bf16 = test_alloc_tl(bmk, ofmap_shape, fmt, /*align*/ 1);

  // sys->local
  test_put_tensor_g2l_comp(ctx, bmk, tl_ifmap, (uint8_t *)ifmap);
  test_put_tensor_g2l_comp(ctx, bmk, tl_table, (uint8_t *)table_data);
  test_put_tensor_g2l_comp(ctx, bmk, tl_table_slope, (uint8_t *)table_data_slope);

  // emit core function
  cvm_emit_sigmoid(bmk, tl_ifmap, tl_buf, tl_table, tl_table_slope, tl_ofmap_bf16, scale);

  // local->sys; the returned buffer is released with free() below
  uint16_t *ofmap_data = (uint16_t *)test_get_tensor_l2g_comp(ctx, bmk, tl_ofmap_bf16);

  verify(ofmap_data, ref_data, ofmap_size);

  free_tl(bmk, tl_ofmap_bf16);
  free_tl(bmk, tl_buf);
  free_tl(bmk, tl_table_slope);
  free_tl(bmk, tl_table);
  free_tl(bmk, tl_ifmap);

  free(ifmap);
  free(table_data);
  free(table_data_slope);
  free(ref_data);
  free(ofmap_data);
}
|
||||
|
||||
int main() {
|
||||
CVI_RT_HANDLE ctx;
|
||||
cvk_context_t *bmk;
|
||||
int round_mode;
|
||||
|
||||
round_mode = set_store_feround();
|
||||
|
||||
test_init(&ctx, &bmk);
|
||||
|
||||
for (int i = PRE_DATA_COMPARE_FIX; i < TEST_MODE_MAX; i++) {
|
||||
// for (int i = GEN_DATA_MAX_ERROR; i < TEST_MODE_MAX; i++) {
|
||||
// for (int i = PRE_DATA_MAX_ERROR; i < GEN_DATA_MAX_ERROR; i++) {
|
||||
mode = static_cast<TEST_MODE>(i);
|
||||
printf("test mode %d...\n", mode);
|
||||
testbench(&ctx, bmk);
|
||||
}
|
||||
|
||||
test_exit(&ctx, bmk);
|
||||
restore_feround(round_mode);
|
||||
return 0;
|
||||
}
|
||||
375
cvimath/tests/cvi1835/sqrt.cpp
Normal file
375
cvimath/tests/cvi1835/sqrt.cpp
Normal file
@ -0,0 +1,375 @@
|
||||
/**
|
||||
*/
|
||||
#include <cvimath_internal.h>
|
||||
#include <test_cvikernel_util.h>
|
||||
|
||||
#include <cfloat>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <random>
|
||||
#include <string>
|
||||
//#define DBG
|
||||
|
||||
using namespace std;
|
||||
|
||||
/**
 * Test scenarios, executed in ascending order by main().
 *
 * "PRE_DATA" means a fixed, pre-computed input pattern is used; the device
 * result should then be the same as the stored lookup-table result.
 */
enum TEST_MODE {
  // pre-data input, output compared bit-for-bit against the stored reference
  PRE_DATA_COMPARE_FIX = 0,
  // generated input (nominally 2^-20 ~ 2^20), output checked within an epsilon
  GEN_POW_20_DATA_MAX_ERROR = 1,
  // number of modes; used only as the loop bound
  TEST_MODE_MAX = 2,
};
|
||||
|
||||
// Test mode currently being run; assigned by main() before each testbench()
// call and read by tl_lut_ref() and gen_input().
static TEST_MODE mode;
|
||||
|
||||
// Fixed input words (raw bf16 bit patterns) that gen_input() copies into the
// input buffer when mode == PRE_DATA_COMPARE_FIX.
static uint16_t test_pattern[] = {
|
||||
0x0000, 0x38D2, 0x3952, 0x399D, 0x39D2, 0x3A03, 0x3A1D, 0x3A38, 0x3A52, 0x3A6C, 0x3A83, 0x3A90,
|
||||
0x3A9D, 0x3AAA, 0x3AB8, 0x3AC5, 0x3AD2, 0x3ADF, 0x3AEC, 0x3AF9, 0x3B03, 0x3B0A, 0x3B10, 0x3B17,
|
||||
0x3B1D, 0x3B24, 0x3B2A, 0x3B31, 0x3B38, 0x3B3E, 0x3B45, 0x3B4B, 0x3B52, 0x3B58, 0x3B5F, 0x3B65,
|
||||
0x3B6C, 0x3B72, 0x3B79, 0x3B80, 0x3B83, 0x3B86, 0x3B8A, 0x3B8D, 0x3B90, 0x3B93, 0x3B97, 0x3B9A,
|
||||
0x3B9D, 0x3BA1, 0x3BA4, 0x3BA7, 0x3BAA, 0x3BAE, 0x3BB1, 0x3BB4, 0x3BB8, 0x3BBB, 0x3BBE, 0x3BC1,
|
||||
0x3BC5, 0x3BC8, 0x3BCB, 0x3BCE, 0x3BD2, 0x3BD5, 0x3BD8, 0x3BDC, 0x3BDF, 0x3BE2, 0x3BE5, 0x3BE9,
|
||||
0x3BEC, 0x3BEF, 0x3BF2, 0x3BF6, 0x3BF9, 0x3BFC, 0x3C00, 0x3C01, 0x3C03, 0x3C05, 0x3C06, 0x3C08,
|
||||
0x3C0A, 0x3C0B, 0x3C0D, 0x3C0F, 0x3C10, 0x3C12, 0x3C13, 0x3C15, 0x3C17, 0x3C18, 0x3C1A, 0x3C1C,
|
||||
0x3C1D, 0x3C1F, 0x3C21, 0x3C22, 0x3C24, 0x3C25, 0x3C27, 0x3C29, 0x3C2A, 0x3C2C, 0x3C2E, 0x3C2F,
|
||||
0x3C31, 0x3C33, 0x3C34, 0x3C36, 0x3C38, 0x3C39, 0x3C3B, 0x3C3C, 0x3C3E, 0x3C40, 0x3C41, 0x3C43,
|
||||
0x3C45, 0x3C46, 0x3C48, 0x3C4A, 0x3C4B, 0x3C4D, 0x3C4E, 0x3C50, 0x3C52, 0x3C53, 0x3C55, 0x3C57,
|
||||
0x3C58, 0x3C5A, 0x3C5C, 0x3C5D, 0x3C5F, 0x3C60, 0x3C62, 0x3C64, 0x3C65, 0x3C67, 0x3C69, 0x3C6A,
|
||||
0x3C6C, 0x3C6E, 0x3C6F, 0x3C71, 0x3C72, 0x3C74, 0x3C76, 0x3C77, 0x3C79, 0x3C7B, 0x3C7C, 0x3C7E,
|
||||
0x3C80, 0x3C81, 0x3C81, 0x3C82, 0x3C83, 0x3C84, 0x3C85, 0x3C86, 0x3C86, 0x3C87, 0x3C88, 0x3C89,
|
||||
0x3C8A, 0x3C8A, 0x3C8B, 0x3C8C, 0x3C8D, 0x3C8E, 0x3C8F, 0x3C8F, 0x3C90, 0x3C91, 0x3C92, 0x3C93,
|
||||
0x3C93, 0x3C94, 0x3C95, 0x3C96, 0x3C97, 0x3C98, 0x3C98, 0x3C99, 0x3C9A, 0x3C9B, 0x3C9C, 0x3C9C,
|
||||
0x3C9D, 0x3C9E, 0x3C9F, 0x3CA0, 0x3CA1, 0x3CA1, 0x3CA2, 0x3CA3, 0x3CA4, 0x3CA5, 0x3CA5, 0x3CA6,
|
||||
0x3CA7, 0x3CA8, 0x3CA9, 0x3CAA, 0x3CAA, 0x3CAB, 0x3CAC, 0x3CAD, 0x3CAE, 0x3CAE, 0x3CAF, 0x3CB0,
|
||||
0x3CB1, 0x3CB2, 0x3CB3, 0x3CB3, 0x3CB4, 0x3CB5, 0x3CB6, 0x3CB7, 0x3CB8, 0x3CB8, 0x3CB9, 0x3CBA,
|
||||
0x3CBB, 0x3CBC, 0x3CBC, 0x3CBD, 0x3CBE, 0x3CBF, 0x3CC0, 0x3CC1, 0x3CC1, 0x3CC2, 0x3CC3, 0x3CC4,
|
||||
0x3CC5, 0x3CC5, 0x3CC6, 0x3CC7, 0x3CC8, 0x3CC9, 0x3CCA, 0x3CCA, 0x3CCB, 0x3CCC, 0x3CCD, 0x3CCE,
|
||||
0x3CCE, 0x3CCF, 0x3CD0, 0x3CD1, 0x3CD2, 0x3CD3, 0x3CD3, 0x3CD4, 0x3CD5, 0x3CD6, 0x3CD7, 0x3CD7,
|
||||
0x3CD8, 0x3CD9, 0x3CDA, 0x3CDB, 0x3CDC, 0x3CDC, 0x3CDD, 0x3CDE, 0x3CDF, 0x3CE0, 0x3CE0, 0x3CE1,
|
||||
0x3CE2, 0x3CE3, 0x3CE4, 0x3CE5, 0x3CE5, 0x3CE6, 0x3CE7, 0x3CE8, 0x3CE9, 0x3CE9, 0x3CEA, 0x3CEB,
|
||||
0x3CEC, 0x3CED, 0x3CEE, 0x3CEE, 0x3CEF, 0x3CF0, 0x3CF1, 0x3CF2, 0x3CF2, 0x3CF3, 0x3CF4, 0x3CF5,
|
||||
0x3CF6, 0x3CF7, 0x3CF7, 0x3CF8, 0x3CF9, 0x3CFA, 0x3CFB, 0x3CFB, 0x3CFC, 0x3CFD, 0x3CFE, 0x3CFF,
|
||||
0x3D00, 0x3D00, 0x3D01, 0x3D01, 0x3D01, 0x3D02, 0x3D02, 0x3D03, 0x3D03, 0x3D03, 0x3D04, 0x3D04,
|
||||
0x3D05, 0x3D05, 0x3D06, 0x3D06, 0x3D06, 0x3D07, 0x3D07, 0x3D08, 0x3D08, 0x3D08, 0x3D09, 0x3D09,
|
||||
0x3D0A, 0x3D0A, 0x3D0A, 0x3D0B, 0x3D0B, 0x3D0C, 0x3D0C, 0x3D0C, 0x3D0D, 0x3D0D, 0x3D0E, 0x3D0E,
|
||||
0x3D0F, 0x3D0F, 0x3D0F, 0x3D10, 0x3D10, 0x3D11, 0x3D11, 0x3D11, 0x3D12, 0x3D12, 0x3D13, 0x3D13,
|
||||
0x3D13, 0x3D14, 0x3D14, 0x3D15, 0x3D15, 0x3D16, 0x3D16, 0x3D16, 0x3D17, 0x3D17, 0x3D18, 0x3D18,
|
||||
0x3D18, 0x3D19, 0x3D19, 0x3D1A, 0x3D1A, 0x3D1A, 0x3D1B, 0x3D1B, 0x3D1C, 0x3D1C, 0x3D1C, 0x3D1D,
|
||||
0x3D1D, 0x3D1E, 0x3D1E, 0x3D1F, 0x3D1F, 0x3D1F, 0x3D20, 0x3D20, 0x3D21, 0x3D21, 0x3D21, 0x3D22,
|
||||
0x3D22, 0x3D23, 0x3D23, 0x3D23, 0x3D24, 0x3D24, 0x3D25, 0x3D25, 0x3D25, 0x3D26, 0x3D26, 0x3D27,
|
||||
0x3D27, 0x3D28, 0x3D28, 0x3D28, 0x3D29, 0x3D29, 0x3D2A, 0x3D2A, 0x3D2A, 0x3D2B, 0x3D2B, 0x3D2C,
|
||||
0x3D2C, 0x3D2C, 0x3D2D, 0x3D2D, 0x3D2E, 0x3D2E, 0x3D2E, 0x3D2F, 0x3D2F, 0x3D30, 0x3D30, 0x3D31,
|
||||
0x3D31, 0x3D31, 0x3D32, 0x3D32, 0x3D33, 0x3D33, 0x3D33, 0x3D34, 0x3D34, 0x3D35, 0x3D35, 0x3D35,
|
||||
0x3D36, 0x3D36, 0x3D37, 0x3D37, 0x3D38, 0x3D38, 0x3D38, 0x3D39, 0x3D39, 0x3D3A, 0x3D3A, 0x3D3A,
|
||||
0x3D3B, 0x3D3B, 0x3D3C, 0x3D3C, 0x3D3C, 0x3D3D, 0x3D3D, 0x3D3E, 0x3D3E, 0x3D3E, 0x3D3F, 0x3D3F,
|
||||
0x3D40, 0x3D40, 0x3D41, 0x3D41, 0x3D41, 0x3D42, 0x3D42, 0x3D43, 0x3D43, 0x3D43, 0x3D44, 0x3D44,
|
||||
0x3D45, 0x3D45, 0x3D45, 0x3D46, 0x3D46, 0x3D47, 0x3D47, 0x3D47, 0x3D48, 0x3D48, 0x3D49, 0x3D49,
|
||||
0x3D4A, 0x3D4A, 0x3D4A, 0x3D4B, 0x3D4B, 0x3D4C, 0x3D4C, 0x3D4C, 0x3D4D, 0x3D4D, 0x3D4E, 0x3D4E,
|
||||
0x3D4E, 0x3D4F, 0x3D4F, 0x3D50, 0x3D50, 0x3D50, 0x3D51, 0x3D51, 0x3D52, 0x3D52, 0x3D53, 0x3D53,
|
||||
0x3D53, 0x3D54, 0x3D54, 0x3D55, 0x3D55, 0x3D55, 0x3D56, 0x3D56, 0x3D57, 0x3D57, 0x3D57, 0x3D58,
|
||||
0x3D58, 0x3D59, 0x3D59, 0x3D59, 0x3D5A, 0x3D5A, 0x3D5B, 0x3D5B, 0x3D5C, 0x3D5C, 0x3D5C, 0x3D5D,
|
||||
0x3D5D, 0x3D5E, 0x3D5E, 0x3D5E, 0x3D5F, 0x3D5F, 0x3D60, 0x3D60, 0x3D60, 0x3D61, 0x3D61, 0x3D62,
|
||||
0x3D62, 0x3D63, 0x3D63, 0x3D63, 0x3D64, 0x3D64, 0x3D65, 0x3D65, 0x3D65, 0x3D66, 0x3D66, 0x3D67,
|
||||
0x3D67, 0x3D67, 0x3D68, 0x3D68, 0x3D69, 0x3D69, 0x3D69, 0x3D6A, 0x3D6A, 0x3D6B, 0x3D6B, 0x3D6C,
|
||||
0x3D6C, 0x3D6C, 0x3D6D, 0x3D6D, 0x3D6E, 0x3D6E, 0x3D6E, 0x3D6F, 0x3D6F, 0x3D70, 0x3D70, 0x3D70,
|
||||
0x3D71, 0x3D71, 0x3D72, 0x3D72, 0x3D72, 0x3D73, 0x3D73, 0x3D74, 0x3D74, 0x3D75, 0x3D75, 0x3D75,
|
||||
0x3D76, 0x3D76, 0x3D77, 0x3D77, 0x3D77, 0x3D78, 0x3D78, 0x3D79, 0x3D79, 0x3D79, 0x3D7A, 0x3D7A,
|
||||
0x3D7B, 0x3D7B, 0x3D7B, 0x3D7C, 0x3D7C, 0x3D7D, 0x3D7D, 0x3D7E, 0x3D7E, 0x3D7E, 0x3D7F, 0x3D7F,
|
||||
0x3D80, 0x3D80, 0x3D80, 0x3D80, 0x3D81, 0x3D81, 0x3D81, 0x3D81, 0x3D81, 0x3D82, 0x3D82, 0x3D82,
|
||||
0x3D82, 0x3D82, 0x3D83, 0x3D83, 0x3D83, 0x3D83, 0x3D83, 0x3D84, 0x3D84, 0x3D84, 0x3D84, 0x3D85,
|
||||
0x3D85, 0x3D85, 0x3D85, 0x3D85, 0x3D86, 0x3D86, 0x3D86, 0x3D86, 0x3D86, 0x3D87, 0x3D87, 0x3D87,
|
||||
0x3D87, 0x3D87, 0x3D88, 0x3D88, 0x3D88, 0x3D88, 0x3D88, 0x3D89, 0x3D89, 0x3D89, 0x3D89, 0x3D89,
|
||||
0x3D8A, 0x3D8A, 0x3D8A, 0x3D8A, 0x3D8A, 0x3D8B, 0x3D8B, 0x3D8B, 0x3D8B, 0x3D8B, 0x3D8C, 0x3D8C,
|
||||
0x3D8C, 0x3D8C, 0x3D8C, 0x3D8D, 0x3D8D, 0x3D8D, 0x3D8D, 0x3D8E, 0x3D8E, 0x3D8E, 0x3D8E, 0x3D8E,
|
||||
0x3D8F, 0x3D8F, 0x3D8F, 0x3D8F, 0x3D8F, 0x3D90, 0x3D90, 0x3D90, 0x3D90, 0x3D90, 0x3D91, 0x3D91,
|
||||
0x3D91, 0x3D91, 0x3D91, 0x3D92, 0x3D92, 0x3D92, 0x3D92, 0x3D92, 0x3D93, 0x3D93, 0x3D93, 0x3D93,
|
||||
0x3D93, 0x3D94, 0x3D94, 0x3D94, 0x3D94, 0x3D94, 0x3D95, 0x3D95, 0x3D95, 0x3D95, 0x3D96, 0x3D96,
|
||||
0x3D96, 0x3D96, 0x3D96, 0x3D97, 0x3D97, 0x3D97, 0x3D97, 0x3D97, 0x3D98, 0x3D98, 0x3D98, 0x3D98,
|
||||
0x3D98, 0x3D99, 0x3D99, 0x3D99, 0x3D99, 0x3D99, 0x3D9A, 0x3D9A, 0x3D9A, 0x3D9A, 0x3D9A, 0x3D9B,
|
||||
0x3D9B, 0x3D9B, 0x3D9B, 0x3D9B, 0x3D9C, 0x3D9C, 0x3D9C, 0x3D9C, 0x3D9C, 0x3D9D, 0x3D9D, 0x3D9D,
|
||||
0x3D9D, 0x3D9D, 0x3D9E, 0x3D9E, 0x3D9E, 0x3D9E, 0x3D9F, 0x3D9F, 0x3D9F, 0x3D9F, 0x3D9F, 0x3DA0,
|
||||
0x3DA0, 0x3DA0, 0x3DA0, 0x3DA0, 0x3DA1, 0x3DA1, 0x3DA1, 0x3DA1, 0x3DA1, 0x3DA2, 0x3DA2, 0x3DA2,
|
||||
0x3DA2, 0x3DA2, 0x3DA3, 0x3DA3, 0x3DA3, 0x3DA3, 0x3DA3, 0x3DA4, 0x3DA4, 0x3DA4, 0x3DA4, 0x3DA4,
|
||||
0x3DA5, 0x3DA5, 0x3DA5, 0x3DA5, 0x3DA5, 0x3DA6, 0x3DA6, 0x3DA6, 0x3DA6, 0x3DA7, 0x3DA7, 0x3DA7,
|
||||
0x3DA7, 0x3DA7, 0x3DA8, 0x3DA8, 0x3DA8, 0x3DA8, 0x3DA8, 0x3DA9, 0x3DA9, 0x3DA9, 0x3DA9, 0x3DA9,
|
||||
0x3DAA, 0x3DAA, 0x3DAA, 0x3DAA, 0x3DAA, 0x3DAB, 0x3DAB, 0x3DAB, 0x3DAB, 0x3DAB, 0x3DAC, 0x3DAC,
|
||||
0x3DAC, 0x3DAC, 0x3DAC, 0x3DAD, 0x3DAD, 0x3DAD, 0x3DAD, 0x3DAD, 0x3DAE, 0x3DAE, 0x3DAE, 0x3DAE,
|
||||
0x3DAE, 0x3DAF, 0x3DAF, 0x3DAF, 0x3DAF, 0x3DB0, 0x3DB0, 0x3DB0, 0x3DB0, 0x3DB0, 0x3DB1, 0x3DB1,
|
||||
0x3DB1, 0x3DB1, 0x3DB1, 0x3DB2, 0x3DB2, 0x3DB2, 0x3DB2, 0x3DB2, 0x3DB3, 0x3DB3, 0x3DB3, 0x3DB3,
|
||||
0x3DB3, 0x3DB4, 0x3DB4, 0x3DB4, 0x3DB4, 0x3DB4, 0x3DB5, 0x3DB5, 0x3DB5, 0x3DB5, 0x3DB5, 0x3DB6,
|
||||
0x3DB6, 0x3DB6, 0x3DB6, 0x3DB6, 0x3DB7, 0x3DB7, 0x3DB7, 0x3DB7, 0x3DB8, 0x3DB8, 0x3DB8, 0x3DB8,
|
||||
0x3DB8, 0x3DB9, 0x3DB9, 0x3DB9, 0x3DB9, 0x3DB9, 0x3DBA, 0x3DBA, 0x3DBA, 0x3DBA, 0x3DBA, 0x3DBB,
|
||||
0x3DBB, 0x3DBB, 0x3DBB, 0x3DBB, 0x3DBC, 0x3DBC, 0x3DBC, 0x3DBC, 0x3DBC, 0x3DBD, 0x3DBD, 0x3DBD,
|
||||
0x3DBD, 0x3DBD, 0x3DBE, 0x3DBE, 0x3DBE, 0x3DBE, 0x3DBE, 0x3DBF, 0x3DBF, 0x3DBF, 0x3DBF, 0x3DBF,
|
||||
0x3DC0, 0x3DC0, 0x3DC0, 0x3DC0, 0x3DC1, 0x3DC1, 0x3DC1, 0x3DC1, 0x3DC1, 0x3DC2, 0x3DC2, 0x3DC2,
|
||||
0x3DC2, 0x3DC2, 0x3DC3, 0x3DC3, 0x3DC3, 0x3DC3, 0x3DC3, 0x3DC4, 0x3DC4, 0x3DC4, 0x3DC4, 0x3DC4,
|
||||
0x3DC5, 0x3DC5, 0x3DC5, 0x3DC5, 0x3DC5, 0x3DC6, 0x3DC6, 0x3DC6, 0x3DC6, 0x3DC6, 0x3DC7, 0x3DC7,
|
||||
0x3DC7, 0x3DC7, 0x3DC7, 0x3DC8, 0x3DC8, 0x3DC8, 0x3DC8, 0x3DC8, 0x3DC9, 0x3DC9, 0x3DC9, 0x3DC9,
|
||||
0x3DCA, 0x3DCA, 0x3DCA, 0x3DCA, 0x3DCA, 0x3DCB, 0x3DCB, 0x3DCB, 0x3DCB, 0x3DCB, 0x3DCC, 0x3DCC,
|
||||
0x3DCC, 0x3DCC, 0x3DCC, 0x3DCD, 0x3DCE, 0x3DCF, 0x3DD0, 0x3DD1, 0x3DD2, 0x3DD3, 0x3DD4, 0x3DD5,
|
||||
0x3DD6, 0x3DD7, 0x3DD8, 0x3DD9, 0x3DDA, 0x3DDB, 0x3DDC, 0x3DDD, 0x3DDE, 0x3DDF, 0x3DE0, 0x3DE1,
|
||||
0x3DE2, 0x3DE3, 0x3DE4, 0x3DE5,
|
||||
};
|
||||
|
||||
// Expected output words (raw bf16 bit patterns) for the fixed input pattern;
// tl_lut_ref() copies entry i into the reference output i when
// mode == PRE_DATA_COMPARE_FIX.
static uint16_t test_pattern_ref[] = {
|
||||
0x0, 0x3c24, 0x3c68, 0x3c8e, 0x3ca4, 0x3cb7, 0x3cc8, 0x3cd9, 0x3ce8, 0x3cf6, 0x3d01, 0x3d08,
|
||||
0x3d0e, 0x3d14, 0x3d19, 0x3d1f, 0x3d24, 0x3d29, 0x3d2e, 0x3d33, 0x3d37, 0x3d3c, 0x3d40, 0x3d45,
|
||||
0x3d48, 0x3d4d, 0x3d51, 0x3d55, 0x3d59, 0x3d5d, 0x3d61, 0x3d64, 0x3d68, 0x3d6b, 0x3d6f, 0x3d72,
|
||||
0x3d76, 0x3d79, 0x3d7c, 0x3d80, 0x3d81, 0x3d83, 0x3d85, 0x3d86, 0x3d88, 0x3d89, 0x3d8b, 0x3d8c,
|
||||
0x3d8e, 0x3d90, 0x3d91, 0x3d92, 0x3d94, 0x3d95, 0x3d97, 0x3d98, 0x3d99, 0x3d9b, 0x3d9c, 0x3d9d,
|
||||
0x3d9f, 0x3da0, 0x3da1, 0x3da2, 0x3da4, 0x3da5, 0x3da6, 0x3da8, 0x3da9, 0x3daa, 0x3dab, 0x3dad,
|
||||
0x3dae, 0x3daf, 0x3db0, 0x3db1, 0x3db3, 0x3db4, 0x3db5, 0x3db6, 0x3db7, 0x3db9, 0x3db9, 0x3dbb,
|
||||
0x3dbc, 0x3dbd, 0x3dbe, 0x3dbf, 0x3dc0, 0x3dc1, 0x3dc2, 0x3dc3, 0x3dc5, 0x3dc5, 0x3dc7, 0x3dc8,
|
||||
0x3dc8, 0x3dca, 0x3dcb, 0x3dcc, 0x3dcd, 0x3dce, 0x3dcf, 0x3dd0, 0x3dd1, 0x3dd2, 0x3dd3, 0x3dd4,
|
||||
0x3dd5, 0x3dd6, 0x3dd7, 0x3dd8, 0x3dd9, 0x3dda, 0x3ddb, 0x3ddb, 0x3ddd, 0x3dde, 0x3dde, 0x3ddf,
|
||||
0x3de1, 0x3de1, 0x3de2, 0x3de3, 0x3de4, 0x3de5, 0x3de6, 0x3de7, 0x3de8, 0x3de8, 0x3dea, 0x3deb,
|
||||
0x3deb, 0x3dec, 0x3ded, 0x3dee, 0x3def, 0x3def, 0x3df1, 0x3df2, 0x3df2, 0x3df3, 0x3df4, 0x3df5,
|
||||
0x3df6, 0x3df7, 0x3df7, 0x3df8, 0x3df9, 0x3dfa, 0x3dfb, 0x3dfb, 0x3dfc, 0x3dfd, 0x3dfe, 0x3dff,
|
||||
0x3e00, 0x3e00, 0x3e00, 0x3e01, 0x3e01, 0x3e02, 0x3e02, 0x3e03, 0x3e03, 0x3e03, 0x3e04, 0x3e04,
|
||||
0x3e05, 0x3e05, 0x3e05, 0x3e06, 0x3e06, 0x3e07, 0x3e07, 0x3e07, 0x3e08, 0x3e08, 0x3e09, 0x3e09,
|
||||
0x3e09, 0x3e0a, 0x3e0a, 0x3e0b, 0x3e0b, 0x3e0b, 0x3e0b, 0x3e0c, 0x3e0c, 0x3e0d, 0x3e0d, 0x3e0d,
|
||||
0x3e0e, 0x3e0e, 0x3e0f, 0x3e0f, 0x3e10, 0x3e10, 0x3e10, 0x3e10, 0x3e11, 0x3e11, 0x3e11, 0x3e12,
|
||||
0x3e12, 0x3e13, 0x3e13, 0x3e14, 0x3e14, 0x3e14, 0x3e14, 0x3e15, 0x3e15, 0x3e15, 0x3e16, 0x3e16,
|
||||
0x3e17, 0x3e17, 0x3e17, 0x3e17, 0x3e18, 0x3e18, 0x3e19, 0x3e19, 0x3e19, 0x3e19, 0x3e1a, 0x3e1a,
|
||||
0x3e1b, 0x3e1b, 0x3e1b, 0x3e1c, 0x3e1c, 0x3e1c, 0x3e1d, 0x3e1d, 0x3e1d, 0x3e1e, 0x3e1e, 0x3e1e,
|
||||
0x3e1f, 0x3e1f, 0x3e1f, 0x3e20, 0x3e20, 0x3e20, 0x3e21, 0x3e21, 0x3e21, 0x3e22, 0x3e22, 0x3e22,
|
||||
0x3e22, 0x3e23, 0x3e23, 0x3e24, 0x3e24, 0x3e24, 0x3e24, 0x3e25, 0x3e25, 0x3e26, 0x3e26, 0x3e26,
|
||||
0x3e26, 0x3e27, 0x3e27, 0x3e27, 0x3e28, 0x3e28, 0x3e28, 0x3e29, 0x3e29, 0x3e29, 0x3e29, 0x3e2a,
|
||||
0x3e2a, 0x3e2a, 0x3e2b, 0x3e2b, 0x3e2b, 0x3e2c, 0x3e2c, 0x3e2c, 0x3e2d, 0x3e2d, 0x3e2d, 0x3e2d,
|
||||
0x3e2e, 0x3e2e, 0x3e2f, 0x3e2f, 0x3e2f, 0x3e2f, 0x3e30, 0x3e30, 0x3e30, 0x3e30, 0x3e31, 0x3e31,
|
||||
0x3e31, 0x3e32, 0x3e32, 0x3e32, 0x3e33, 0x3e33, 0x3e33, 0x3e33, 0x3e34, 0x3e34, 0x3e34, 0x3e35,
|
||||
0x3e35, 0x3e35, 0x3e36, 0x3e36, 0x3e36, 0x3e36, 0x3e36, 0x3e37, 0x3e37, 0x3e37, 0x3e38, 0x3e38,
|
||||
0x3e39, 0x3e39, 0x3e39, 0x3e39, 0x3e39, 0x3e3a, 0x3e3a, 0x3e3b, 0x3e3b, 0x3e3b, 0x3e3b, 0x3e3b,
|
||||
0x3e3c, 0x3e3c, 0x3e3c, 0x3e3d, 0x3e3d, 0x3e3d, 0x3e3d, 0x3e3d, 0x3e3e, 0x3e3e, 0x3e3f, 0x3e3f,
|
||||
0x3e3f, 0x3e3f, 0x3e3f, 0x3e40, 0x3e40, 0x3e41, 0x3e41, 0x3e41, 0x3e41, 0x3e41, 0x3e42, 0x3e42,
|
||||
0x3e42, 0x3e43, 0x3e43, 0x3e43, 0x3e43, 0x3e44, 0x3e44, 0x3e44, 0x3e45, 0x3e45, 0x3e45, 0x3e45,
|
||||
0x3e45, 0x3e46, 0x3e46, 0x3e47, 0x3e47, 0x3e47, 0x3e47, 0x3e47, 0x3e48, 0x3e48, 0x3e48, 0x3e48,
|
||||
0x3e48, 0x3e49, 0x3e49, 0x3e4a, 0x3e4a, 0x3e4a, 0x3e4a, 0x3e4a, 0x3e4b, 0x3e4b, 0x3e4b, 0x3e4c,
|
||||
0x3e4c, 0x3e4c, 0x3e4c, 0x3e4c, 0x3e4d, 0x3e4d, 0x3e4e, 0x3e4e, 0x3e4e, 0x3e4e, 0x3e4e, 0x3e4f,
|
||||
0x3e4f, 0x3e4f, 0x3e4f, 0x3e4f, 0x3e50, 0x3e50, 0x3e51, 0x3e51, 0x3e51, 0x3e51, 0x3e51, 0x3e52,
|
||||
0x3e52, 0x3e52, 0x3e52, 0x3e52, 0x3e53, 0x3e53, 0x3e53, 0x3e54, 0x3e54, 0x3e54, 0x3e54, 0x3e55,
|
||||
0x3e55, 0x3e55, 0x3e55, 0x3e55, 0x3e56, 0x3e56, 0x3e56, 0x3e57, 0x3e57, 0x3e57, 0x3e57, 0x3e57,
|
||||
0x3e58, 0x3e58, 0x3e58, 0x3e58, 0x3e59, 0x3e59, 0x3e59, 0x3e5a, 0x3e5a, 0x3e5a, 0x3e5a, 0x3e5a,
|
||||
0x3e5b, 0x3e5b, 0x3e5b, 0x3e5b, 0x3e5b, 0x3e5c, 0x3e5c, 0x3e5d, 0x3e5d, 0x3e5d, 0x3e5d, 0x3e5d,
|
||||
0x3e5e, 0x3e5e, 0x3e5e, 0x3e5e, 0x3e5e, 0x3e5f, 0x3e5f, 0x3e5f, 0x3e5f, 0x3e5f, 0x3e60, 0x3e60,
|
||||
0x3e61, 0x3e61, 0x3e61, 0x3e61, 0x3e61, 0x3e62, 0x3e62, 0x3e62, 0x3e62, 0x3e62, 0x3e63, 0x3e63,
|
||||
0x3e63, 0x3e63, 0x3e63, 0x3e64, 0x3e64, 0x3e65, 0x3e65, 0x3e65, 0x3e65, 0x3e65, 0x3e66, 0x3e66,
|
||||
0x3e66, 0x3e66, 0x3e66, 0x3e67, 0x3e67, 0x3e67, 0x3e67, 0x3e67, 0x3e68, 0x3e68, 0x3e68, 0x3e68,
|
||||
0x3e68, 0x3e69, 0x3e69, 0x3e6a, 0x3e6a, 0x3e6a, 0x3e6a, 0x3e6a, 0x3e6b, 0x3e6b, 0x3e6b, 0x3e6b,
|
||||
0x3e6b, 0x3e6c, 0x3e6c, 0x3e6c, 0x3e6c, 0x3e6c, 0x3e6d, 0x3e6d, 0x3e6d, 0x3e6d, 0x3e6d, 0x3e6e,
|
||||
0x3e6e, 0x3e6e, 0x3e6e, 0x3e6e, 0x3e6f, 0x3e6f, 0x3e6f, 0x3e6f, 0x3e6f, 0x3e70, 0x3e70, 0x3e71,
|
||||
0x3e71, 0x3e71, 0x3e71, 0x3e71, 0x3e72, 0x3e72, 0x3e72, 0x3e72, 0x3e72, 0x3e73, 0x3e73, 0x3e73,
|
||||
0x3e73, 0x3e73, 0x3e74, 0x3e74, 0x3e74, 0x3e74, 0x3e74, 0x3e75, 0x3e75, 0x3e75, 0x3e75, 0x3e76,
|
||||
0x3e76, 0x3e76, 0x3e76, 0x3e76, 0x3e77, 0x3e77, 0x3e77, 0x3e77, 0x3e77, 0x3e78, 0x3e78, 0x3e78,
|
||||
0x3e78, 0x3e78, 0x3e79, 0x3e79, 0x3e79, 0x3e79, 0x3e79, 0x3e7a, 0x3e7a, 0x3e7a, 0x3e7a, 0x3e7a,
|
||||
0x3e7b, 0x3e7b, 0x3e7b, 0x3e7b, 0x3e7b, 0x3e7c, 0x3e7c, 0x3e7c, 0x3e7c, 0x3e7c, 0x3e7d, 0x3e7d,
|
||||
0x3e7d, 0x3e7d, 0x3e7d, 0x3e7e, 0x3e7e, 0x3e7e, 0x3e7e, 0x3e7f, 0x3e7f, 0x3e7f, 0x3e7f, 0x3e7f,
|
||||
0x3e80, 0x3e80, 0x3e80, 0x3e80, 0x3e80, 0x3e80, 0x3e80, 0x3e80, 0x3e80, 0x3e81, 0x3e81, 0x3e81,
|
||||
0x3e81, 0x3e81, 0x3e81, 0x3e81, 0x3e81, 0x3e81, 0x3e81, 0x3e82, 0x3e82, 0x3e82, 0x3e82, 0x3e82,
|
||||
0x3e82, 0x3e82, 0x3e82, 0x3e82, 0x3e83, 0x3e83, 0x3e83, 0x3e83, 0x3e83, 0x3e83, 0x3e83, 0x3e83,
|
||||
0x3e83, 0x3e83, 0x3e84, 0x3e84, 0x3e84, 0x3e84, 0x3e84, 0x3e84, 0x3e84, 0x3e84, 0x3e84, 0x3e84,
|
||||
0x3e85, 0x3e85, 0x3e85, 0x3e85, 0x3e85, 0x3e85, 0x3e85, 0x3e85, 0x3e85, 0x3e85, 0x3e86, 0x3e86,
|
||||
0x3e86, 0x3e86, 0x3e86, 0x3e86, 0x3e86, 0x3e86, 0x3e86, 0x3e87, 0x3e87, 0x3e87, 0x3e87, 0x3e87,
|
||||
0x3e87, 0x3e87, 0x3e87, 0x3e87, 0x3e87, 0x3e88, 0x3e88, 0x3e88, 0x3e88, 0x3e88, 0x3e88, 0x3e88,
|
||||
0x3e88, 0x3e88, 0x3e88, 0x3e89, 0x3e89, 0x3e89, 0x3e89, 0x3e89, 0x3e89, 0x3e89, 0x3e89, 0x3e89,
|
||||
0x3e89, 0x3e8a, 0x3e8a, 0x3e8a, 0x3e8a, 0x3e8a, 0x3e8a, 0x3e8a, 0x3e8a, 0x3e8a, 0x3e8b, 0x3e8b,
|
||||
0x3e8b, 0x3e8b, 0x3e8b, 0x3e8b, 0x3e8b, 0x3e8b, 0x3e8b, 0x3e8b, 0x3e8b, 0x3e8b, 0x3e8b, 0x3e8b,
|
||||
0x3e8b, 0x3e8c, 0x3e8c, 0x3e8c, 0x3e8c, 0x3e8c, 0x3e8c, 0x3e8c, 0x3e8c, 0x3e8c, 0x3e8c, 0x3e8d,
|
||||
0x3e8d, 0x3e8d, 0x3e8d, 0x3e8d, 0x3e8d, 0x3e8d, 0x3e8d, 0x3e8d, 0x3e8d, 0x3e8e, 0x3e8e, 0x3e8e,
|
||||
0x3e8e, 0x3e8e, 0x3e8e, 0x3e8e, 0x3e8e, 0x3e8e, 0x3e8f, 0x3e8f, 0x3e8f, 0x3e8f, 0x3e8f, 0x3e8f,
|
||||
0x3e8f, 0x3e8f, 0x3e8f, 0x3e8f, 0x3e90, 0x3e90, 0x3e90, 0x3e90, 0x3e90, 0x3e90, 0x3e90, 0x3e90,
|
||||
0x3e90, 0x3e90, 0x3e90, 0x3e90, 0x3e90, 0x3e90, 0x3e90, 0x3e91, 0x3e91, 0x3e91, 0x3e91, 0x3e91,
|
||||
0x3e91, 0x3e91, 0x3e91, 0x3e91, 0x3e91, 0x3e92, 0x3e92, 0x3e92, 0x3e92, 0x3e92, 0x3e92, 0x3e92,
|
||||
0x3e92, 0x3e92, 0x3e93, 0x3e93, 0x3e93, 0x3e93, 0x3e93, 0x3e93, 0x3e93, 0x3e93, 0x3e93, 0x3e93,
|
||||
0x3e94, 0x3e94, 0x3e94, 0x3e94, 0x3e94, 0x3e94, 0x3e94, 0x3e94, 0x3e94, 0x3e94, 0x3e94, 0x3e94,
|
||||
0x3e94, 0x3e94, 0x3e94, 0x3e95, 0x3e95, 0x3e95, 0x3e95, 0x3e95, 0x3e95, 0x3e95, 0x3e95, 0x3e95,
|
||||
0x3e95, 0x3e96, 0x3e96, 0x3e96, 0x3e96, 0x3e96, 0x3e96, 0x3e96, 0x3e96, 0x3e96, 0x3e97, 0x3e97,
|
||||
0x3e97, 0x3e97, 0x3e97, 0x3e97, 0x3e97, 0x3e97, 0x3e97, 0x3e97, 0x3e97, 0x3e97, 0x3e97, 0x3e97,
|
||||
0x3e97, 0x3e98, 0x3e98, 0x3e98, 0x3e98, 0x3e98, 0x3e98, 0x3e98, 0x3e98, 0x3e98, 0x3e98, 0x3e99,
|
||||
0x3e99, 0x3e99, 0x3e99, 0x3e99, 0x3e99, 0x3e99, 0x3e99, 0x3e99, 0x3e99, 0x3e99, 0x3e99, 0x3e99,
|
||||
0x3e99, 0x3e9a, 0x3e9a, 0x3e9a, 0x3e9a, 0x3e9a, 0x3e9a, 0x3e9a, 0x3e9a, 0x3e9a, 0x3e9a, 0x3e9b,
|
||||
0x3e9b, 0x3e9b, 0x3e9b, 0x3e9b, 0x3e9b, 0x3e9b, 0x3e9b, 0x3e9b, 0x3e9b, 0x3e9c, 0x3e9c, 0x3e9c,
|
||||
0x3e9c, 0x3e9c, 0x3e9c, 0x3e9c, 0x3e9c, 0x3e9c, 0x3e9c, 0x3e9c, 0x3e9c, 0x3e9c, 0x3e9c, 0x3e9c,
|
||||
0x3e9d, 0x3e9d, 0x3e9d, 0x3e9d, 0x3e9d, 0x3e9d, 0x3e9d, 0x3e9d, 0x3e9d, 0x3e9e, 0x3e9e, 0x3e9e,
|
||||
0x3e9e, 0x3e9e, 0x3e9e, 0x3e9e, 0x3e9e, 0x3e9e, 0x3e9e, 0x3e9e, 0x3e9e, 0x3e9e, 0x3e9e, 0x3e9e,
|
||||
0x3e9f, 0x3e9f, 0x3e9f, 0x3e9f, 0x3e9f, 0x3e9f, 0x3e9f, 0x3e9f, 0x3e9f, 0x3e9f, 0x3ea0, 0x3ea0,
|
||||
0x3ea0, 0x3ea0, 0x3ea0, 0x3ea0, 0x3ea0, 0x3ea0, 0x3ea0, 0x3ea0, 0x3ea0, 0x3ea0, 0x3ea0, 0x3ea0,
|
||||
0x3ea1, 0x3ea1, 0x3ea1, 0x3ea1, 0x3ea1, 0x3ea1, 0x3ea1, 0x3ea1, 0x3ea1, 0x3ea1, 0x3ea2, 0x3ea2,
|
||||
0x3ea2, 0x3ea2, 0x3ea2, 0x3ea2, 0x3ea2, 0x3ea3, 0x3ea3, 0x3ea4, 0x3ea4, 0x3ea4, 0x3ea5, 0x3ea5,
|
||||
0x3ea6, 0x3ea6, 0x3ea6, 0x3ea7, 0x3ea7, 0x3ea7, 0x3ea8, 0x3ea8, 0x3ea9, 0x3ea9, 0x3ea9, 0x3eaa,
|
||||
0x3eaa, 0x3eaa, 0x3eab, 0x3eab,
|
||||
};
|
||||
|
||||
static void tl_lut_ref(uint16_t *ofmap, uint16_t *ifmap, cvk_tl_shape_t ifmap_shape) {
|
||||
for (uint32_t i = 0; i < tl_shape_size(&ifmap_shape); i++) {
|
||||
if (mode == PRE_DATA_COMPARE_FIX) {
|
||||
ofmap[i] = test_pattern_ref[i];
|
||||
} else {
|
||||
ofmap[i] = convert_fp32_bf16(pow(convert_bf16_fp32(ifmap[i]), 0.5));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool verify(uint16_t *ofmap_data, uint16_t *ref_data, uint16_t *ifmap,
|
||||
uint64_t ifmap_shape_size, TEST_MODE mode) {
|
||||
uint64_t size = ifmap_shape_size;
|
||||
|
||||
for (uint64_t i = 0; i < size; i++) {
|
||||
bool is_close;
|
||||
uint16_t ref;
|
||||
uint16_t ofmap_data_bf16;
|
||||
float ref_f;
|
||||
float ofmap_data_f;
|
||||
|
||||
ref = ref_data[i];
|
||||
ref_f = convert_bf16_fp32(ref);
|
||||
ofmap_data_f = convert_bf16_fp32(ofmap_data[i]);
|
||||
ofmap_data_bf16 = ofmap_data[i];
|
||||
|
||||
if (mode == PRE_DATA_COMPARE_FIX) {
|
||||
is_close = ofmap_data[i] == ref;
|
||||
} else {
|
||||
is_close = fabs(ref_f - ofmap_data_f) < 0.001;
|
||||
}
|
||||
|
||||
if (!is_close) {
|
||||
fprintf(stderr,
|
||||
"comparing failed at ofmap_data[%lu](input:%e), got %x, exp %x, "
|
||||
"fp32: got %e exp %e\n",
|
||||
i, convert_bf16_fp32(ifmap[i]), ofmap_data_bf16, ref, ofmap_data_f, ref_f);
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void gen_input(uint16_t *ifmap, uint64_t ifmap_shape_size) {
|
||||
if (mode == PRE_DATA_COMPARE_FIX) {
|
||||
memcpy(ifmap, &test_pattern, sizeof(test_pattern));
|
||||
} else {
|
||||
for (uint64_t i = 0; i < ifmap_shape_size; i++) {
|
||||
srand(static_cast<unsigned>(time(0)));
|
||||
std::random_device rd;
|
||||
std::mt19937 e2(rd());
|
||||
float LO = pow(2, -10);
|
||||
float HI = pow(2, 10);
|
||||
// std::uniform_real_distribution<> dist(pow(2,-62), pow(2,63));
|
||||
for (uint64_t i = 0; i < ifmap_shape_size; i++) {
|
||||
// float r3 = dist(e2);
|
||||
float r3 = LO + static_cast<float>(rand()) / (static_cast<float>(RAND_MAX / (HI - LO)));
|
||||
ifmap[i] = convert_fp32_bf16(r3);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DBG
|
||||
for (uint64_t i = 0; i < ifmap_shape_size; i++) {
|
||||
printf("source if[%lu] bf16 %f 0x%x, log2f is %f\n", i, convert_bf16_fp32(ifmap[i]), ifmap[i],
|
||||
floor(log2((convert_bf16_fp32(ifmap[i])))));
|
||||
}
|
||||
#endif /* ifdef DBG */
|
||||
}
|
||||
|
||||
/**
 * Run one sqrt test pass for the file-level test `mode` on a tensor of the
 * given shape.
 *
 * Steps: allocate host buffers and local-memory tensors, generate the input
 * and the host reference, build the sqrt lookup tables, upload input and
 * tables, emit the sqrt kernel, submit it, read the result back and pass it
 * to verify(). Everything allocated here is released before returning.
 *
 * @param ctx      runtime handle passed through to the test_* helpers
 * @param bmk      kernel context used for allocation and command emission
 * @param input_n  first shape dimension
 * @param input_c  second shape dimension
 * @param input_h  third shape dimension
 * @param input_w  fourth shape dimension
 */
static void testbench(CVI_RT_HANDLE *ctx, cvk_context_t *bmk, uint32_t input_n, uint32_t input_c,
                      uint32_t input_h, uint32_t input_w) {
  cvk_fmt_t fmt = CVK_FMT_BF16;

  // TODO: check more shape / align
  cvk_tl_shape_t ifmap_shape = {input_n, input_c, input_h, input_w};
  cvk_tl_shape_t ofmap_shape = ifmap_shape;
  cvk_tl_shape_t table_shape;
  cvm_table_shape(bmk, &table_shape);

  const uint64_t ifmap_shape_size = tl_shape_size(&ifmap_shape);
  const uint64_t ofmap_size = tl_shape_size(&ofmap_shape);
  const uint64_t table_size = tl_shape_size(&table_shape);

  // host buffers, sized in bytes from the element count and format width
  const int elem_bytes = bytesize_of_fmt(fmt);
  uint16_t *ifmap = (uint16_t *)xmalloc(ifmap_shape_size * elem_bytes);
  uint16_t *ref_data = (uint16_t *)xmalloc(ofmap_size * elem_bytes);
  uint16_t *table_data = (uint16_t *)xmalloc(table_size * elem_bytes);
  uint16_t *table_data_mantissa = (uint16_t *)xmalloc(table_size * elem_bytes);

  // local-memory tensors (released below in reverse allocation order)
  cvk_tl_t *tl_ifmap = test_alloc_tl(bmk, ifmap_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_ofmap_bf16 = test_alloc_tl(bmk, ofmap_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_buf = test_alloc_tl(bmk, tl_ifmap->shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_table = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);
  cvk_tl_t *tl_table_mantissa = test_alloc_tl(bmk, table_shape, fmt, /*align*/ 1);

  // input data and host reference
  gen_input(ifmap, ifmap_shape_size);
  tl_lut_ref(ref_data, ifmap, ifmap_shape);

  // lookup tables
  cvm_sqrt_tbl(table_data, table_data_mantissa, &table_shape);

  // sys->lmem
  test_put_tensor_g2l_comp(ctx, bmk, tl_ifmap, (uint8_t *)ifmap);
  test_put_tensor_g2l_comp(ctx, bmk, tl_table, (uint8_t *)table_data);
  test_put_tensor_g2l_comp(ctx, bmk, tl_table_mantissa, (uint8_t *)table_data_mantissa);

  cvm_emit_sqrt(bmk, tl_ifmap, tl_buf, tl_table, tl_table_mantissa, tl_ofmap_bf16);

  // issue cmd
  test_submit_comp(ctx, bmk);

  // lmem->sys; the returned buffer is released with free() below
  uint16_t *ofmap_data = (uint16_t *)test_get_tensor_l2g_comp(ctx, bmk, tl_ofmap_bf16);

  verify(ofmap_data, ref_data, ifmap, ifmap_shape_size, mode);

  free_tl(bmk, tl_table_mantissa);
  free_tl(bmk, tl_table);
  free_tl(bmk, tl_buf);
  free_tl(bmk, tl_ofmap_bf16);
  free_tl(bmk, tl_ifmap);

  free(ifmap);
  free(ref_data);
  free(ofmap_data);
  free(table_data);
  free(table_data_mantissa);
}
|
||||
|
||||
int main() {
|
||||
CVI_RT_HANDLE ctx;
|
||||
cvk_context_t *bmk;
|
||||
int round_mode;
|
||||
|
||||
round_mode = set_store_feround();
|
||||
|
||||
test_init(&ctx, &bmk);
|
||||
|
||||
for (int i = PRE_DATA_COMPARE_FIX; i < TEST_MODE_MAX; i++) {
|
||||
mode = static_cast<TEST_MODE>(i);
|
||||
printf("test mode %d...\n", mode);
|
||||
|
||||
int input_n = 1;
|
||||
int input_c = 32;
|
||||
int input_h = 1;
|
||||
int input_w = 1;
|
||||
|
||||
if (mode == PRE_DATA_COMPARE_FIX) {
|
||||
input_h = 4;
|
||||
input_w = 8;
|
||||
} else {
|
||||
input_h = input_w = 16;
|
||||
}
|
||||
|
||||
testbench(&ctx, bmk, input_n, input_c, input_h, input_w);
|
||||
}
|
||||
|
||||
test_exit(&ctx, bmk);
|
||||
restore_feround(round_mode);
|
||||
return 0;
|
||||
}
|
||||
383
cvimath/tests/include/test_native_ref.h
Normal file
383
cvimath/tests/include/test_native_ref.h
Normal file
@ -0,0 +1,383 @@
|
||||
#ifndef _BM_NATIVE_REF_H_
|
||||
#define _BM_NATIVE_REF_H_
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef union {
|
||||
uint32_t ival;
|
||||
float fval;
|
||||
} IF_VAL;
|
||||
|
||||
/*
|
||||
* fp32 version
|
||||
*/
|
||||
|
||||
int array_cmp_float(const char *const info, float *p_exp, float *p_got, int count, float delta);
|
||||
int array_cmp_int(const char *const info, int *p_exp, int *p_got, int count);
|
||||
|
||||
/**
|
||||
* @name calc_dilute_hw
|
||||
* @brief calculate diluted dimension
|
||||
* @ingroup libbmutils
|
||||
*
|
||||
* @param [in] h original dimension
|
||||
* @param [in] ins_h scaling factor, 0 -> no scaling
|
||||
* @param [in] ins_h_l compensation value after last value in each row
|
||||
* @param [in] pad_h_b extra padding left or bottom
|
||||
* @param [in] pad_h_t extra padding right or top
|
||||
*
|
||||
* @retval diluted value
|
||||
*/
|
||||
int calc_dilute_hw(int h, int ins_h, int ins_h_l, int pad_h_b, int pad_h_t);
|
||||
|
||||
/**
|
||||
* @name calc_output_hw
|
||||
* @brief calculate output dimension by kernel and stride size
|
||||
* @ingroup libbmutils
|
||||
*
|
||||
* @param [in] hw original dimension
|
||||
* @param [in] khw kernel size along this dimension
|
||||
* @param [in] stride stride size along this dimension
|
||||
*
|
||||
* @retval output dimension
|
||||
*/
|
||||
int calc_output_hw(int hw, int khw, int stride);
|
||||
|
||||
/**
|
||||
* @name fill_pad_fmap_fp32
|
||||
* @brief fill padded feature map with unpadded map
|
||||
* @ingroup libbmutils
|
||||
*
|
||||
* @param [in] before input array
|
||||
* @param [out] after output array reference, if NULL, alloc a new one
|
||||
* @param [in] pad_value padding value
|
||||
* @param [in] pad_l padding left size
|
||||
* @param [in] pad_r padding right size
|
||||
* @param [in] pad_t padding top size
|
||||
* @param [in] pad_b padding bottom size
|
||||
* @param [in] ins_h scaling factor h
|
||||
* @param [in] ins_w scaling factor w
|
||||
* @param [in] ins_h_last compensation value after last value in each row
|
||||
* @param [in] ins_w_last compensation value after last value in each col
|
||||
* @param [in] h_before origin height
|
||||
* @param [in] w_before origin width
|
||||
*
|
||||
* @retval BM_SUCCESS success
|
||||
* @retval BM_ERR_INVALID_ARGUMENT before or after is a null pointer
|
||||
* @retval BM_ERR_NOMEM can't alloc new output array
|
||||
*/
|
||||
int fill_pad_fmap_fp32(const float *before, float **after, float pad_value, int pad_t, int pad_b,
|
||||
int pad_l, int pad_r, int ins_h, int ins_w, int ins_h_last, int ins_w_last,
|
||||
int h_before, int w_before);
|
||||
|
||||
void native_md_scalar(float *a, float *b, float *r, int N, int C, int H, int W, int op,
|
||||
bool result_add);
|
||||
|
||||
void native_conv_ref(const void *ifmap, void *ofmap, const void *weight, int input_n, int input_c,
|
||||
int input_h, int input_w, int output_c, int output_h, int output_w, int groups,
|
||||
int kh, int kw, int dilation_h, int dilation_w, int pad_h, int pad_w,
|
||||
int stride_h, int stride_w, int flip, int using_bias, const void *bias,
|
||||
int result_add);
|
||||
|
||||
void native_pooling_forward_max(const float *bottom_data, float *top_data, int *mask_data,
|
||||
const int count, const int num, const int channels,
|
||||
const int height, const int width, const int pooled_height,
|
||||
const int pooled_width, const int kernel_h, const int kernel_w,
|
||||
const int stride_h, const int stride_w, const int pad_h,
|
||||
const int pad_w);
|
||||
|
||||
void native_pooling_forward_ave(const float *bottom_data, float *top_data, const int count,
|
||||
const int num, const int channels, const int height,
|
||||
const int width, const int pooled_height, const int pooled_width,
|
||||
const int kernel_h, const int kernel_w, const int stride_h,
|
||||
const int stride_w, const int pad_h, const int pad_w);
|
||||
|
||||
/*
|
||||
* int8 version
|
||||
*/
|
||||
|
||||
/**
|
||||
* @name array_cmp_int8
|
||||
* @brief compare the content of p_exp and p_got and print the error index
|
||||
* and value
|
||||
* @ingroup libbmutils
|
||||
*
|
||||
* @param [in] info information string printed when an error is encountered
|
||||
* @param [in] p_exp expected array
|
||||
* @param [in] p_got array to compare against p_exp
|
||||
* @param [in] count length of each array
|
||||
* @retval 0 no error
|
||||
* @retval -1 error occur
|
||||
*/
|
||||
int array_cmp_int8(const char *const info, const int8_t *p_exp, const int8_t *p_got, int count);
|
||||
|
||||
/**
|
||||
* @name fill_pad_fmap_int8
|
||||
* @brief fill padded feature map with unpadded map
|
||||
* @ingroup libbmutils
|
||||
*
|
||||
* @param [in] before input array
|
||||
* @param [out] pafter output array reference, if NULL, alloc a new one
|
||||
* @param [in] pad_val padding value
|
||||
* @param [in] pad_l padding left size
|
||||
* @param [in] pad_r padding right size
|
||||
* @param [in] pad_t padding top size
|
||||
* @param [in] pad_b padding bottom size
|
||||
* @param [in] ins_h scaling factor h
|
||||
* @param [in] ins_w scaling factor w
|
||||
* @param [in] ins_h_last compensation value after last value in each row
|
||||
* @param [in] ins_w_last compensation value after last value in each col
|
||||
* @param [in] h_before origin height
|
||||
* @param [in] w_before origin width
|
||||
*
|
||||
* @retval BM_SUCCESS success
|
||||
* @retval BM_ERR_INVALID_ARGUMENT before or pafter is null pointer
|
||||
* @retval BM_ERR_NOMEM can't alloc new output array
|
||||
*/
|
||||
int fill_pad_fmap_int8(const int8_t *before, int8_t **pafter, int pad_val, int pad_l, int pad_r,
|
||||
int pad_t, int pad_b, int ins_h, int ins_w, int ins_h_last, int ins_w_last,
|
||||
int h_before, int w_before);
|
||||
|
||||
int fill_pad_fmap_bf16(const unsigned short *before, unsigned short **pafter, int pad_val,
|
||||
int pad_l, int pad_r, int pad_t, int pad_b, int ins_h, int ins_w,
|
||||
int ins_h_last, int ins_w_last, int h_before, int w_before);
|
||||
|
||||
/**
|
||||
* @name fill_int_with_int8
|
||||
* @brief (int) pdest[i] = (int8_t)psrc[i] for each element
|
||||
* @ingroup libbmutils
|
||||
*
|
||||
* @param [out] pdest output array
|
||||
* @param [in] psrc input array
|
||||
* @param [in] len length of input array
|
||||
*/
|
||||
void fill_int_with_int8(int *pdest, int8_t *psrc, int len);
|
||||
|
||||
/**
|
||||
* @name fill_int_with_uint8
|
||||
* @brief (int) pdest[i] = (uint8_t)psrc[i] for each element
|
||||
* @ingroup libbmutils
|
||||
*
|
||||
* @param [out] pdest output array
|
||||
* @param [in] psrc input array
|
||||
* @param [in] len length of input array
|
||||
*/
|
||||
void fill_int_with_uint8(int *pdest, uint8_t *psrc, int len);
|
||||
|
||||
/**
|
||||
* @name fill_int_with_int16
|
||||
* @brief (int) pdest[i] = (int16_t)psrc[i] for each element
|
||||
* @ingroup libbmutils
|
||||
*
|
||||
* @param [out] pdest output array
|
||||
* @param [in] psrc input array
|
||||
* @param [in] len length of input array
|
||||
*/
|
||||
void fill_int_with_int16(int *pdest, int16_t *psrc, int len);
|
||||
|
||||
void native_md_scalar_int8(int8_t *a, int8_t *b, int8_t *r, int N, int C, int H, int W, int op,
|
||||
bool result_add);
|
||||
|
||||
/**
|
||||
* @name inner_product
|
||||
* @brief inner product of two array
|
||||
* @ingroup libbmutils
|
||||
*
|
||||
* @param [in] a input array 0
|
||||
* @param [in] b input array 1
|
||||
* @param [in] len length of a or b
|
||||
* @param [out] c store the summation
|
||||
*/
|
||||
void inner_product(const int *a, const int *b, int len, int *c);
|
||||
void inner_float_product(const float *a, const float *b, int len, float *c);
|
||||
|
||||
/**
|
||||
* @name native_conv_int8
|
||||
* @brief do convolution specific 8bit feature map
|
||||
* @ingroup libbmutils
|
||||
*
|
||||
* @param [in] ifmap input array
|
||||
* @param [in] weight weight data array
|
||||
* @param [in] bias bias array if !NULL, add bias
|
||||
* @param [out] ofmap output array
|
||||
* @param [in] in input batch size
|
||||
* @param [in] ic input channel size
|
||||
* @param [in] ih input height
|
||||
* @param [in] iw input width
|
||||
* @param [in] oc output channel size
|
||||
* @param [in] kh kernel height
|
||||
* @param [in] kw kernel width
|
||||
* @param [in] dh kernel dilute height factor
|
||||
* @param [in] dw kernel dilute width factor
|
||||
* @param [in] pad_h_t padding top size
|
||||
* @param [in] pad_h_b padding bottom size
|
||||
* @param [in] pad_w_l padding left size
|
||||
* @param [in] pad_w_r padding right size
|
||||
* @param [in] stride_h stride height
|
||||
* @param [in] stride_w stride width
|
||||
* @param [in] ins_h insert extra element for each i_fmap row
|
||||
* @param [in] ins_w insert extra element for each i_fmap col
|
||||
* @param [in] ins_h_last insert extra element for last i_fmap row
|
||||
* @param [in] ins_w_last insert extra element for last i_fmap col
|
||||
* @param [in] input_sign i_fmap data type. 0 => signed, 1 => unsigned
|
||||
* @param [in] r_shift_width scale bit for saturation
|
||||
*
|
||||
* @retval BM_SUCCESS success
|
||||
* @retval other saturation failed
|
||||
*/
|
||||
int native_conv_int8(const int8_t *ifmap, const int8_t *weight, const int16_t *bias, int8_t *ofmap,
|
||||
int in, int ic, int ih, int iw, int oc, int kh, int kw, int dh, int dw,
|
||||
int pad_h_t, int pad_h_b, int pad_w_l, int pad_w_r, int stride_h, int stride_w,
|
||||
int ins_h, int ins_w, int ins_h_last, int ins_w_last, int input_sign,
|
||||
int r_shift_width, int do_relu);
|
||||
|
||||
/**
|
||||
* @name native_fc_int8
|
||||
* @brief do full-connected layer for specific feature map
|
||||
* @ingroup libbmutils
|
||||
*
|
||||
* @param [in] L input array
|
||||
* @param [in] R weight array
|
||||
* @param [in] B bias array if !NULL, add bias
|
||||
* @param [in] Y accumulation array if !NULL, add this
|
||||
* @param [out] Y_ref output array
|
||||
* @param [in] L_row_num input row size
|
||||
* @param [in] L_col_num input col size
|
||||
* @param [in] R_col_num weight col size
|
||||
* @param [in] L_sign signedness flag of L
|
||||
* @param [in] R_sign signedness flag of R
|
||||
* @param [in] B_sign signedness flag of B
|
||||
* @param [in] l_shift_width left shift width (bits)
|
||||
* @param [in] r_shift_width right shift width (bits)
|
||||
* @param [in] is_result_int8 non-zero if the result is int8
|
||||
* @param [in] do_relu non-zero to apply ReLU
|
||||
*
|
||||
* @retval BM_SUCCESS success
|
||||
* @retval other saturation failed
|
||||
*/
|
||||
int native_fc_int8(const int8_t *L, const int8_t *R, const int16_t *B, const int16_t *Y, int *Y_ref,
|
||||
int L_row_num, int L_col_num, int R_col_num, int L_sign, int R_sign, int B_sign,
|
||||
int l_shift_width, int r_shift_width, int is_result_int8, int do_relu);
|
||||
|
||||
/**
|
||||
* @name native_pooling_ave_int8
|
||||
* @brief do average pooling for specific feature map
|
||||
* @ingroup libbmutils
|
||||
*
|
||||
* @param [in] i_fmap input array
|
||||
* @param [in] weight weight data array
|
||||
* @param [in] bias bias array if !NULL, add bias
|
||||
* @param [out] o_fmap output array
|
||||
* @param [in] pad_h_t padding top size
|
||||
* @param [in] pad_h_b padding bottom size
|
||||
* @param [in] pad_w_l padding left size
|
||||
* @param [in] pad_w_r padding right size
|
||||
* @param [in] stride_h stride height
|
||||
* @param [in] stride_w stride width
|
||||
* @param [in] ins_h insert extra element for each i_fmap row
|
||||
* @param [in] ins_w insert extra element for each i_fmap col
|
||||
* @param [in] ins_h_last insert extra element for last i_fmap row
|
||||
* @param [in] ins_w_last insert extra element for last i_fmap col
|
||||
* @param [in] input_sign i_fmap data type. 0 => signed, 1 => unsigned
|
||||
* @param [in] satu_sign saturation data type. 0 => unsigned, 1 => signed
|
||||
* @param [in] r_shift_width scale bit for saturation
|
||||
* @param [in] const_weight if weight array has one uint8_t value
|
||||
*
|
||||
* @retval BM_SUCCESS success
|
||||
* @retval BM_ERR_INVALID_ARGUMENT illegal kh/kw or r_shift_width
|
||||
*/
|
||||
int native_pooling_ave_int8(const int8_t *i_fmap, const void *weight, const int16_t *bias,
|
||||
int8_t *o_fmap, int input_n, int input_c, int input_h, int input_w,
|
||||
int kh, int kw, int pad_h_t, int pad_h_b, int pad_w_l, int pad_w_r,
|
||||
int stride_h, int stride_w, int ins_w, int ins_h, int ins_w_last,
|
||||
int ins_h_last, int input_sign, int satu_sign, int r_shift_width,
|
||||
int const_weight);
|
||||
|
||||
/**
|
||||
* @name native_pooling_max_int8
|
||||
* @brief do max pooling for specific feature map
|
||||
* @ingroup libbmutils
|
||||
*
|
||||
* @param [in] i_fmap input array
|
||||
* @param [out] o_fmap output array
|
||||
* @param [in] pad_h_t padding top size
|
||||
* @param [in] pad_h_b padding bottom size
|
||||
* @param [in] pad_w_l padding left size
|
||||
* @param [in] pad_w_r padding right size
|
||||
* @param [in] stride_h stride height
|
||||
* @param [in] stride_w stride width
|
||||
* @param [in] ins_h insert extra element for each i_fmap row
|
||||
* @param [in] ins_w insert extra element for each i_fmap col
|
||||
* @param [in] ins_h_last insert extra element for last i_fmap row
|
||||
* @param [in] ins_w_last insert extra element for last i_fmap col
|
||||
* @param [in] input_sign i_fmap data type. 0 => unsigned, 1 => signed
|
||||
*
|
||||
* @retval BM_SUCCESS success
|
||||
* @retval BM_ERR_INVALID_ARGUMENT illegal ins_h/w or ins_[hw]_last
|
||||
*/
|
||||
int native_pooling_max_int8(const int8_t *i_fmap, int8_t *o_fmap, int input_n, int input_c,
|
||||
int input_h, int input_w, int kh, int kw, int pad_h_t, int pad_h_b,
|
||||
int pad_w_l, int pad_w_r, int stride_h, int stride_w, int ins_h,
|
||||
int ins_w, int ins_h_last, int ins_w_last, int input_sign);
|
||||
|
||||
int native_pooling_max_fp32(const float *i_fmap, float *o_fmap, int input_n, int input_c,
|
||||
int input_h, int input_w, int kh, int kw, int pad_h_t, int pad_h_b,
|
||||
int pad_w_l, int pad_w_r, int stride_h, int stride_w, int ins_h,
|
||||
int ins_w, int ins_h_last, int ins_w_last);
|
||||
|
||||
int native_pooling_avg_fp32(const float *i_fmap, float *o_fmap, int input_n, int input_c,
|
||||
int input_h, int input_w, int kh, int kw, int pad_h_t, int pad_h_b,
|
||||
int pad_w_l, int pad_w_r, int stride_h, int stride_w, int ins_h,
|
||||
int ins_w, int ins_h_last, int ins_w_last, float avg_pooling_const);
|
||||
|
||||
int native_depthwise_fp32(const float *ifmap, const float *weight, const float *bias, float *ofmap,
|
||||
int in, int ic, int ih, int iw, int kh, int kw, int dh, int dw,
|
||||
int pad_h_t, int pad_h_b, int pad_w_l, int pad_w_r, int stride_h,
|
||||
int stride_w, int ins_h, int ins_w, int ins_h_last, int ins_w_last);
|
||||
|
||||
/**
|
||||
* @name satu_2_8bit
|
||||
* @brief saturate each signed or unsigned 8bit element in array
|
||||
* @ingroup libbmutils
|
||||
*
|
||||
* @param [in] pBuff input array
|
||||
* @param [in] len length of input array
|
||||
* @param [out] pByteOut output array
|
||||
* @param [in] rshiftbits right shift bit if round_floor && value != 0
|
||||
* @param [in] round_floor enable floor rounding
|
||||
* @param [in] sign_unsign 0 => unsigned, 1 => signed
|
||||
*
|
||||
* @retval BM_SUCCESS success
|
||||
* @retval BM_ERR_INVALID_ARGUMENT rshiftbits < 0
|
||||
*/
|
||||
int satu_2_8bit(const int *pBuff, int len, int8_t *pByteOut, int rshiftbits, int round_floor,
|
||||
int sign_unsign);
|
||||
|
||||
/**
|
||||
* @name satu_2_16bit
|
||||
* @brief saturate each signed or unsigned 16bit element in array
|
||||
* @ingroup libbmutils
|
||||
*
|
||||
* @param [in] pBuff input array
|
||||
* @param [in] len length of input array
|
||||
* @param [out] pByteOut output array
|
||||
* @param [in] rshiftbits right shift bit if round_floor && value != 0
|
||||
* @param [in] round_floor enable floor rounding
|
||||
* @param [in] sign_unsign 0 => unsigned, 1 => signed
|
||||
*
|
||||
* @retval BM_SUCCESS success
|
||||
* @retval BM_ERR_INVALID_ARGUMENT rshiftbits < 0
|
||||
*/
|
||||
int satu_2_16bit(const int *pBuff, int len, short *pByteOut, int rshiftbits, int round_floor,
|
||||
int sign_unsign);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _BM_NATIVE_REF_H_ */
|
||||
41
cvimath/tests/include/test_tf_quant_util.h
Normal file
41
cvimath/tests/include/test_tf_quant_util.h
Normal file
@ -0,0 +1,41 @@
|
||||
#ifndef TEST_TF_QUANT_UTIL_H
|
||||
#define TEST_TF_QUANT_UTIL_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* MAX(a, b): yields the larger of a and b. Each argument is copied into a typed local first, so it is evaluated exactly once (relies on the GNU statement-expression and __typeof__ extensions). */
#define MAX(a, b) \
|
||||
({ \
|
||||
__typeof__(a) _a = (a); \
|
||||
__typeof__(b) _b = (b); \
|
||||
_a > _b ? _a : _b; \
|
||||
})
|
||||
|
||||
/* MIN(a, b): yields the smaller of a and b. Each argument is copied into a typed local first, so it is evaluated exactly once (relies on the GNU statement-expression and __typeof__ extensions). */
#define MIN(a, b) \
|
||||
({ \
|
||||
__typeof__(a) _a = (a); \
|
||||
__typeof__(b) _b = (b); \
|
||||
_a > _b ? _b : _a; \
|
||||
})
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
int32_t RoundingDivideByPOT(int32_t x, int exponent);
|
||||
int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b);
|
||||
int32_t MultiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier, int rshift);
|
||||
void QuantizeMultiplierSmallerThanOne(float real_multiplier, uint32_t *quantized_multiplier,
|
||||
int *right_shift);
|
||||
|
||||
void pack_chl_quan_param(uint32_t channels, int has_bias, int32_t *bias, uint32_t *multiplier,
|
||||
int8_t *rshift, uint8_t *packed_data);
|
||||
|
||||
// 1880v2: 5bit right shift, [0, 31]
|
||||
// 1822: 1bit sign, 5b shift, [-32, 31]
|
||||
int8_t truncate_rshift(int8_t rshift, int8_t allow_lshift);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TEST_TF_QUANT_UTIL_H
|
||||
Reference in New Issue
Block a user