#ifndef _BM_NATIVE_REF_H_ #define _BM_NATIVE_REF_H_ #include #include #include #ifdef __cplusplus extern "C" { #endif typedef union { uint32_t ival; float fval; } IF_VAL; /* * fp32 version */ int array_cmp_float(const char *const info, float *p_exp, float *p_got, int count, float delta); int array_cmp_int(const char *const info, int *p_exp, int *p_got, int count); /** * @name calc_dilute_hw * @brief calculate diluted dimention * @ingroup libbmutils * * @param [in] h origin dimention * @param [in] ins_h scaleing factor, 0 -> no scaling * @param [in] ins_h_l compensation value after last value in each row * @param [in] pad_h_b extra padding left ofr bottom * @param [in] pad_h_t extra padding right or top * * @retval diluted value */ int calc_dilute_hw(int h, int ins_h, int ins_h_l, int pad_h_b, int pad_h_t); /** * @name calc_output_hw * @brief calculate output dimention by kernel and stride size * @ingroup libbmutils * * @param [in] hw origin dimention * @param [in] kwh scaling factor, 0 -> no scaling * @param [in] stride compensation value after last value in each row * * @retval output dimention */ int calc_output_hw(int hw, int khw, int stride); /** * @name fill_pad_fmap_fp32 * @brief fill padded feature map with unpadded map * @ingroup libbmutils * * @param [in] before input array * @param [out] pbefore output array reference, if NULL, alloc a new one * @param [in] pad_val padding value * @param [in] pad_l padding left size * @param [in] pad_r padding right size * @param [in] pad_t padding top size * @param [in] pad_b padding bottom size * @param [in] ins_h scaling factor h * @param [in] ins_w scaling factor w * @param [in] ins_h_last compensation value after last value in each row * @param [in] ins_w_last compensation value after last value in each col * @param [in] h_before origin height * @param [in] w_before origin width * * @retval BM_SUCCESS success * @retval BM_ERR_INVALID_ARGUMENT before or pafter is null pointer * @retval BM_ERR_NOMEM can't alloc new output array */ int fill_pad_fmap_fp32(const float *before, float **after, float pad_value, int pad_t, int pad_b, int pad_l, int pad_r, int ins_h, int ins_w, int ins_h_last, int ins_w_last, int h_before, int w_before); void native_md_scalar(float *a, float *b, float *r, int N, int C, int H, int W, int op, bool result_add); void native_conv_ref(const void *ifmap, void *ofmap, const void *weight, int input_n, int input_c, int input_h, int input_w, int output_c, int output_h, int output_w, int groups, int kh, int kw, int dilation_h, int dilation_w, int pad_h, int pad_w, int stride_h, int stride_w, int flip, int using_bias, const void *bias, int result_add); void native_pooling_forward_max(const float *bottom_data, float *top_data, int *mask_data, const int count, const int num, const int channels, const int height, const int width, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w); void native_pooling_forward_ave(const float *bottom_data, float *top_data, const int count, const int num, const int channels, const int height, const int width, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w); /* * int8 vresion */ /** * @name array_cmp_int8 * @brief compare the contect of p_exp and p_got and print the error index * and value * @ingroup libbmutils * * @param [in] info informataion string printed when encounter error * @param [in] p_exp input array * @param [in] p_got length of input array * @param [in] len length of input array * @retval 0 no error * @retval -1 error occur */ int array_cmp_int8(const char *const info, const int8_t *p_exp, const int8_t *p_got, int count); /** * @name fill_pad_fmap_int8 * @brief fill padded feature map with unpadded map * @ingroup libbmutils * * @param [in] before input array * @param [out] pbefore output array reference, if NULL, alloc a new one * @param [in] pad_val padding value * @param [in] pad_l padding left size * @param [in] pad_r padding right size * @param [in] pad_t padding top size * @param [in] pad_b padding bottom size * @param [in] ins_h scaling factor h * @param [in] ins_w scaling factor w * @param [in] ins_h_last compensation value after last value in each row * @param [in] ins_w_last compensation value after last value in each col * @param [in] h_before origin height * @param [in] w_before origin width * * @retval BM_SUCCESS success * @retval BM_ERR_INVALID_ARGUMENT before or pafter is null pointer * @retval BM_ERR_NOMEM can't alloc new output array */ int fill_pad_fmap_int8(const int8_t *before, int8_t **pafter, int pad_val, int pad_l, int pad_r, int pad_t, int pad_b, int ins_h, int ins_w, int ins_h_last, int ins_w_last, int h_before, int w_before); int fill_pad_fmap_bf16(const unsigned short *before, unsigned short **pafter, int pad_val, int pad_l, int pad_r, int pad_t, int pad_b, int ins_h, int ins_w, int ins_h_last, int ins_w_last, int h_before, int w_before); /** * @name fill_int_with_int8 * @brief (int) pdest[i] = (int8_t)pdest[i] for each element * @ingroup libbmutils * * @param [out] pdest output array * @param [in] psrc input array * @param [in] len length of input array */ void fill_int_with_int8(int *pdest, int8_t *psrc, int len); /** * @name fill_int_with_uint8 * @brief (int) pdest[i] = (int16_t)pdest[i] for each element * @ingroup libbmutils * * @param [out] pdest output array * @param [in] psrc input array * @param [in] len length of input array */ void fill_int_with_uint8(int *pdest, uint8_t *psrc, int len); /** * @name fill_int_with_int16 * @brief (int) pdest[i] = (int16_t)pdest[i] for each element * @ingroup libbmutils * * @param [out] pdest output array * @param [in] psrc input array * @param [in] len length of input array */ void fill_int_with_int16(int *pdest, int16_t *psrc, int len); void native_md_scalar_int8(int8_t *a, int8_t *b, int8_t *r, int N, int C, int H, int W, int op, bool result_add); /** * @name inner_product * @brief inner product of two array * @ingroup libbmutils * * @param [in] a input array 0 * @param [in] b input array 1 * @param [in] len length of a or b * @param [out] c store the summation */ void inner_product(const int *a, const int *b, int len, int *c); void inner_float_product(const float *a, const float *b, int len, float *c); /** * @name native_conv_int8 * @brief do convolution specific 8bit feature map * @ingroup libbmutils * * @param [in] ifmap input array * @param [in] weight weight data array * @param [in] bias bias array if !NULL, add bias * @param [out] ofmap lenght of input array * @param [in] in input batch size * @param [in] ic input channel size * @param [in] ih input height * @param [in] iw input width * @param [in] oc output channle size * @param [in] kh kernel height * @param [in] kw kernel width * @param [in] dh kernel dilute height factor * @param [in] dw kernel dilute width factor * @param [in] pad_h_t padding top size * @param [in] pad_h_b padding bottom size * @param [in] pad_w_l padding left size * @param [in] pad_w_r padding right size * @param [in] stride_h stride height * @param [in] stride_w stride width * @param [in] ins_h insert extra element for each i_fmap row * @param [in] ins_w insert extra element for each i_fmap col * @param [in] ins_h_last insert extra element for last i_fmap row * @param [in] ins_w_last insert extra element for last i_fmap col * @param [in] input_sign i_fmap data type. 0 => signed, 1 => unsigned * @param [in] r_shift_width scale bit for saturation * * @retval BM_SUCCESS success * @retval other saturation failed */ int native_conv_int8(const int8_t *ifmap, const int8_t *weight, const int16_t *bias, int8_t *ofmap, int in, int ic, int ih, int iw, int oc, int kh, int kw, int dh, int dw, int pad_h_t, int pad_h_b, int pad_w_l, int pad_w_r, int stride_h, int stride_w, int ins_h, int ins_w, int ins_h_last, int ins_w_last, int input_sign, int r_shift_width, int do_relu); /** * @name native_fc_int8 * @brief do full-connected layer for specific feature map * @ingroup libbmutils * * @param [in] L input array * @param [in] R weight array * @param [in] B bias array if !NULL, add bias * @param [in] Y accumulation array if !NULL, add this * @param [out] Y_ref output array * @param [in] L_row_num input row size * @param [in] L_col_num input col size * @param [in] R_col_num weight * @param [in] L_sign padding top size * @param [in] R_sign padding top size * @param [in] B_sign padding top size * @param [in] L_shift_width padding top size * @param [in] R_shift_width padding top size * @param [in] is_result_int8 padding top size * @param [in] do_relu padding top size * * @retval BM_SUCCESS success * @retval other saturation failed */ int native_fc_int8(const int8_t *L, const int8_t *R, const int16_t *B, const int16_t *Y, int *Y_ref, int L_row_num, int L_col_num, int R_col_num, int L_sign, int R_sign, int B_sign, int l_shift_width, int r_shift_width, int is_result_int8, int do_relu); /** * @name native_pooling_ave_int8 * @brief do average pooling for specific feature map * @ingroup libbmutils * * @param [in] i_fmap input array * @param [in] weight weight data array * @param [in] bias bias array if !NULL, add bias * @param [out] o_fmap lenght of input array * @param [in] pad_h_t padding top size * @param [in] pad_h_b padding bottom size * @param [in] pad_w_l padding left size * @param [in] pad_w_r padding right size * @param [in] stride_h stride height * @param [in] stride_w stride width * @param [in] ins_h insert extra element for each i_fmap row * @param [in] ins_w insert extra element for each i_fmap col * @param [in] ins_h_last insert extra element for last i_fmap row * @param [in] ins_w_last insert extra element for last i_fmap col * @param [in] input_sign i_fmap data type. 0 => signed, 1 => unsigned * @param [in] satu_sign saturation data type. 0 => unsigned, 1 => signed * @param [in] r_shift_width scale bit for saturation * @param [in] const_weight if weight array has one uint8_t value * * @retval BM_SUCCESS success * @retval BM_ERR_INVALID_ARGUMENT illegal kh/kw or r_shift_width */ int native_pooling_ave_int8(const int8_t *i_fmap, const void *weight, const int16_t *bias, int8_t *o_fmap, int input_n, int input_c, int input_h, int input_w, int kh, int kw, int pad_h_t, int pad_h_b, int pad_w_l, int pad_w_r, int stride_h, int stride_w, int ins_w, int ins_h, int ins_w_last, int ins_h_last, int input_sign, int satu_sign, int r_shift_width, int const_weight); /** * @name native_pooling_max_int8 * @brief do max pooling for specific feature map * @ingroup libbmutils * * @param [in] i_fmap input array * @param [out] o_fmap lenght of input array * @param [in] pad_h_t padding top size * @param [in] pad_h_b padding bottom size * @param [in] pad_w_l padding left size * @param [in] pad_w_r padding right size * @param [in] stride_h stride height * @param [in] stride_w stride width * @param [in] ins_h insert extra element for each i_fmap row * @param [in] ins_w insert extra element for each i_fmap col * @param [in] ins_h_last insert extra element for last i_fmap row * @param [in] ins_w_last insert extra element for last i_fmap col * @param [in] input_sign i_fmap data type. 0 => unsigned, 1 => signed * * @retval BM_SUCCESS success * @retval BM_ERR_INVALID_ARGUMENT illegal ins_h/w or ins_[hw]_last */ int native_pooling_max_int8(const int8_t *i_fmap, int8_t *o_fmap, int input_n, int input_c, int input_h, int input_w, int kh, int kw, int pad_h_t, int pad_h_b, int pad_w_l, int pad_w_r, int stride_h, int stride_w, int ins_h, int ins_w, int ins_h_last, int ins_w_last, int input_sign); int native_pooling_max_fp32(const float *i_fmap, float *o_fmap, int input_n, int input_c, int input_h, int input_w, int kh, int kw, int pad_h_t, int pad_h_b, int pad_w_l, int pad_w_r, int stride_h, int stride_w, int ins_h, int ins_w, int ins_h_last, int ins_w_last); int native_pooling_avg_fp32(const float *i_fmap, float *o_fmap, int input_n, int input_c, int input_h, int input_w, int kh, int kw, int pad_h_t, int pad_h_b, int pad_w_l, int pad_w_r, int stride_h, int stride_w, int ins_h, int ins_w, int ins_h_last, int ins_w_last, float avg_pooling_const); int native_depthwise_fp32(const float *ifmap, const float *weight, const float *bias, float *ofmap, int in, int ic, int ih, int iw, int kh, int kw, int dh, int dw, int pad_h_t, int pad_h_b, int pad_w_l, int pad_w_r, int stride_h, int stride_w, int ins_h, int ins_w, int ins_h_last, int ins_w_last); /** * @name satu_2_8bit * @brief saturate each signed or unsiged 8bit element in array * @ingroup libbmutils * * @param [in] pBuff input array * @param [in] len lenght of input array * @param [out] pyByteOut output array * @param [in] rshiftbits right shift bit if round_floor && value != 0 * @param [in] round_floor enable floor rounding * @param [in] sign_unsign 0 => unsigned, 1 => signed * * @retval BM_SUCCESS success * @retval BM_ERR_INVALID_ARGUMENT rshiftbits < 0 */ int satu_2_8bit(const int *pBuff, int len, int8_t *pByteOut, int rshiftbits, int round_floor, int sign_unsign); /** * @name satu_2_16bit * @brief saturate each signed or unsiged 16bit element in array * @ingroup libbmutils * * @param [in] pBuff input array * @param [in] len lenght of input array * @param [out] pyByteOut output array * @param [in] rshiftbits right shift bit if round_floor && value != 0 * @param [in] round_floor enable floor rounding * @param [in] sign_unsign 0 => unsigned, 1 => signed * * @retval BM_SUCCESS success * @retval BM_ERR_INVALID_ARGUMENT rshiftbits < 0 */ int satu_2_16bit(const int *pBuff, int len, short *pByteOut, int rshiftbits, int round_floor, int sign_unsign); #ifdef __cplusplus } #endif #endif /* _BM_NATIVE_REF_H_ */