commit 9f1f57a19c3c281a931dfc71b318494487193d56 Author: sophgo-forum-service <forum_service@sophgo.com> Date: Mon May 13 13:58:23 2024 +0800 [feat] cvikernel opensource for cv18xx soc. - 79b6a7, set lookup_interp_table layer_id.
1177 lines
33 KiB
C
1177 lines
33 KiB
C
#ifndef __BMKERNEL_1822_H__
|
|
#define __BMKERNEL_1822_H__
|
|
|
|
#include <bmkernel/bm_kernel.h>
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
#define BMK1822_TIU 0 // Tensor Instruction Unit
|
|
#define BMK1822_CPU 1 // CPU, Reserved for common cpu op
|
|
#define BMK1822_TDMA 2 // TPU DMA
|
|
#define BMK1822_ENGINE_NUM 3 // Number of Engines
|
|
|
|
typedef struct bmk_context bmk1822_context_t;
|
|
typedef struct bmk_context bmk_context_t;
|
|
typedef cvk_chip_info_t bmk1822_chip_info_t;
|
|
|
|
typedef struct ec_desc bmk1822_op_t;
|
|
|
|
bmk1822_context_t * bmk1822_register(bmk_info_t *info);
|
|
void bmk1822_cleanup(bmk1822_context_t *ctx);
|
|
void bmk1822_reset(bmk1822_context_t *ctx);
|
|
uint8_t *bmk1822_acquire_cmdbuf(bmk1822_context_t *ctx, uint32_t *size);
|
|
void bmk1822_dmabuf_size(uint8_t *cmdbuf, uint32_t sz, uint32_t *psize, uint32_t *pmu_size);
|
|
void bmk1822_dmabuf_relocate(
|
|
uint8_t *dmabuf, uint64_t dmabuf_devaddr,
|
|
uint32_t original_size, uint32_t pmubuf_size);
|
|
void bmk1822_dmabuf_convert(uint8_t *cmdbuf, uint32_t sz, uint8_t *dmabuf);
|
|
void bmk1822_dmabuf_dump(uint8_t * dmabuf);
|
|
void bmk1822_arraybase_set(
|
|
uint8_t *dmabuf, uint32_t arraybase0L, uint32_t arraybase1L,
|
|
uint32_t arraybase0H, uint32_t arraybase1H);
|
|
|
|
|
|
void bmk1822_parallel_enable(bmk1822_context_t *ctx);
|
|
void bmk1822_set_op(bmk1822_context_t *ctx, void* op);
|
|
void* bmk1822_get_op(bmk1822_context_t *ctx);
|
|
void bmk1822_parallel_disable(bmk1822_context_t *ctx);
|
|
void bmk1822_set_layer_id(bmk1822_context_t *ctx, uint16_t layer_id);
|
|
uint16_t bmk1822_layer_id(bmk1822_context_t *ctx);
|
|
|
|
void bmk1822_create_streams(bmk1822_context_t *ctx, int nr_streams);
|
|
void bmk1822_destroy_streams(bmk1822_context_t *ctx);
|
|
void bmk1822_set_stream(bmk1822_context_t *ctx, int i);
|
|
|
|
void bmk1822_add_dependency(
|
|
bmk1822_context_t *ctx,
|
|
bmk1822_op_t *before,
|
|
bmk1822_op_t *after);
|
|
|
|
void bmk1822_cpu_op(
|
|
bmk1822_context_t *ctx,
|
|
const char* op_name, char *params, int size);
|
|
|
|
/*
|
|
* Fundamental structures for tensor and matrix
|
|
*/
|
|
|
|
typedef struct {
|
|
uint32_t n, c, w, col;
|
|
} bmk1822_matrix_lmem_shape_t;
|
|
|
|
typedef struct {
|
|
uint32_t row, col;
|
|
} bmk1822_matrix_tgmem_shape_t;
|
|
|
|
typedef struct {
|
|
uint32_t n, c, h;
|
|
} bmk1822_matrix_lmem_stride_t;
|
|
|
|
typedef struct {
|
|
uint32_t row;
|
|
} bmk1822_matrix_tgmem_stride_t;
|
|
|
|
typedef struct {
|
|
uint32_t n, c, h, w;
|
|
} bmk1822_tensor_lmem_shape_t;
|
|
|
|
typedef struct {
|
|
uint32_t n, c, h, w;
|
|
} bmk1822_tensor_tgmem_shape_t;
|
|
|
|
typedef struct {
|
|
uint32_t n, c, h, w;
|
|
} bmk1822_tensor_lmem_stride_t;
|
|
|
|
typedef struct {
|
|
uint32_t n, c, h;
|
|
} bmk1822_tensor_tgmem_stride_t;
|
|
|
|
typedef struct {
|
|
uint32_t start_address;
|
|
fmt_t fmt;
|
|
fmt_t cmprs_fmt;
|
|
bmk1822_tensor_lmem_shape_t shape;
|
|
bmk1822_tensor_lmem_stride_t stride;
|
|
uint8_t int8_rnd_mode; // <! 0 is round to nearset even, 1 is toward zero, currently used by lut
|
|
uint8_t eu_align;
|
|
} bmk1822_tensor_lmem_t;
|
|
|
|
typedef struct {
|
|
uint32_t start_address;
|
|
fmt_t fmt;
|
|
bmk1822_matrix_lmem_shape_t shape;
|
|
bmk1822_matrix_lmem_stride_t stride;
|
|
uint8_t int8_rnd_mode; // <! 0 is round to nearset even, 1 is toward zero
|
|
uint8_t eu_align;
|
|
} bmk1822_matrix_lmem_t;
|
|
|
|
typedef struct {
|
|
uint8_t base_reg_index;
|
|
uint64_t start_address;
|
|
fmt_t fmt;
|
|
bmk1822_tensor_tgmem_shape_t shape;
|
|
bmk1822_tensor_tgmem_stride_t stride;
|
|
uint8_t int8_rnd_mode; // <! 0 is round to nearset even, 1 is toward zero
|
|
} bmk1822_tensor_tgmem_t;
|
|
|
|
typedef struct {
|
|
bmk1822_tensor_tgmem_t t;
|
|
uint64_t reserved_size;
|
|
uint8_t bit_length; //<! deprecated for zero compress
|
|
uint8_t bias0;
|
|
uint8_t bias1;
|
|
uint8_t zero_guard_en;
|
|
} bmk1822_compressed_tensor_tgmem_t;
|
|
|
|
typedef struct {
|
|
uint8_t base_reg_index;
|
|
uint64_t start_address;
|
|
fmt_t fmt;
|
|
bmk1822_matrix_tgmem_shape_t shape;
|
|
bmk1822_matrix_tgmem_stride_t stride;
|
|
uint8_t int8_rnd_mode; // <! 0 is round to nearset even, 1 is toward zero
|
|
} bmk1822_matrix_tgmem_t;
|
|
|
|
typedef struct {
|
|
bmk1822_matrix_tgmem_t m;
|
|
uint8_t bias0;
|
|
uint8_t bias1;
|
|
uint8_t zero_guard_en;
|
|
} bmk1822_compressed_matrix_tgmem_t;
|
|
|
|
typedef struct {
|
|
uint8_t base_reg_index;
|
|
uint64_t start_address;
|
|
fmt_t fmt;
|
|
bmk1822_matrix_tgmem_shape_t shape;
|
|
bmk1822_matrix_tgmem_stride_t stride;
|
|
} bmk1822_matrix_bf16_tgmem_t;
|
|
|
|
/*
|
|
* TDMA Engine APIs: LMEM to LMEM (L2L)
|
|
*/
|
|
|
|
typedef struct {
|
|
uint8_t mv_lut_idx;
|
|
uint8_t mv_lut_base;
|
|
const bmk1822_tensor_lmem_t *src;
|
|
const bmk1822_tensor_lmem_t *dst;
|
|
uint8_t outstanding;
|
|
} bmk1822_tdma_l2l_tensor_copy_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tdma_l2l_tensor_copy(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_l2l_tensor_copy_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tdma_l2l_bf16_tensor_copy(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_l2l_tensor_copy_param_t *p);
|
|
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *src;
|
|
const bmk1822_tensor_lmem_t *dst;
|
|
int right_shift;
|
|
uint32_t lrn_step;
|
|
} bmk1822_tdma_l2l_tensor_lrn_shift_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tdma_l2l_tensor_lrn_shift(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_l2l_tensor_lrn_shift_param_t *p);
|
|
|
|
/*
|
|
* TDMA Engine APIs: LMEM to GMEM (L2TG)
|
|
*/
|
|
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *src;
|
|
const bmk1822_tensor_tgmem_t *dst;
|
|
uint32_t intra_cmd_paral; // [0]: disable
|
|
// [1]: enable TDMA/TIU intra-command parallelism
|
|
} bmk1822_tdma_l2tg_tensor_copy_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tdma_l2t_tensor_copy(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_l2tg_tensor_copy_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tdma_l2g_tensor_copy(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_l2tg_tensor_copy_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tdma_l2g_bf16_tensor_copy(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_l2tg_tensor_copy_param_t *p);
|
|
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *src;
|
|
const bmk1822_tensor_tgmem_t *dst;
|
|
} bmk1822_tdma_l2tg_tensor_copy_nc_transposed_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tdma_l2t_tensor_copy_nc_transposed(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_l2tg_tensor_copy_nc_transposed_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tdma_l2g_tensor_copy_nc_transposed(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_l2tg_tensor_copy_nc_transposed_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tdma_l2g_bf16_tensor_copy_nc_transposed(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_l2tg_tensor_copy_nc_transposed_param_t *p);
|
|
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *src;
|
|
const bmk1822_tensor_tgmem_t *dst;
|
|
} bmk1822_tdma_l2tg_tensor_copy_cw_transposed_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tdma_l2t_tensor_copy_cw_transposed(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_l2tg_tensor_copy_cw_transposed_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tdma_l2g_tensor_copy_cw_transposed(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_l2tg_tensor_copy_cw_transposed_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tdma_l2g_bf16_tensor_copy_cw_transposed(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_l2tg_tensor_copy_cw_transposed_param_t *p);
|
|
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *src;
|
|
const bmk1822_compressed_tensor_tgmem_t *dst;
|
|
uint32_t intra_cmd_paral; // [0]: disable
|
|
// [1]: enable TDMA/TIU intra-command parallelism
|
|
} bmk1822_tdma_l2tg_tensor_copy_compressed_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tdma_l2g_tensor_copy_compressed(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_l2tg_tensor_copy_compressed_param_t *p);
|
|
|
|
typedef struct {
|
|
uint16_t constant;
|
|
const bmk1822_tensor_tgmem_t *dst;
|
|
} bmk1822_tdma_l2tg_tensor_fill_constant_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tdma_l2g_tensor_fill_constant(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_l2tg_tensor_fill_constant_param_t *p);
|
|
|
|
typedef struct {
|
|
const bmk1822_matrix_lmem_t *src;
|
|
const bmk1822_matrix_tgmem_t *dst;
|
|
} bmk1822_tdma_l2tg_matrix_copy_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tdma_l2g_matrix_copy(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_l2tg_matrix_copy_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tdma_l2g_bf16_matrix_copy(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_l2tg_matrix_copy_param_t *p);
|
|
|
|
typedef struct {
|
|
uint32_t src_address;
|
|
uint8_t dst_base_reg_index;
|
|
uint64_t dst_address;
|
|
uint32_t bytes;
|
|
} bmk1822_tdma_l2tg_general_copy_param_t;
|
|
|
|
typedef struct {
|
|
uint32_t src_address;
|
|
uint8_t dst_base_reg_index;
|
|
uint64_t dst_address;
|
|
uint32_t src_bytes;
|
|
fmt_t src_fmt;
|
|
fmt_t dst_fmt;
|
|
} bmk1822_tdma_l2tg_bf16_general_copy_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tdma_l2t_general_copy(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_l2tg_general_copy_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tdma_l2g_general_copy(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_l2tg_general_copy_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tdma_l2g_bf16_general_copy(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_l2tg_bf16_general_copy_param_t *p);
|
|
|
|
/*
|
|
* TDMA Engine APIs: GMEM to LMEM (TG2L)
|
|
*/
|
|
|
|
typedef struct {
|
|
const bmk1822_tensor_tgmem_t *src;
|
|
const bmk1822_tensor_lmem_t *dst;
|
|
uint32_t intra_cmd_paral; // [0]: disable
|
|
// [1]: enable TDMA/TIU intra-command parallelism
|
|
} bmk1822_tdma_tg2l_tensor_copy_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tdma_t2l_tensor_copy(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_tg2l_tensor_copy_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tdma_g2l_tensor_copy(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_tg2l_tensor_copy_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tdma_g2l_bf16_tensor_copy(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_tg2l_tensor_copy_param_t *p);
|
|
|
|
typedef struct {
|
|
const bmk1822_tensor_tgmem_t *src;
|
|
const bmk1822_tensor_lmem_t *dst;
|
|
} bmk1822_tdma_tg2l_tensor_copy_nc_transposed_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tdma_t2l_tensor_copy_nc_transposed(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_tg2l_tensor_copy_nc_transposed_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tdma_g2l_tensor_copy_nc_transposed(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_tg2l_tensor_copy_nc_transposed_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tdma_g2l_bf16_tensor_copy_nc_transposed(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_tg2l_tensor_copy_nc_transposed_param_t *p);
|
|
|
|
typedef struct {
|
|
const bmk1822_tensor_tgmem_t *src;
|
|
const bmk1822_tensor_lmem_t *dst;
|
|
} bmk1822_tdma_tg2l_tensor_copy_chw_rotated_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tdma_t2l_tensor_copy_chw_rotated(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_tg2l_tensor_copy_chw_rotated_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tdma_g2l_tensor_copy_chw_rotated(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_tg2l_tensor_copy_chw_rotated_param_t *p);
|
|
|
|
typedef struct {
|
|
const bmk1822_compressed_tensor_tgmem_t *src;
|
|
const bmk1822_tensor_lmem_t *dst;
|
|
uint32_t intra_cmd_paral; // [0]: disable
|
|
// [1]: enable TDMA/TIU intra-command parallelism
|
|
} bmk1822_tdma_tg2l_tensor_copy_decompressed_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tdma_t2l_tensor_copy_decompressed(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_tg2l_tensor_copy_decompressed_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tdma_g2l_tensor_copy_decompressed(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_tg2l_tensor_copy_decompressed_param_t *p);
|
|
|
|
typedef struct {
|
|
uint16_t constant;
|
|
const bmk1822_tensor_lmem_t *dst;
|
|
} bmk1822_tdma_tg2l_tensor_fill_constant_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tdma_tg2l_tensor_fill_constant(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_tg2l_tensor_fill_constant_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tdma_tg2l_bf16_tensor_fill_constant(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_tg2l_tensor_fill_constant_param_t *p);
|
|
|
|
typedef struct {
|
|
const bmk1822_compressed_matrix_tgmem_t *src;
|
|
const bmk1822_matrix_lmem_t *dst;
|
|
} bmk1822_tdma_tg2l_matrix_copy_decompressed_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tdma_g2l_matrix_copy_decompressed(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_tg2l_matrix_copy_decompressed_param_t *p);
|
|
|
|
typedef struct {
|
|
const bmk1822_matrix_lmem_t *src;
|
|
const bmk1822_compressed_matrix_tgmem_t *dst;
|
|
} bmk1822_tdma_l2tg_matrix_copy_compressed_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tdma_l2g_matrix_copy_compressed(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_l2tg_matrix_copy_compressed_param_t *p);
|
|
|
|
typedef struct {
|
|
const bmk1822_matrix_tgmem_t *src;
|
|
const bmk1822_matrix_lmem_t *dst;
|
|
} bmk1822_tdma_tg2l_matrix_copy_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tdma_t2l_matrix_copy(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_tg2l_matrix_copy_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tdma_g2l_matrix_copy(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_tg2l_matrix_copy_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tdma_g2l_bf16_matrix_copy(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_tg2l_matrix_copy_param_t *p);
|
|
|
|
typedef struct {
|
|
const bmk1822_matrix_tgmem_t *src;
|
|
const bmk1822_matrix_lmem_t *dst;
|
|
} bmk1822_tdma_tg2l_matrix_copy_row_col_transposed_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tdma_t2l_matrix_copy_row_col_transposed(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_tg2l_matrix_copy_row_col_transposed_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tdma_g2l_matrix_copy_row_col_transposed(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_tg2l_matrix_copy_row_col_transposed_param_t *p);
|
|
|
|
typedef struct {
|
|
uint8_t src_base_reg_index;
|
|
uint64_t src_address;
|
|
uint32_t dst_address;
|
|
uint32_t bytes;
|
|
} bmk1822_tdma_tg2l_general_copy_param_t;
|
|
|
|
typedef struct {
|
|
uint8_t src_base_reg_index;
|
|
uint64_t src_address;
|
|
uint32_t dst_address;
|
|
uint32_t src_bytes;
|
|
fmt_t src_fmt;
|
|
fmt_t dst_fmt;
|
|
} bmk1822_tdma_tg2l_bf16_general_copy_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tdma_t2l_general_copy(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_tg2l_general_copy_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tdma_g2l_general_copy(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_tg2l_general_copy_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tdma_g2l_bf16_general_copy(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_tg2l_bf16_general_copy_param_t *p);
|
|
|
|
/*
|
|
* TDMA Engine APIs: GEM to GEM (TG2TG)
|
|
*
|
|
* Note:
|
|
* TDMA does not support GEM to GEM.
|
|
* Implement with TG2L and L2TG.
|
|
*/
|
|
typedef struct {
|
|
const bmk1822_tensor_tgmem_t *src;
|
|
const bmk1822_tensor_tgmem_t *dst;
|
|
} bmk1822_tdma_tg2tg_tensor_copy_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tdma_tg2tg_general_copy(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_tg2tg_tensor_copy_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tdma_tg2tg_bf16_general_copy(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_tg2tg_tensor_copy_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tdma_tg2tg_tensor_copy(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_tg2tg_tensor_copy_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tdma_tg2tg_bf16_tensor_copy(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tdma_tg2tg_tensor_copy_param_t *p);
|
|
|
|
|
|
/*
|
|
* TIU Engine APIs
|
|
*
|
|
* General rules for tensor arithmetic APIs:
|
|
*
|
|
* 1, All tensors can be either signed or unsigned
|
|
* if not mentioned otherwise.
|
|
* 2, A tensor @x with both @x_high and @x_low as
|
|
* parameters can optionally be 8-bit (when @x_high
|
|
* is NULL) or 16-bit (otherwise).
|
|
*/
|
|
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *res_high;
|
|
const bmk1822_tensor_lmem_t *res_low;
|
|
const bmk1822_tensor_lmem_t *a;
|
|
int b_is_const;
|
|
union {
|
|
const bmk1822_tensor_lmem_t *b;
|
|
struct {
|
|
int16_t val;
|
|
int is_signed;
|
|
} b_const;
|
|
};
|
|
uint8_t rshift_bits;
|
|
int relu_enable;
|
|
uint16_t layer_id;
|
|
} bmk1822_tiu_element_wise_mul_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_element_wise_mul(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_element_wise_mul_param_t *p);
|
|
|
|
// Multiplier in quantization down
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *res_high;
|
|
const bmk1822_tensor_lmem_t *res_low;
|
|
const bmk1822_tensor_lmem_t *a;
|
|
int b_is_const;
|
|
union {
|
|
const bmk1822_tensor_lmem_t *b;
|
|
struct {
|
|
int8_t val;
|
|
int is_signed;
|
|
} b_const;
|
|
};
|
|
uint8_t rshift_bits;
|
|
int relu_enable;
|
|
uint32_t multiplier;
|
|
uint16_t layer_id;
|
|
} bmk1822_tiu_element_wise_mul_qdm_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_element_wise_mul_qdm(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_element_wise_mul_qdm_param_t *p);
|
|
|
|
/*
|
|
* @res = @a * @b + @res
|
|
*
|
|
* 1, @res_high must not be NULL since input @res must be 16-bit.
|
|
* 2, If output @res is 8-bit (@res_is_int8 == 1), only @res_low
|
|
* is used as output tensor.
|
|
*/
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *res_high;
|
|
const bmk1822_tensor_lmem_t *res_low;
|
|
const bmk1822_tensor_lmem_t *a;
|
|
int b_is_const;
|
|
union {
|
|
const bmk1822_tensor_lmem_t *b;
|
|
struct {
|
|
int16_t val;
|
|
int is_signed;
|
|
} b_const;
|
|
};
|
|
int res_is_int8;
|
|
int relu_enable;
|
|
uint8_t lshift_bits;
|
|
uint8_t rshift_bits;
|
|
uint16_t layer_id;
|
|
} bmk1822_tiu_element_wise_mac_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_element_wise_mac(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_element_wise_mac_param_t *p);
|
|
|
|
/*
|
|
* @a and @b must all be 16-bit.
|
|
*/
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *res_high;
|
|
const bmk1822_tensor_lmem_t *res_low;
|
|
const bmk1822_tensor_lmem_t *a_high;
|
|
const bmk1822_tensor_lmem_t *a_low;
|
|
int b_is_const;
|
|
union {
|
|
struct {
|
|
const bmk1822_tensor_lmem_t *b_high;
|
|
const bmk1822_tensor_lmem_t *b_low;
|
|
};
|
|
struct {
|
|
int16_t val;
|
|
int is_signed;
|
|
} b_const;
|
|
};
|
|
uint8_t rshift_bits;
|
|
int relu_enable;
|
|
uint16_t layer_id;
|
|
} bmk1822_tiu_element_wise_add_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_element_wise_add(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_element_wise_add_param_t *p);
|
|
|
|
/*
|
|
* 1, @a and @b must all be 16-bit.
|
|
* 2, @res must be signed.
|
|
*/
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *res_high;
|
|
const bmk1822_tensor_lmem_t *res_low;
|
|
const bmk1822_tensor_lmem_t *a_high;
|
|
const bmk1822_tensor_lmem_t *a_low;
|
|
const bmk1822_tensor_lmem_t *b_high;
|
|
const bmk1822_tensor_lmem_t *b_low;
|
|
uint8_t rshift_bits;
|
|
uint16_t layer_id;
|
|
} bmk1822_tiu_element_wise_sub_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_element_wise_sub(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_element_wise_sub_param_t *p);
|
|
|
|
/*
|
|
* @a and @b must both be signed or unsigned.
|
|
*/
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *max;
|
|
const bmk1822_tensor_lmem_t *a;
|
|
int b_is_const;
|
|
union {
|
|
const bmk1822_tensor_lmem_t *b;
|
|
struct {
|
|
int16_t val;
|
|
int is_signed;
|
|
} b_const;
|
|
};
|
|
uint16_t layer_id;
|
|
} bmk1822_tiu_element_wise_max_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_element_wise_max(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_element_wise_max_param_t *p);
|
|
|
|
/*
|
|
* @a and @b must both be signed or unsigned.
|
|
*/
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *min;
|
|
const bmk1822_tensor_lmem_t *a;
|
|
int b_is_const;
|
|
union {
|
|
const bmk1822_tensor_lmem_t *b;
|
|
struct {
|
|
int16_t val;
|
|
int is_signed;
|
|
} b_const;
|
|
};
|
|
uint16_t layer_id;
|
|
} bmk1822_tiu_element_wise_min_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_element_wise_min(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_element_wise_min_param_t *p);
|
|
|
|
/*
|
|
* @a and @b must both be signed or unsigned.
|
|
*/
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *ge;
|
|
const bmk1822_tensor_lmem_t *a;
|
|
int b_is_const;
|
|
union {
|
|
const bmk1822_tensor_lmem_t *b;
|
|
struct {
|
|
int16_t val;
|
|
int is_signed;
|
|
} b_const;
|
|
};
|
|
uint16_t layer_id;
|
|
} bmk1822_tiu_element_wise_ge_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_element_wise_ge(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_element_wise_ge_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tiu_bf16_element_wise_ge(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_element_wise_ge_param_t *p);
|
|
|
|
/*
|
|
* 1, @a must be 16-bit and signed.
|
|
* 2, @res must be 16-bit.
|
|
* 3, @bits must be signed and must range in [-16, 16].
|
|
*/
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *res_high;
|
|
const bmk1822_tensor_lmem_t *res_low;
|
|
const bmk1822_tensor_lmem_t *a_high;
|
|
const bmk1822_tensor_lmem_t *a_low;
|
|
const bmk1822_tensor_lmem_t *bits;
|
|
uint16_t layer_id;
|
|
} bmk1822_tiu_element_wise_arith_shift_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_element_wise_arith_shift(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_element_wise_arith_shift_param_t *p);
|
|
|
|
/*
|
|
* TODO: bmk1822_tl_logic_shift()
|
|
*/
|
|
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *res;
|
|
const bmk1822_tensor_lmem_t *a;
|
|
const bmk1822_tensor_lmem_t *b;
|
|
uint16_t layer_id;
|
|
} bmk1822_tiu_element_wise_and_int8_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_element_wise_and_int8(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_element_wise_and_int8_param_t *p);
|
|
|
|
/*
|
|
* All parameters must be 16-bit.
|
|
*/
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *res_high;
|
|
const bmk1822_tensor_lmem_t *res_low;
|
|
const bmk1822_tensor_lmem_t *a_high;
|
|
const bmk1822_tensor_lmem_t *a_low;
|
|
const bmk1822_tensor_lmem_t *b_high;
|
|
const bmk1822_tensor_lmem_t *b_low;
|
|
} bmk1822_tiu_element_wise_and_int16_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_element_wise_and_int16(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_element_wise_and_int16_param_t *p);
|
|
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *res;
|
|
const bmk1822_tensor_lmem_t *a;
|
|
const bmk1822_tensor_lmem_t *b;
|
|
uint16_t layer_id;
|
|
} bmk1822_tiu_element_wise_or_int8_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_element_wise_or_int8(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_element_wise_or_int8_param_t *p);
|
|
|
|
/*
|
|
* All parameters must be 16-bit.
|
|
*/
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *res_high;
|
|
const bmk1822_tensor_lmem_t *res_low;
|
|
const bmk1822_tensor_lmem_t *a_high;
|
|
const bmk1822_tensor_lmem_t *a_low;
|
|
const bmk1822_tensor_lmem_t *b_high;
|
|
const bmk1822_tensor_lmem_t *b_low;
|
|
} bmk1822_tiu_element_wise_or_int16_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_element_wise_or_int16(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_element_wise_or_int16_param_t *p);
|
|
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *res;
|
|
const bmk1822_tensor_lmem_t *a;
|
|
const bmk1822_tensor_lmem_t *b;
|
|
uint16_t layer_id;
|
|
} bmk1822_tiu_element_wise_xor_int8_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_element_wise_xor_int8(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_element_wise_xor_int8_param_t *p);
|
|
|
|
/*
|
|
* All parameters must be 16-bit.
|
|
*/
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *res_high;
|
|
const bmk1822_tensor_lmem_t *res_low;
|
|
const bmk1822_tensor_lmem_t *a_high;
|
|
const bmk1822_tensor_lmem_t *a_low;
|
|
const bmk1822_tensor_lmem_t *b_high;
|
|
const bmk1822_tensor_lmem_t *b_low;
|
|
} bmk1822_tiu_element_wise_xor_int16_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_element_wise_xor_int16(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_element_wise_xor_int16_param_t *p);
|
|
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *src;
|
|
const bmk1822_tensor_lmem_t *dst;
|
|
uint16_t layer_id;
|
|
} bmk1822_tiu_element_wise_copy_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_element_wise_copy(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_element_wise_copy_param_t *p);
|
|
|
|
/*
|
|
* @res and @a must be both signed or unsigned.
|
|
* TODO: explain the shape and layout of @res when it's 16-bit
|
|
*/
|
|
typedef struct {
|
|
int res_is_int8;
|
|
const bmk1822_tensor_lmem_t *res;
|
|
const bmk1822_tensor_lmem_t *input;
|
|
uint8_t rshift_bits;
|
|
uint16_t layer_id;
|
|
} bmk1822_tiu_mdsum_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_mdsum(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_mdsum_param_t *p);
|
|
|
|
/*
|
|
* NOTE:
|
|
* @table is treated logically as a linear list of
|
|
* length @table_n, where @table_n is a multiple of
|
|
* 16 and is smaller than or equal to 256.
|
|
* When stored in local memory, @table is a tensor
|
|
* of shape (1, npu_num, 1, @table_n), that is, the
|
|
* data of the linear list should be copied across
|
|
* each NPU's local memory by user. The behavior when
|
|
* these copies differ is undefined.
|
|
*/
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *ofmap;
|
|
const bmk1822_tensor_lmem_t *ifmap;
|
|
const bmk1822_tensor_lmem_t *table;
|
|
uint16_t layer_id;
|
|
} bmk1822_tiu_lookup_table_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_lookup_table(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_lookup_table_param_t *p);
|
|
|
|
/*
|
|
* Convolution weight shape:
|
|
* Calibration output (oc, ic, kh, kw)
|
|
* bm_build transforms (oc, ic, kh, kw) -> (1, oc, kh*kw, ic)
|
|
* TDMA load global (1, oc, kh*w, ic) -> local (1, oc, kh*kw, ic)
|
|
* TIU conv opd1 (ic, oc, kh, kw)
|
|
*/
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *ofmap;
|
|
const bmk1822_tensor_lmem_t *ifmap;
|
|
const bmk1822_tensor_lmem_t *weight;
|
|
const bmk1822_tensor_lmem_t *bias;
|
|
uint8_t ins_h, ins_last_h;
|
|
uint8_t ins_w, ins_last_w;
|
|
uint8_t pad_top, pad_bottom;
|
|
uint8_t pad_left, pad_right;
|
|
uint8_t stride_h, stride_w;
|
|
uint8_t dilation_h, dilation_w;
|
|
int relu_enable;
|
|
uint8_t rshift_bits;
|
|
uint8_t ps32_mode;
|
|
uint8_t w_is_const;
|
|
uint16_t layer_id;
|
|
uint8_t fp_round_typ;
|
|
uint8_t cmd_pre_exe_typ; // tiu execute cmd when channel data is ready
|
|
// wait type:
|
|
// 0: activation
|
|
// 1: weight
|
|
uint8_t cmd_pre_exe; // tiu execute cmd when channel data is ready
|
|
// 0: disable
|
|
// 1: load pre exec
|
|
// 2: store pre exec
|
|
// 3: load and store pre exec
|
|
int8_t ins_val; // padding value for int8
|
|
uint16_t ins_fp; // padding value for bf16
|
|
} bmk1822_tiu_convolution_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_convolution(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_convolution_param_t *p);
|
|
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *ofmap;
|
|
const bmk1822_tensor_lmem_t *ifmap;
|
|
const bmk1822_tensor_lmem_t *weight;
|
|
const bmk1822_tensor_lmem_t *chl_quan_param;
|
|
uint8_t ins_h, ins_last_h;
|
|
uint8_t ins_w, ins_last_w;
|
|
uint8_t pad_top, pad_bottom;
|
|
uint8_t pad_left, pad_right;
|
|
uint8_t stride_h, stride_w;
|
|
uint8_t dilation_h, dilation_w;
|
|
uint8_t has_bias;
|
|
uint8_t relu_enable;
|
|
uint8_t ps32_mode;
|
|
uint8_t w_is_const;
|
|
uint16_t layer_id;
|
|
uint8_t cmd_pre_exe_typ; // tiu execute cmd when channel data is ready
|
|
// wait type:
|
|
// 0: activation
|
|
// 1: weight
|
|
uint8_t cmd_pre_exe; // tiu execute cmd when channel data is ready
|
|
// 0: disable
|
|
// 1: load pre exec
|
|
// 2: store pre exec
|
|
// 3: load and store pre exec
|
|
int8_t ins_val; // padding value for int8
|
|
uint16_t ins_fp; // padding value for bf16
|
|
} bmk1822_tiu_convolution_qdm_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_convolution_qdm(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_convolution_qdm_param_t *p);
|
|
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *ofmap;
|
|
const bmk1822_tensor_lmem_t *ifmap;
|
|
uint16_t kh, kw;
|
|
uint8_t pad_top, pad_bottom;
|
|
uint8_t pad_left, pad_right;
|
|
uint8_t stride_h, stride_w;
|
|
int8_t ins_val; // padding value for int8
|
|
uint16_t ins_fp; // padding value for bf16
|
|
uint16_t layer_id;
|
|
} bmk1822_tiu_max_pooling_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_max_pooling(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_max_pooling_param_t *p);
|
|
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *ofmap;
|
|
const bmk1822_tensor_lmem_t *ifmap;
|
|
uint16_t kh, kw;
|
|
uint8_t pad_top, pad_bottom;
|
|
uint8_t pad_left, pad_right;
|
|
uint8_t stride_h, stride_w;
|
|
uint16_t ins_fp;
|
|
uint16_t layer_id;
|
|
} bmk1822_tiu_min_pooling_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_min_pooling(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_min_pooling_param_t *p);
|
|
|
|
bmk1822_op_t * bmk1822_tiu_bf16_min_pooling(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_min_pooling_param_t *p);
|
|
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *ofmap;
|
|
const bmk1822_tensor_lmem_t *ifmap;
|
|
uint16_t kh, kw;
|
|
uint8_t ins_h, ins_last_h;
|
|
uint8_t ins_w, ins_last_w;
|
|
uint8_t pad_top, pad_bottom;
|
|
uint8_t pad_left, pad_right;
|
|
uint8_t stride_h, stride_w;
|
|
uint16_t avg_pooling_const;
|
|
uint8_t rshift_bits;
|
|
uint16_t layer_id;
|
|
int8_t ins_val; // padding value for int8
|
|
uint16_t ins_fp; // padding value for bf16
|
|
} bmk1822_tiu_average_pooling_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_average_pooling(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_average_pooling_param_t *p);
|
|
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *ofmap;
|
|
const bmk1822_tensor_lmem_t *ifmap;
|
|
const bmk1822_tensor_lmem_t *weight;
|
|
const bmk1822_tensor_lmem_t *bias;
|
|
int weight_is_const;
|
|
struct {
|
|
int16_t val;
|
|
int is_signed;
|
|
} weight_const;
|
|
uint8_t ins_h, ins_last_h;
|
|
uint8_t ins_w, ins_last_w;
|
|
uint8_t dilation_h, dilation_w;
|
|
uint8_t pad_top, pad_bottom;
|
|
uint8_t pad_left, pad_right;
|
|
uint8_t stride_h, stride_w;
|
|
uint8_t rshift_bits;
|
|
int relu_enable;
|
|
uint16_t layer_id;
|
|
uint8_t cmd_pre_exe_typ; // tiu execute cmd when channel data is ready
|
|
// wait type:
|
|
// 0: activation
|
|
// 1: weight
|
|
uint8_t cmd_pre_exe; // tiu execute cmd when channel data is ready
|
|
// 0: disable
|
|
// 1: load pre exec
|
|
// 2: store pre exec
|
|
// 3: load and store pre exec
|
|
int8_t ins_val; // padding value for int8
|
|
uint16_t ins_fp; // padding value for bf16
|
|
} bmk1822_tiu_depthwise_convolution_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_depthwise_convolution(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_depthwise_convolution_param_t *p);
|
|
|
|
typedef struct {
|
|
const bmk1822_tensor_lmem_t *ofmap;
|
|
const bmk1822_tensor_lmem_t *ifmap;
|
|
const bmk1822_tensor_lmem_t *weight;
|
|
const bmk1822_tensor_lmem_t *chl_quan_param;
|
|
int weight_is_const;
|
|
struct {
|
|
int16_t val;
|
|
int is_signed;
|
|
} weight_const;
|
|
uint8_t ins_h, ins_last_h;
|
|
uint8_t ins_w, ins_last_w;
|
|
uint8_t dilation_h, dilation_w;
|
|
uint8_t pad_top, pad_bottom;
|
|
uint8_t pad_left, pad_right;
|
|
uint8_t stride_h, stride_w;
|
|
uint8_t has_bias;
|
|
uint8_t relu_enable;
|
|
uint16_t layer_id;
|
|
uint8_t cmd_pre_exe_typ; // tiu execute cmd when channel data is ready
|
|
// wait type:
|
|
// 0: activation
|
|
// 1: weight
|
|
uint8_t cmd_pre_exe; // tiu execute cmd when channel data is ready
|
|
// 0: disable
|
|
// 1: load pre exec
|
|
// 2: store pre exec
|
|
// 3: load and store pre exec
|
|
int8_t ins_val; // padding value for int8
|
|
uint16_t ins_fp; // padding value for bf16
|
|
} bmk1822_tiu_depthwise_convolution_qdm_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_depthwise_convolution_qdm(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_depthwise_convolution_qdm_param_t *p);
|
|
|
|
typedef struct {
|
|
const bmk1822_matrix_lmem_t *res;
|
|
const bmk1822_matrix_lmem_t *left;
|
|
const bmk1822_matrix_lmem_t *right;
|
|
const bmk1822_matrix_lmem_t *bias;
|
|
uint8_t lshift_bits;
|
|
uint8_t rshift_bits;
|
|
int res_is_int8;
|
|
int relu_enable;
|
|
int add_result;
|
|
uint8_t ps32_mode;
|
|
uint16_t layer_id;
|
|
} bmk1822_tiu_matrix_multiplication_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_matrix_multiplication(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_matrix_multiplication_param_t *p);
|
|
|
|
typedef struct {
|
|
const bmk1822_matrix_lmem_t *res;
|
|
const bmk1822_matrix_lmem_t *left;
|
|
const bmk1822_matrix_lmem_t *right;
|
|
const bmk1822_matrix_lmem_t *bias;
|
|
uint8_t lshift_bits;
|
|
uint8_t rshift_bits;
|
|
int res_is_int8;
|
|
int relu_enable;
|
|
int add_result;
|
|
uint8_t ps32_mode;
|
|
int32_t quan_m;
|
|
uint16_t layer_id;
|
|
} bmk1822_tiu_matrix_multiplication_qdm_param_t;
|
|
|
|
bmk1822_op_t * bmk1822_tiu_matrix_multiplication_qdm(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tiu_matrix_multiplication_qdm_param_t *p);
|
|
|
|
/*
|
|
* Helpers
|
|
*/
|
|
|
|
bmk1822_tensor_lmem_stride_t bmk1822_tensor_lmem_default_stride(
|
|
bmk1822_context_t *ctx,
|
|
bmk1822_tensor_lmem_shape_t s,
|
|
fmt_t fmt_type,
|
|
int eu_align);
|
|
|
|
bmk1822_tensor_tgmem_stride_t bmk1822_tensor_tgmem_default_stride(
|
|
bmk1822_tensor_tgmem_shape_t s,
|
|
fmt_t fmt_type);
|
|
|
|
bmk1822_matrix_lmem_shape_t bmk1822_matrix_lmem_default_shape(
|
|
bmk1822_context_t *ctx,
|
|
uint32_t row,
|
|
uint32_t col,
|
|
fmt_t fmt_type);
|
|
|
|
bmk1822_matrix_lmem_shape_t bmk1822_matrix_lmem_shape_t1(
|
|
bmk1822_context_t *ctx,
|
|
uint32_t len,
|
|
fmt_t fmt_type);
|
|
|
|
bmk1822_matrix_lmem_stride_t bmk1822_matrix_lmem_default_stride(
|
|
bmk1822_context_t *ctx,
|
|
bmk1822_matrix_lmem_shape_t s,
|
|
fmt_t fmt,
|
|
int eu_align);
|
|
|
|
bmk1822_tensor_lmem_t * bmk1822_lmem_alloc_tensor(
|
|
bmk1822_context_t *ctx,
|
|
bmk1822_tensor_lmem_shape_t s,
|
|
fmt_t fmt,
|
|
int eu_align);
|
|
|
|
void bmk1822_lmem_init_tensor(
|
|
bmk1822_context_t *ctx,
|
|
bmk1822_tensor_lmem_t *tl,
|
|
bmk1822_tensor_lmem_shape_t shape,
|
|
fmt_t fmt,
|
|
int eu_align);
|
|
|
|
bmk1822_tensor_lmem_t * bmk1822_lmem_alloc_ps32_tensor(
|
|
bmk1822_context_t *ctx,
|
|
bmk1822_tensor_lmem_shape_t s,
|
|
fmt_t fmt,
|
|
int eu_align);
|
|
|
|
void bmk1822_lmem_free_tensor(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_tensor_lmem_t *t);
|
|
|
|
bmk1822_matrix_lmem_t * bmk1822_lmem_alloc_matrix(
|
|
bmk1822_context_t *ctx,
|
|
bmk1822_matrix_lmem_shape_t s,
|
|
fmt_t fmt,
|
|
int eu_align);
|
|
|
|
void bmk1822_lmem_init_matrix(
|
|
bmk1822_context_t *ctx,
|
|
bmk1822_matrix_lmem_t *ml,
|
|
bmk1822_matrix_lmem_shape_t shape,
|
|
fmt_t fmt,
|
|
int eu_align);
|
|
|
|
bmk1822_matrix_lmem_t * bmk1822_lmem_alloc_ps32_matrix(
|
|
bmk1822_context_t *ctx,
|
|
bmk1822_matrix_lmem_shape_t s,
|
|
fmt_t fmt,
|
|
int eu_align);
|
|
|
|
void bmk1822_lmem_free_matrix(
|
|
bmk1822_context_t *ctx,
|
|
const bmk1822_matrix_lmem_t *t);
|
|
|
|
uint32_t bmk1822_lmem_tensor_to_size(
|
|
bmk1822_context_t *ctx,
|
|
bmk1822_tensor_lmem_shape_t s,
|
|
fmt_t fmt,
|
|
int eu_align);
|
|
|
|
uint32_t bmk1822_lmem_matrix_to_size(
|
|
bmk1822_context_t *ctx,
|
|
bmk1822_matrix_lmem_shape_t s,
|
|
fmt_t fmt,
|
|
int eu_align);
|
|
|
|
uint32_t bmk1822_lmem_ps32_matrix_to_size(
|
|
bmk1822_context_t *ctx,
|
|
bmk1822_matrix_lmem_shape_t s,
|
|
fmt_t fmt,
|
|
int eu_align);
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif /* __BMKERNEL_1822_H__ */
|