commit ce8705f49da5e5f59c2ddb3253ef88323a0cd9c4 Author: sophgo-forum-service <forum_service@sophgo.com> Date: Mon May 13 14:04:10 2024 +0800 [feat] cvimath opensource for cv18xx soc. - 9e8967
394 lines
10 KiB
C
394 lines
10 KiB
C
#ifndef CVIMATH_TEST_UTIL_H
|
|
#define CVIMATH_TEST_UTIL_H
|
|
|
|
#include <cviruntime_context.h>
|
|
#include "cvikernel/cvikernel.h"
|
|
|
|
#include "bmruntime.h"
|
|
#include "bmruntime_bmkernel.h"
|
|
|
|
#include <assert.h>
|
|
#include <math.h> // pow
|
|
#include <stdint.h> // uint8_t / uint16_t
|
|
#include <stdio.h> /* printf, scanf, NULL */
|
|
#include <stdlib.h> /* malloc, free, rand */
|
|
#include <string.h> // strncpy
|
|
|
|
/*
 * Definitions carried over from the legacy code base.
 * TODO: move them into a proper header file.
 */

/* Address / control-word types. */
typedef uint32_t laddr_t;
typedef uint64_t gaddr_t;
typedef uint32_t ctrl_t;

/* Sentinel values for the address types above. */
#define LADDR_INVALID (0xFFFFFFFF)
#define GADDR_INVALID (0x000000FFFFFFFFFFULL)

/* Control flags. */
#define CTRL_NULL 0
#define CTRL_AL (1 << 0)       // alloc aligned with EU_NUM
#define CTRL_TP (1 << 5)       // transpose
#define CTRL_NEURON (1 << 11)  // mark neuron address in GDMA

/* Adds mask to x, then clears every bit that is set in mask. */
#define __ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
/*
 * Rounds x up to a multiple of a, using (a - 1) as the mask.
 * The result is only a true multiple of a when a is a power of two.
 */
#define ALIGN(x, a) __ALIGN_MASK(x, (__typeof__(x))(a)-1)
|
|
/*
 * Divides numerator by denominator and rounds the quotient up.
 * Uses the (numerator + denominator - 1) / denominator form, so it is meant
 * for non-negative numerators and positive denominators.
 */
static inline int ceiling_func(int numerator, int denominator) {
  const int biased = numerator + denominator - 1;
  return biased / denominator;
}
|
|
/*
 * Divides numerator by 2^shift and rounds the quotient up, using a right
 * shift instead of a division.
 */
static inline int ceiling_func_shift(int numerator, int shift) {
  const int unit = 1 << shift;
  return (numerator + unit - 1) >> shift;
}
|
|
/*
 * Counts how many times num must be shifted right before its lowest bit is
 * set, i.e. the number of trailing zero bits.
 *
 * Returns 0 when num is 0: a zero value has no set bit, so the scan below
 * would otherwise never terminate.
 */
static inline int get_num_shift(uint64_t num) {
  if (num == 0) {
    return 0;
  }

  int shift = 0;
  for (; (num & 1) == 0; num >>= 1) {
    shift++;
  }
  return shift;
}
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
/*
 * The bm runtime is bound to the bm kernel, and the cvi kernel still needs
 * the bm runtime.
 *
 * Separate functions are therefore needed to combine the bm runtime with the
 * cvi kernel. Such functions carry the postfix _comp (compatible).
 */
|
|
|
|
/* Base name of the current file: the text after the last '/' in __FILE__, or all of __FILE__ when it contains no '/'. */
#define __FILENAME__ (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
|
|
|
|
/**
|
|
* @brief submit command buffer
|
|
*
|
|
* @param rt_ctx runtime structure
|
|
* @param cvk_ctx kernel structure
|
|
*/
|
|
void test_submit_comp(CVI_RT_HANDLE *rt_ctx, cvk_context_t *cvk_ctx);
|
|
|
|
/**
|
|
* @brief alloc tensor from device memory
|
|
*
|
|
* @param rt_ctx runtime structure
|
|
* @param cvk_ctx kernel structure
|
|
* @param shape tensor shape
|
|
* @param fmt tensor format such as \CVK_FMT_U16 or \CVK_FMT_U8
|
|
*
|
|
* @return cvk_tg_t structure
|
|
*/
|
|
cvk_tg_t *test_alloc_tg_mem_comp(CVI_RT_HANDLE *rt_ctx, cvk_context_t *cvk_ctx,
|
|
cvk_tg_shape_t shape, cvk_fmt_t fmt);
|
|
|
|
/**
|
|
* @brief alloc matrix from device memory
|
|
*
|
|
* @param rt_ctx runtime structure
|
|
* @param shape matrix shape
|
|
* @param fmt tensor format such as \CVK_FMT_U16 or \CVK_FMT_U8
|
|
*
|
|
* @return cvk_mg_t structure
|
|
*/
|
|
cvk_mg_t *test_alloc_mg_mem_comp(CVI_RT_HANDLE *rt_ctx, cvk_mg_shape_t shape, cvk_fmt_t fmt);
|
|
|
|
/**
|
|
* @brief free tensor from device memory
|
|
*
|
|
* @param rt_ctx runtime structure
|
|
* @param tg pointer of tg
|
|
*/
|
|
void test_free_tg_mem_comp(CVI_RT_HANDLE *rt_ctx, const cvk_tg_t *tg);
|
|
|
|
/**
|
|
* @brief free matrix from device memory
|
|
*
|
|
* @param rt_ctx runtime structure
|
|
* @param mg pointer of mg
|
|
*/
|
|
void test_free_mg_mem_comp(CVI_RT_HANDLE *rt_ctx, const cvk_mg_t *mg);
|
|
|
|
/**
 * @brief put host data into allocated tensor device memory
 *
 * @param rt_ctx runtime structure
 * @param tg pointer of tg
 * @param data[] host data
 */
|
|
void test_put_tg_mem_comp(CVI_RT_HANDLE *rt_ctx, const cvk_tg_t *tg, uint8_t data[]);
|
|
|
|
/**
 * @brief put host data into allocated matrix device memory
 *
 * @param rt_ctx runtime structure
 * @param mg pointer of mg
 * @param data[] host data
 */
|
|
void test_put_mg_mem_comp(CVI_RT_HANDLE *rt_ctx, const cvk_mg_t *mg, uint8_t data[]);
|
|
|
|
/**
 * @brief syntactic sugar for \test_alloc_mg_mem_comp -> \test_put_mg_mem_comp
 *
 * @param rt_ctx runtime structure
 * @param shape matrix shape
 * @param mg_data_format mg format such as \CVK_FMT_U16 or \CVK_FMT_U8
 * @param data[] host data
 *
 * @return cvk_mg_t structure
 */
|
|
cvk_mg_t *test_put_matrix_g(CVI_RT_HANDLE *rt_ctx, const cvk_mg_shape_t shape,
|
|
cvk_fmt_t mg_data_format, uint8_t data[]);
|
|
|
|
/**
|
|
* @brief get tensor data from device memory
|
|
*
|
|
* @param rt_ctx runtime structure
|
|
* @param tg pointer of tg
|
|
*
|
|
* @return data in device memory
|
|
*/
|
|
uint8_t *test_get_tg_mem_comp(CVI_RT_HANDLE *rt_ctx, const cvk_tg_t *tg);
|
|
|
|
/**
|
|
* @brief get matrix data from device memory
|
|
*
|
|
* @param rt_ctx runtime structure
|
|
* @param mg pointer of mg
|
|
*
|
|
* @return data in device memory
|
|
*/
|
|
uint8_t *test_get_mg_mem_comp(CVI_RT_HANDLE *rt_ctx, const cvk_mg_t *mg);
|
|
|
|
/**
|
|
* @brief get tensor data from tpu memory,
|
|
* the data path should be tpu memory -> device memory -> host memory
|
|
*
|
|
* @param rt_ctx runtime structure
|
|
* @param cvk_ctx kernel structure
|
|
* @param tl pointer of tl
|
|
*
|
|
* @return data in tpu memory
|
|
*/
|
|
uint8_t *test_get_tensor_l2g_comp(CVI_RT_HANDLE *rt_ctx, cvk_context_t *cvk_ctx,
|
|
const cvk_tl_t *tl);
|
|
|
|
/**
|
|
* @brief get matrix data from tpu memory,
|
|
* the data path should be tpu memory -> device memory -> host memory
|
|
*
|
|
* @param rt_ctx runtime structure
|
|
* @param cvk_ctx kernel structure
|
|
* @param ml pointer of ml
|
|
*
|
|
* @return data in tpu memory
|
|
*/
|
|
uint8_t *test_get_matrix_l2g_comp(CVI_RT_HANDLE *rt_ctx, cvk_context_t *cvk_ctx,
|
|
const cvk_ml_t *ml);
|
|
|
|
/**
|
|
* @brief put host data to tpu memory with tensor
|
|
* the data path should be host memory -> device memory -> tpu memory
|
|
*
|
|
* @param rt_ctx runtime structure
|
|
* @param cvk_ctx kernel structure
|
|
* @param tl pointer of tl
|
|
* @param data[] data in host memory
|
|
*/
|
|
void test_put_tensor_g2l_comp(CVI_RT_HANDLE *rt_ctx, cvk_context_t *cvk_ctx, const cvk_tl_t *tl,
|
|
|
|
uint8_t data[]);
|
|
|
|
/**
|
|
* @brief put host data to tpu memory with matrix
|
|
* the data path should be host memory -> device memory -> tpu memory
|
|
*
|
|
* @param rt_ctx runtime structure
|
|
* @param cvk_ctx kernel structure
|
|
* @param ml pointer of ml
|
|
* @param data[] data in host memory
|
|
*/
|
|
void test_put_matrix_g2l_comp(CVI_RT_HANDLE *rt_ctx, cvk_context_t *cvk_ctx, const cvk_ml_t *ml,
|
|
uint8_t data[]);
|
|
|
|
/**
 * @brief alloc tensor from tpu memory
 *
 * @param cvk_ctx kernel structure
 * @param shape shape of tensor
 * @param fmt tensor format such as \CVK_FMT_U16 or \CVK_FMT_U8
 * @param eu_align whether to align to the execution unit
 *
 * @return pointer of tl
 */
|
|
cvk_tl_t *test_alloc_tl(cvk_context_t *cvk_ctx, cvk_tl_shape_t shape, cvk_fmt_t fmt, int eu_align);
|
|
|
|
/**
|
|
* @brief free tpu memory with tensor
|
|
*
|
|
* @param cvk_ctx kernel structure
|
|
* @param tl pointer of tl
|
|
*/
|
|
void test_free_tl(cvk_context_t *cvk_ctx, const cvk_tl_t *tl);
|
|
|
|
|
|
/**
 * @brief a small structure for getting RT memory information
 */
typedef struct _AddrInfo {
  uint64_t phy_addr;   /* physical address */
  uint64_t size_bytes; /* size in bytes */
  uint8_t *vir_addr;   /* virtual address */
  int mem;             /* NOTE(review): meaning is not visible in this header - confirm with the implementation */
} AddrInfo;
|
|
|
|
/**
 * @brief get tpu global memory and assign info to a structure
 *
 * @param[in] ctx runtime structure
 * @param[out] pAddrInfo a structure for physical, virtual address
 */
|
|
uint8_t *test_get_vp_addr(bmctx_t *ctx, AddrInfo *pAddrInfo);
|
|
|
|
/**
 * @brief free tpu global memory from an info structure
 *
 * @param[in] ctx runtime structure
 * @param[in] pAddrInfo a structure for physical, virtual address
 */
|
|
void test_free_vp_addr(bmctx_t *ctx, AddrInfo *pAddrInfo);
|
|
|
|
|
|
/**
 * @brief wrapper functions
 */
|
|
// tensor in local functions
|
|
// get tl size
|
|
static inline uint64_t tl_shape_size(const cvk_tl_shape_t *s) {
|
|
return (uint64_t)s->n * s->c * s->h * s->w;
|
|
}
|
|
|
|
static inline uint64_t tg_shape_size(const cvk_tg_shape_t *s) {
|
|
return (uint64_t)s->n * s->c * s->h * s->w;
|
|
}
|
|
|
|
/* Number of elements described by an mg shape: row * col, accumulated in 64 bits. */
static inline uint64_t mg_shape_size(const cvk_mg_shape_t *s) {
  uint64_t count = s->row;
  count *= s->col;
  return count;
}
|
|
|
|
/**
 * @brief free a tensor through the kernel context's lmem_free_tensor op
 *
 * @param cvk_ctx kernel structure
 * @param t pointer of tl to free
 */
static inline void free_tl(cvk_context_t *cvk_ctx, const cvk_tl_t *t) {
  /* Plain call: a void function must not return an expression in C. */
  cvk_ctx->ops->lmem_free_tensor(cvk_ctx, t);
}
|
|
|
|
typedef struct {
|
|
cvk_fmt_t src_fmt;
|
|
cvk_fmt_t dst_fmt;
|
|
} cvk_fmt_type;
|
|
|
|
/**
 * @brief width in bits of one element of the given format
 *
 * @param fmt format to query
 *
 * @return 32, 16, 8, 4, 2 or 1 for the formats listed below; for any other
 *         value assert(0) fires, and -1 is returned when asserts are disabled
 */
static inline int bitsize_of_fmt(cvk_fmt_t fmt) {
  int bits = -1;

  switch (fmt) {
    case CVK_FMT_F32:
    case CVK_FMT_I32:
      bits = 32;
      break;
    case CVK_FMT_F16:
    case CVK_FMT_I16:
    case CVK_FMT_U16:
    case CVK_FMT_BF16:
      bits = 16;
      break;
    case CVK_FMT_I8:
    case CVK_FMT_U8:
      bits = 8;
      break;
    case CVK_FMT_I4:
      bits = 4;
      break;
    case CVK_FMT_I2:
      bits = 2;
      break;
    case CVK_FMT_I1:
      bits = 1;
      break;
    default:
      assert(0);
      break;
  }

  return bits;
}
|
|
/*
 * Width in bytes of one element of the given format: bitsize_of_fmt(fmt) / 8
 * with integer division, so formats narrower than 8 bits yield 0.
 */
static inline int bytesize_of_fmt(cvk_fmt_t fmt) {
  const int bits = bitsize_of_fmt(fmt);
  return bits / 8;
}
|
|
/**
 * @brief copy the n/c/h/w fields of a tg shape into a tl shape
 *
 * @param[out] tl destination shape
 * @param[in] tg source shape; only read, so it is const-qualified and
 *               pointers to const shapes can be passed without a cast
 */
static inline void tg_2_tl_shape(cvk_tl_shape_t *tl, const cvk_tg_shape_t *tg) {
  tl->n = tg->n;
  tl->c = tg->c;
  tl->h = tg->h;
  tl->w = tg->w;
}
|
|
|
|
/**
 * @brief copy the n/c/h/w fields of a tl shape into a tg shape
 *
 * @param[out] tg destination shape
 * @param[in] tl source shape; only read, so it is const-qualified and
 *               pointers to const shapes can be passed without a cast
 */
static inline void tl_2_tg_shape(cvk_tg_shape_t *tg, const cvk_tl_shape_t *tl) {
  tg->n = tl->n;
  tg->c = tl->c;
  tg->h = tl->h;
  tg->w = tl->w;
}
|
|
/**
|
|
* @brief init test case with runtime/kernel
|
|
*
|
|
* @param rt_ctx runtime structure
|
|
* @param cvk_ctx kernel structure
|
|
*/
|
|
// static inline void _test_init(CVI_RT_HANDLE ctx, cvk_context_t **cvk_ctx) {
|
|
// CVI_RT_HANDLE _ctx = (CVI_RT_HANDLE)ctx;
|
|
// int ret = CVI_RT_Init(&_ctx);
|
|
// if (ret != CVI_SUCCESS) {
|
|
// fprintf(stderr, "init failed, err %d\n", ret);
|
|
// exit(-1);
|
|
// }
|
|
//
|
|
// int alloc_size = 0x10000;
|
|
// *cvk_ctx = (cvk_context_t*) CVI_RT_RegisterKernel(_ctx, alloc_size);
|
|
// printf("alloc command buffer %d bytes success\n", alloc_size);
|
|
//}
|
|
// static inline void _test_exit(CVI_RT_HANDLE ctx, cvk_context_t *cvk_ctx) {
|
|
// CVI_RT_UnRegisterKernel(cvk_ctx);
|
|
// CVI_RT_HANDLE _ctx = (CVI_RT_HANDLE)ctx;
|
|
// CVI_RT_DeInit(_ctx);
|
|
//}
|
|
|
|
static inline void test_init(CVI_RT_HANDLE *ctx, cvk_context_t **cvk_ctx) {
|
|
CVI_RT_HANDLE *_ctx = (CVI_RT_HANDLE *)ctx;
|
|
int ret = CVI_RT_Init(_ctx);
|
|
if (ret != CVI_SUCCESS) {
|
|
fprintf(stderr, "init failed, err %d\n", ret);
|
|
exit(-1);
|
|
}
|
|
|
|
int alloc_size = 0x100000;
|
|
*cvk_ctx = (cvk_context_t *)CVI_RT_RegisterKernel(*_ctx, alloc_size);
|
|
printf("alloc command buffer %d bytes success\n", alloc_size);
|
|
}
|
|
|
|
/**
|
|
* @brief de-init with runtime/kernel
|
|
*
|
|
* @param rt_ctx runtime structure
|
|
* @param cvk_ctx kernel structure
|
|
*/
|
|
static inline void test_exit(CVI_RT_HANDLE *ctx, cvk_context_t *cvk_ctx) {
|
|
CVI_RT_UnRegisterKernel(cvk_ctx);
|
|
CVI_RT_HANDLE *_ctx = (CVI_RT_HANDLE *)ctx;
|
|
CVI_RT_DeInit(*_ctx);
|
|
}
|
|
|
|
// converter bf16<->int8
|
|
uint8_t convert_bf16_u8(uint16_t data);
|
|
int8_t convert_bf16_s8(uint16_t data);
|
|
uint16_t convert_int8_bf16(uint8_t data, uint8_t sign);
|
|
uint32_t convert_fp32_u32(float fp32);
|
|
float convert_hex_fp32(uint32_t hval);
|
|
uint32_t convert_fp32_hex(float val);
|
|
float convert_bf16_fp32(uint16_t bf16);
|
|
uint16_t convert_fp32_bf16(float fp32);
|
|
/* Takes no arguments; (void) makes this a real prototype for C callers.
 * NOTE(review): presumably returns the rounding mode to hand back to restore_feround() - confirm. */
int set_store_feround(void);
|
|
void restore_feround(int round_mode);
|
|
|
|
/*
 * Thin wrapper around malloc(): returns the allocation, or NULL when
 * malloc() fails. It does not abort, so callers must check the result.
 */
static inline void *xmalloc(size_t size) {
  return malloc(size);
}
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif // CVIMATH_TEST_UTIL_H
|