Files
SDK_SG200x_V2/cvimath/include/test_cvikernel_util.h
carbon 83dc4914fe add cvimath
commit ce8705f49da5e5f59c2ddb3253ef88323a0cd9c4
Author: sophgo-forum-service <forum_service@sophgo.com>
Date:   Mon May 13 14:04:10 2024 +0800

    [feat] cvimath opensource for cv18xx soc.

    - 9e8967
2024-05-31 11:54:07 +08:00

394 lines
10 KiB
C

#ifndef CVIMATH_TEST_UTIL_H
#define CVIMATH_TEST_UTIL_H
#include <cviruntime_context.h>
#include "cvikernel/cvikernel.h"
#include "bmruntime.h"
#include "bmruntime_bmkernel.h"
#include <assert.h>
#include <math.h> // pow
#include <stdint.h> // uint8_t / uint16_t
#include <stdio.h> /* printf, scanf, NULL */
#include <stdlib.h> /* malloc, free, rand */
#include <string.h> // strncpy
// Copied from legacy code.
// TODO: move to the proper header files
// Round x up to the next multiple of (mask + 1). The bit trick is only
// correct when mask is one less than a power of two.
#define __ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
// Round x up to the next multiple of a. a must be a power of two; it is cast
// to the type of x (via the GNU __typeof__ extension) before the mask is built.
#define ALIGN(x, a) __ALIGN_MASK(x, (__typeof__(x))(a)-1)
// NOTE(review): presumably local-memory address, global-memory address and
// control-flag types, matching the LADDR_/GADDR_/CTRL_ constants below — confirm.
typedef uint32_t laddr_t;
typedef uint64_t gaddr_t;
typedef uint32_t ctrl_t;
// Bit flags stored in a ctrl_t.
#define CTRL_NULL 0
#define CTRL_AL (1 << 0) // alloc aligned with EU_NUM
#define CTRL_TP (1 << 5) // transpose
#define CTRL_NEURON (1 << 11) // mark neuron address in GDMA
// Sentinel values: all 32 bits set for laddr_t, the low 40 bits set for gaddr_t.
#define LADDR_INVALID (0xFFFFFFFF)
#define GADDR_INVALID (0x000000FFFFFFFFFFULL)
/**
 * @brief integer division rounded up
 *
 * Adds (denominator - 1) to the numerator before dividing, which rounds the
 * quotient up for a non-negative numerator and a positive denominator.
 *
 * @param numerator value to divide
 * @param denominator divisor; must not be 0
 *
 * @return (numerator + denominator - 1) / denominator
 */
static inline int ceiling_func(int numerator, int denominator) {
  const int biased = numerator + denominator - 1;
  return biased / denominator;
}
/**
 * @brief divide by a power of two (given as a shift count), rounding up
 *
 * @param numerator value to divide
 * @param shift log2 of the divisor; must be small enough that (1 << shift)
 *              fits in an int
 *
 * @return (numerator + (1 << shift) - 1) >> shift
 */
static inline int ceiling_func_shift(int numerator, int shift) {
  int biased = numerator + (1 << shift);
  biased -= 1;
  return biased >> shift;
}
/**
 * @brief count the trailing zero bits of a value
 *
 * For a power of two this is its base-2 logarithm, i.e. the shift amount that
 * is equivalent to multiplying or dividing by it.
 *
 * @param num value to inspect
 *
 * @return number of consecutive zero bits starting at bit 0; 0 when num is 0,
 *         because a zero value has no set bit to stop at
 */
static inline int get_num_shift(uint64_t num) {
  // Without this guard the loop below would never find a set bit and would
  // spin forever (overflowing the counter on the way).
  if (num == 0) {
    return 0;
  }

  int shift = 0;
  while ((num & 1) == 0) {
    ++shift;
    num >>= 1;
  }
  return shift;
}
#ifdef __cplusplus
extern "C" {
#endif
/*
* bm runtime binds with bm kernel.
* cvi kernel still needs bm runtime.
*
* Need to create the separate function to combine bm runtime and cvi kernel.
* Function with postfix _comp (compatible) for such combination.
*/
// Base name of the current source file: the part of __FILE__ after its last
// '/', or all of __FILE__ when it contains no '/'. Calls strrchr() at each use.
#define __FILENAME__ (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
/**
 * @brief submit command buffer
 *
 * NOTE(review): whether this call waits for the submitted commands to finish
 * is not visible from this header — confirm against the implementation.
 *
 * @param rt_ctx runtime structure
 * @param cvk_ctx kernel structure
 */
void test_submit_comp(CVI_RT_HANDLE *rt_ctx, cvk_context_t *cvk_ctx);
/**
 * @brief alloc tensor from device memory
 *
 * NOTE(review): presumably released with test_free_tg_mem_comp(); the value
 * returned on failure is not visible from this header — confirm.
 *
 * @param rt_ctx runtime structure
 * @param cvk_ctx kernel structure
 * @param shape tensor shape
 * @param fmt tensor format such as CVK_FMT_U16 or CVK_FMT_U8
 *
 * @return cvk_tg_t structure
 */
cvk_tg_t *test_alloc_tg_mem_comp(CVI_RT_HANDLE *rt_ctx, cvk_context_t *cvk_ctx,
cvk_tg_shape_t shape, cvk_fmt_t fmt);
/**
 * @brief alloc matrix from device memory
 *
 * NOTE(review): presumably released with test_free_mg_mem_comp(); the value
 * returned on failure is not visible from this header — confirm.
 *
 * @param rt_ctx runtime structure
 * @param shape matrix shape
 * @param fmt matrix format such as CVK_FMT_U16 or CVK_FMT_U8
 *
 * @return cvk_mg_t structure
 */
cvk_mg_t *test_alloc_mg_mem_comp(CVI_RT_HANDLE *rt_ctx, cvk_mg_shape_t shape, cvk_fmt_t fmt);
/**
 * @brief free tensor from device memory
 *
 * NOTE(review): presumably the counterpart of test_alloc_tg_mem_comp() —
 * confirm whether the cvk_tg_t object itself is released as well.
 *
 * @param rt_ctx runtime structure
 * @param tg pointer of tg
 */
void test_free_tg_mem_comp(CVI_RT_HANDLE *rt_ctx, const cvk_tg_t *tg);
/**
 * @brief free matrix from device memory
 *
 * NOTE(review): presumably the counterpart of test_alloc_mg_mem_comp() —
 * confirm whether the cvk_mg_t object itself is released as well.
 *
 * @param rt_ctx runtime structure
 * @param mg pointer of mg
 */
void test_free_mg_mem_comp(CVI_RT_HANDLE *rt_ctx, const cvk_mg_t *mg);
/**
 * @brief put host data to allocated tensor device memory
 *
 * NOTE(review): the number of bytes read from data is not stated here;
 * presumably it is derived from the shape and format of tg — confirm.
 *
 * @param rt_ctx runtime structure
 * @param tg pointer of tg
 * @param data host data
 */
void test_put_tg_mem_comp(CVI_RT_HANDLE *rt_ctx, const cvk_tg_t *tg, uint8_t data[]);
/**
 * @brief put host data to allocated matrix device memory
 *
 * NOTE(review): the number of bytes read from data is not stated here;
 * presumably it is derived from the shape and format of mg — confirm.
 *
 * @param rt_ctx runtime structure
 * @param mg pointer of mg
 * @param data host data
 */
void test_put_mg_mem_comp(CVI_RT_HANDLE *rt_ctx, const cvk_mg_t *mg, uint8_t data[]);
/**
 * @brief syntactic sugar for test_alloc_mg_mem_comp() -> test_put_mg_mem_comp()
 *
 * @param rt_ctx runtime structure
 * @param shape matrix shape
 * @param mg_data_format mg format such as CVK_FMT_U16 or CVK_FMT_U8
 * @param data host data
 *
 * @return cvk_mg_t structure.
 *         NOTE(review): presumably the matrix obtained from
 *         test_alloc_mg_mem_comp(), to be released with
 *         test_free_mg_mem_comp() — confirm.
 */
cvk_mg_t *test_put_matrix_g(CVI_RT_HANDLE *rt_ctx, const cvk_mg_shape_t shape,
cvk_fmt_t mg_data_format, uint8_t data[]);
/**
 * @brief get tensor data from device memory
 *
 * @param rt_ctx runtime structure
 * @param tg pointer of tg
 *
 * @return data in device memory.
 *         NOTE(review): ownership of the returned buffer is not visible from
 *         this header — confirm whether the caller must free() it.
 */
uint8_t *test_get_tg_mem_comp(CVI_RT_HANDLE *rt_ctx, const cvk_tg_t *tg);
/**
 * @brief get matrix data from device memory
 *
 * @param rt_ctx runtime structure
 * @param mg pointer of mg
 *
 * @return data in device memory.
 *         NOTE(review): ownership of the returned buffer is not visible from
 *         this header — confirm whether the caller must free() it.
 */
uint8_t *test_get_mg_mem_comp(CVI_RT_HANDLE *rt_ctx, const cvk_mg_t *mg);
/**
 * @brief get tensor data from tpu memory,
 * the data path should be tpu memory -> device memory -> host memory
 *
 * @param rt_ctx runtime structure
 * @param cvk_ctx kernel structure
 * @param tl pointer of tl
 *
 * @return data in tpu memory.
 *         NOTE(review): ownership of the returned buffer is not visible from
 *         this header — confirm whether the caller must free() it.
 */
uint8_t *test_get_tensor_l2g_comp(CVI_RT_HANDLE *rt_ctx, cvk_context_t *cvk_ctx,
const cvk_tl_t *tl);
/**
 * @brief get matrix data from tpu memory,
 * the data path should be tpu memory -> device memory -> host memory
 *
 * @param rt_ctx runtime structure
 * @param cvk_ctx kernel structure
 * @param ml pointer of ml
 *
 * @return data in tpu memory.
 *         NOTE(review): ownership of the returned buffer is not visible from
 *         this header — confirm whether the caller must free() it.
 */
uint8_t *test_get_matrix_l2g_comp(CVI_RT_HANDLE *rt_ctx, cvk_context_t *cvk_ctx,
const cvk_ml_t *ml);
/**
 * @brief put host data to tpu memory with tensor,
 * the data path should be host memory -> device memory -> tpu memory
 *
 * @param rt_ctx runtime structure
 * @param cvk_ctx kernel structure
 * @param tl pointer of tl
 * @param data data in host memory
 */
void test_put_tensor_g2l_comp(CVI_RT_HANDLE *rt_ctx, cvk_context_t *cvk_ctx, const cvk_tl_t *tl,
uint8_t data[]);
/**
 * @brief put host data to tpu memory with matrix,
 * the data path should be host memory -> device memory -> tpu memory
 *
 * @param rt_ctx runtime structure
 * @param cvk_ctx kernel structure
 * @param ml pointer of ml
 * @param data data in host memory
 */
void test_put_matrix_g2l_comp(CVI_RT_HANDLE *rt_ctx, cvk_context_t *cvk_ctx, const cvk_ml_t *ml,
uint8_t data[]);
/**
 * @brief alloc tensor from tpu memory
 *
 * NOTE(review): presumably released with test_free_tl() — confirm.
 *
 * @param cvk_ctx kernel structure
 * @param shape shape of tensor
 * @param fmt tensor format such as CVK_FMT_U16 or CVK_FMT_U8
 * @param eu_align whether to align the allocation to the execution unit
 *
 * @return pointer of tl
 */
cvk_tl_t *test_alloc_tl(cvk_context_t *cvk_ctx, cvk_tl_shape_t shape, cvk_fmt_t fmt, int eu_align);
/**
 * @brief free tpu memory with tensor
 *
 * NOTE(review): presumably the counterpart of test_alloc_tl() — confirm.
 *
 * @param cvk_ctx kernel structure
 * @param tl pointer of tl
 */
void test_free_tl(cvk_context_t *cvk_ctx, const cvk_tl_t *tl);
/**
 * @brief a small structure for getting RT memory information
 */
typedef struct _AddrInfo
{
uint64_t phy_addr; // physical address
uint64_t size_bytes; // NOTE(review): presumably the size of the region in bytes — confirm
uint8_t *vir_addr; // virtual address
int mem; // NOTE(review): meaning is not visible from this header — confirm against the implementation
}AddrInfo;
/**
 * @brief get tpu global memory and assign info to a structure
 *
 * @param[in] ctx runtime structure
 * @param[out] pAddrInfo a structure for physical, virtual address
 *
 * @return NOTE(review): not documented in the original; presumably the
 *         virtual address also stored in pAddrInfo->vir_addr — confirm.
 */
uint8_t *test_get_vp_addr(bmctx_t *ctx, AddrInfo *pAddrInfo);
/**
 * @brief free tpu global memory from an info structure
 *
 * @param[in] ctx runtime structure
 * @param[in] pAddrInfo a structure for physical, virtual address
 */
void test_free_vp_addr(bmctx_t *ctx, AddrInfo *pAddrInfo);
/**
 * @brief wrapper functions
 */
// helper functions for tensors in local memory
// get tl size
/**
 * @brief number of elements described by a tl shape
 *
 * @param s shape to measure
 *
 * @return n * c * h * w, accumulated in 64-bit arithmetic
 */
static inline uint64_t tl_shape_size(const cvk_tl_shape_t *s) {
  uint64_t count = s->n;
  count *= s->c;
  count *= s->h;
  count *= s->w;
  return count;
}
/**
 * @brief number of elements described by a tg shape
 *
 * @param s shape to measure
 *
 * @return n * c * h * w, accumulated in 64-bit arithmetic
 */
static inline uint64_t tg_shape_size(const cvk_tg_shape_t *s) {
  uint64_t count = s->n;
  count *= s->c;
  count *= s->h;
  count *= s->w;
  return count;
}
/**
 * @brief number of elements described by an mg shape
 *
 * @param s shape to measure
 *
 * @return row * col, computed in 64-bit arithmetic
 */
static inline uint64_t mg_shape_size(const cvk_mg_shape_t *s) {
  const uint64_t rows = s->row;
  return rows * s->col;
}
/**
 * @brief release a tl through the kernel context's lmem_free_tensor operation
 *
 * @param cvk_ctx kernel structure whose ops table provides lmem_free_tensor
 * @param t pointer of tl to release
 */
static inline void free_tl(cvk_context_t *cvk_ctx, const cvk_tl_t *t) {
  // A void function must not use `return <expression>;` in C, so the
  // operation is invoked as a plain statement.
  cvk_ctx->ops->lmem_free_tensor(cvk_ctx, t);
}
/**
 * @brief a pair of formats.
 * NOTE(review): presumably describes a source -> destination format
 * conversion case — confirm against the users of this type.
 */
typedef struct {
cvk_fmt_t src_fmt; // format on the source side
cvk_fmt_t dst_fmt; // format on the destination side
} cvk_fmt_type;
/**
 * @brief width in bits of one element of the given format
 *
 * @param fmt format to query
 *
 * @return 32, 16, 8, 4, 2 or 1 for the known formats; an unknown format trips
 *         assert(0), and yields -1 when assertions are compiled out
 */
static inline int bitsize_of_fmt(cvk_fmt_t fmt) {
  int bits = -1;

  switch (fmt) {
    case CVK_FMT_I1:
      bits = 1;
      break;
    case CVK_FMT_I2:
      bits = 2;
      break;
    case CVK_FMT_I4:
      bits = 4;
      break;
    case CVK_FMT_U8:
    case CVK_FMT_I8:
      bits = 8;
      break;
    case CVK_FMT_BF16:
    case CVK_FMT_U16:
    case CVK_FMT_I16:
    case CVK_FMT_F16:
      bits = 16;
      break;
    case CVK_FMT_I32:
    case CVK_FMT_F32:
      bits = 32;
      break;
    default:
      // Unknown format: fail loudly in debug builds, fall through to -1 otherwise.
      assert(0);
      break;
  }

  return bits;
}
/**
 * @brief width in whole bytes of one element of the given format
 *
 * The bit width is divided by 8 with integer division, so formats narrower
 * than 8 bits (4-, 2- and 1-bit) yield 0.
 *
 * @param fmt format to query
 *
 * @return bitsize_of_fmt(fmt) / 8
 */
static inline int bytesize_of_fmt(cvk_fmt_t fmt) {
  const int bits = bitsize_of_fmt(fmt);
  return bits / 8;
}
/**
 * @brief copy the n/c/h/w dimensions of a tg shape into a tl shape
 *
 * @param[out] tl destination shape
 * @param[in] tg source shape; only read, so it is const-qualified and callers
 *               may pass either const or non-const shapes
 */
static inline void tg_2_tl_shape(cvk_tl_shape_t *tl, const cvk_tg_shape_t *tg) {
  tl->n = tg->n;
  tl->c = tg->c;
  tl->h = tg->h;
  tl->w = tg->w;
}
/**
 * @brief copy the n/c/h/w dimensions of a tl shape into a tg shape
 *
 * @param[out] tg destination shape
 * @param[in] tl source shape; only read, so it is const-qualified and callers
 *               may pass either const or non-const shapes
 */
static inline void tl_2_tg_shape(cvk_tg_shape_t *tg, const cvk_tl_shape_t *tl) {
  tg->n = tl->n;
  tg->c = tl->c;
  tg->h = tl->h;
  tg->w = tl->w;
}
/**
 * @brief init test case with runtime/kernel
 *
 * Initializes the runtime with CVI_RT_Init() and then registers a kernel
 * context with a 0x100000-byte command buffer. If either step fails, the
 * error is reported on stderr and the process terminates with exit(-1).
 *
 * @param[in,out] ctx runtime handle, passed to CVI_RT_Init()
 * @param[out] cvk_ctx receives the kernel context returned by
 *                     CVI_RT_RegisterKernel()
 */
static inline void test_init(CVI_RT_HANDLE *ctx, cvk_context_t **cvk_ctx) {
  int ret = CVI_RT_Init(ctx);
  if (ret != CVI_SUCCESS) {
    fprintf(stderr, "init failed, err %d\n", ret);
    exit(-1);
  }

  int alloc_size = 0x100000;
  *cvk_ctx = (cvk_context_t *)CVI_RT_RegisterKernel(*ctx, alloc_size);
  // A null kernel context is unusable; stop here instead of announcing
  // success and crashing later in the test.
  if (!*cvk_ctx) {
    fprintf(stderr, "alloc command buffer %d bytes failed\n", alloc_size);
    exit(-1);
  }
  printf("alloc command buffer %d bytes success\n", alloc_size);
}
/**
 * @brief de-init with runtime/kernel
 *
 * Unregisters the kernel context first, then de-initializes the runtime.
 *
 * @param ctx runtime handle to shut down
 * @param cvk_ctx kernel structure to unregister
 */
static inline void test_exit(CVI_RT_HANDLE *ctx, cvk_context_t *cvk_ctx) {
  CVI_RT_UnRegisterKernel(cvk_ctx);
  CVI_RT_DeInit(*ctx);
}
// Converters between bf16, int8 and fp32 values.
// NOTE(review): rounding and saturation behaviour is defined by the
// implementation and is not visible from this header — confirm before relying
// on it.
// bf16 bit pattern -> unsigned / signed 8-bit integer
uint8_t convert_bf16_u8(uint16_t data);
int8_t convert_bf16_s8(uint16_t data);
// 8-bit integer -> bf16 bit pattern.
// NOTE(review): presumably a non-zero sign means data is interpreted as signed — confirm.
uint16_t convert_int8_bf16(uint8_t data, uint8_t sign);
// NOTE(review): convert_fp32_u32 and convert_fp32_hex share the same
// signature; how they differ is not visible from this header — confirm.
uint32_t convert_fp32_u32(float fp32);
float convert_hex_fp32(uint32_t hval);
uint32_t convert_fp32_hex(float val);
// bf16 bit pattern <-> fp32 value
float convert_bf16_fp32(uint16_t bf16);
uint16_t convert_fp32_bf16(float fp32);
/**
 * @brief NOTE(review): presumably saves the current floating-point rounding
 * mode and switches to the mode used by the converters — confirm against the
 * implementation.
 *
 * Declared with (void) so that C compilers see a real prototype and reject
 * calls that pass arguments.
 *
 * @return value to hand back to restore_feround()
 */
int set_store_feround(void);
/**
 * @brief NOTE(review): presumably re-installs a rounding mode previously
 * returned by set_store_feround() — confirm against the implementation.
 *
 * @param round_mode rounding mode to restore
 */
void restore_feround(int round_mode);
/**
 * @brief thin wrapper around malloc()
 *
 * Despite the name, this does not abort on allocation failure: the result of
 * malloc() is handed back unchanged, so callers must check for NULL.
 *
 * @param size number of bytes to allocate
 *
 * @return pointer to the allocated memory, or NULL when malloc() fails
 */
static inline void *xmalloc(size_t size) {
  return malloc(size);
}
#ifdef __cplusplus
}
#endif
#endif // CVIMATH_TEST_UTIL_H