// SDK_SG200x_V2/cvimath/src/util.c

#include <assert.h>
#include <cvikernel/cvikernel.h>
#include <stddef.h>  /* offsetof(), used by container_of() below */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "test_cvikernel_util.h"

#define container_of(ptr, type, member)                  \
  ({                                                     \
    const typeof(((type *)0)->member) *__mptr = (ptr);   \
    (type *)((char *)__mptr - offsetof(type, member));   \
  })
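
/*
 * Each global tensor/matrix handed back to callers is embedded in a small
 * wrapper that also keeps the CVI_RT_MEM allocation backing it.  Callers only
 * see the embedded cvk_tg_t / cvk_mg_t pointer; the other helpers use
 * container_of() to recover the wrapper whenever they need the device memory
 * handle again (to copy data in/out or to free it).
 */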
typedef struct {
  cvk_tg_t tg;
  CVI_RT_MEM mem;
} test_tg_wrapper_t;

typedef struct {
  cvk_mg_t mg;
  CVI_RT_MEM mem;
} test_mg_wrapper_t;

void test_submit_comp(CVI_RT_HANDLE *bm_ctx, cvk_context_t *cvk_ctx) {
  (void)bm_ctx;
  CVI_RT_Submit(cvk_ctx);
}

cvk_tg_t *test_alloc_tg_mem_comp(CVI_RT_HANDLE *bm_ctx, cvk_context_t *cvk_ctx,
                                 cvk_tg_shape_t shape, cvk_fmt_t fmt) {
  CVI_RT_HANDLE ctx = (CVI_RT_HANDLE)*bm_ctx;
  int alloc_sz = tg_shape_size(&shape) * bytesize_of_fmt(fmt);
  test_tg_wrapper_t *w = (test_tg_wrapper_t *)malloc(sizeof(*w));
  assert(w && "Expected allocated tg wrapper");

  w->tg.base_reg_index = 0;
  w->mem = CVI_RT_MemAlloc(ctx, alloc_sz);
  w->tg.start_address = CVI_RT_MemGetPAddr(w->mem);
  w->tg.fmt = fmt;
  w->tg.shape = shape;
  w->tg.stride = cvk_ctx->ops->tg_default_stride(cvk_ctx, shape, fmt);
  return &w->tg;
}

cvk_mg_t *test_alloc_mg_mem_comp(CVI_RT_HANDLE *bm_ctx, cvk_mg_shape_t s, cvk_fmt_t fmt) {
  int alloc_sz = mg_shape_size(&s) * bytesize_of_fmt(fmt);
  CVI_RT_HANDLE ctx = (CVI_RT_HANDLE)*bm_ctx;
  test_mg_wrapper_t *w = (test_mg_wrapper_t *)malloc(sizeof(*w));
  assert(w && "Expected allocated mg wrapper");

  w->mem = CVI_RT_MemAlloc(ctx, alloc_sz);
  w->mg.base_reg_index = 0;
  w->mg.start_address = CVI_RT_MemGetPAddr(w->mem);
  w->mg.shape = s;
  w->mg.fmt = fmt;
  w->mg.stride.row = s.col * bytesize_of_fmt(fmt);
  return &w->mg;
}

void test_free_tg_mem_comp(CVI_RT_HANDLE *ctx, const cvk_tg_t *tg) {
  test_tg_wrapper_t *w = container_of(tg, test_tg_wrapper_t, tg);
  CVI_RT_MemFree(*ctx, w->mem);
  free(w);
}

void test_free_mg_mem_comp(CVI_RT_HANDLE *ctx, const cvk_mg_t *mg) {
  test_mg_wrapper_t *w = container_of(mg, test_mg_wrapper_t, mg);
  CVI_RT_MemFree(*ctx, w->mem);
  free(w);
}

void test_put_tg_mem_comp(CVI_RT_HANDLE *bm_ctx, const cvk_tg_t *tg, uint8_t data[]) {
  test_tg_wrapper_t *w = container_of(tg, test_tg_wrapper_t, tg);
  CVI_RT_MemCopyS2D(*bm_ctx, w->mem, data);
}

void test_put_mg_mem_comp(CVI_RT_HANDLE *ctx, const cvk_mg_t *mg, uint8_t data[]) {
  test_mg_wrapper_t *w = container_of(mg, test_mg_wrapper_t, mg);
  CVI_RT_MemCopyS2D(*ctx, w->mem, data);
}

uint8_t *test_get_tg_mem_comp(CVI_RT_HANDLE *bm_ctx, const cvk_tg_t *tg) {
  cvk_tg_shape_t s = tg->shape;
  int data_type_size = (tg->fmt == CVK_FMT_BF16) ? 2 : 1;
  uint32_t size = s.n * s.c * s.h * s.w * data_type_size;
  uint8_t *data = (uint8_t *)malloc(size);
  assert(data && "Expect allocated data for get tg mem");

  test_tg_wrapper_t *w = container_of(tg, test_tg_wrapper_t, tg);
  CVI_RT_MemCopyD2S(*bm_ctx, data, w->mem);
  return data;
}

uint8_t *test_get_mg_mem_comp(CVI_RT_HANDLE *ctx, const cvk_mg_t *mg) {
  cvk_mg_shape_t s = mg->shape;
  uint32_t size = s.row * s.col * (mg->fmt == CVK_FMT_BF16 ? 2 : 1);
  uint8_t *data = (uint8_t *)malloc(size);
  assert(data && "Expect allocated data for get mg mem");

  test_mg_wrapper_t *w = container_of(mg, test_mg_wrapper_t, mg);
  CVI_RT_MemCopyD2S(*ctx, data, w->mem);
  return data;
}

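/*
 * The helpers below move data between local memory (cvk_tl_t / cvk_ml_t) and
 * host buffers by staging through a temporary global tensor/matrix: the l2g
 * paths issue a TDMA local-to-global copy, submit, then read the global buffer
 * back to the host; the g2l paths write the host data into a global buffer and
 * issue the matching global-to-local copy.  BF16 data goes through the bf16
 * TDMA variants, everything else through the plain copy ops.
 */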
uint8_t *test_get_tensor_l2g_comp(CVI_RT_HANDLE *bm_ctx, cvk_context_t *cvk_ctx,
                                  const cvk_tl_t *tl) {
  cvk_tg_shape_t s;
  s.n = tl->shape.n;
  s.c = tl->shape.c;
  s.h = tl->shape.h;
  s.w = tl->shape.w;

  cvk_tg_t *tg = test_alloc_tg_mem_comp(bm_ctx, cvk_ctx, s, tl->fmt);

  cvk_tdma_l2g_tensor_copy_param_t p;
  memset(&p, 0, sizeof(p));
  p.src = tl;
  p.dst = tg;
  if (tl->fmt == CVK_FMT_BF16) {
    cvk_ctx->ops->tdma_l2g_bf16_tensor_copy(cvk_ctx, &p);
  } else {
    cvk_ctx->ops->tdma_l2g_tensor_copy(cvk_ctx, &p);
  }
  test_submit_comp(bm_ctx, cvk_ctx);

  uint8_t *data = test_get_tg_mem_comp(bm_ctx, tg);
  test_free_tg_mem_comp(bm_ctx, tg);
  return data;
}

uint8_t *test_get_matrix_l2g_comp(CVI_RT_HANDLE *ctx, cvk_context_t *cvk_ctx, const cvk_ml_t *ml) {
  cvk_mg_shape_t s;
  s.row = ml->shape.n;
  s.col = ml->shape.col;

  cvk_mg_t *mg = test_alloc_mg_mem_comp(ctx, s, ml->fmt);

  cvk_tdma_l2g_matrix_copy_param_t p;
  memset(&p, 0, sizeof(p));
  p.src = ml;
  p.dst = mg;
  if (ml->fmt == CVK_FMT_BF16) {
    cvk_ctx->ops->tdma_l2g_bf16_matrix_copy(cvk_ctx, &p);
  } else {
    cvk_ctx->ops->tdma_l2g_matrix_copy(cvk_ctx, &p);
  }
  test_submit_comp(ctx, cvk_ctx);

  uint8_t *data = test_get_mg_mem_comp(ctx, mg);
  test_free_mg_mem_comp(ctx, mg);
  return data;
}

void test_put_tensor_g2l_comp(CVI_RT_HANDLE *bm_ctx, cvk_context_t *cvk_ctx, const cvk_tl_t *tl,
                              uint8_t data[]) {
  cvk_tg_shape_t tg_shape;
  tg_shape.n = tl->shape.n;
  tg_shape.c = tl->shape.c;
  tg_shape.h = tl->shape.h;
  tg_shape.w = tl->shape.w;

  cvk_tg_t *tg = test_alloc_tg_mem_comp(bm_ctx, cvk_ctx, tg_shape, tl->fmt);

  cvk_tdma_g2l_tensor_copy_param_t p;
  memset(&p, 0, sizeof(p));
  p.src = tg;
  p.dst = tl;

  test_put_tg_mem_comp(bm_ctx, tg, data);
  if (tl->fmt == CVK_FMT_BF16) {
    cvk_ctx->ops->tdma_g2l_bf16_tensor_copy(cvk_ctx, &p);
  } else {
    cvk_ctx->ops->tdma_g2l_tensor_copy(cvk_ctx, &p);
  }
  test_submit_comp(bm_ctx, cvk_ctx);

  test_free_tg_mem_comp(bm_ctx, tg);
}

void test_put_matrix_g2l_comp(CVI_RT_HANDLE *bm_ctx, cvk_context_t *cvk_ctx, const cvk_ml_t *ml,
                              uint8_t data[]) {
  cvk_fmt_t mg_data_format = ml->fmt;
  cvk_mg_shape_t s;
  s.row = ml->shape.n;
  s.col = ml->shape.col;

  cvk_mg_t *mg = test_alloc_mg_mem_comp(bm_ctx, s, mg_data_format);

  cvk_tdma_g2l_matrix_copy_param_t p;
  memset(&p, 0, sizeof(p));
  p.src = mg;
  p.dst = ml;

  test_put_mg_mem_comp(bm_ctx, mg, data);
  if (ml->fmt == CVK_FMT_BF16) {
    cvk_ctx->ops->tdma_g2l_bf16_matrix_copy(cvk_ctx, &p);
  } else {
    cvk_ctx->ops->tdma_g2l_matrix_copy(cvk_ctx, &p);
  }
  test_submit_comp(bm_ctx, cvk_ctx);

  test_free_mg_mem_comp(bm_ctx, mg);
}

cvk_mg_t *test_put_matrix_g(CVI_RT_HANDLE *bm_ctx, const cvk_mg_shape_t s, cvk_fmt_t mg_data_format,
                            uint8_t data[]) {
  cvk_mg_t *mg = test_alloc_mg_mem_comp(bm_ctx, s, mg_data_format);
  test_put_mg_mem_comp(bm_ctx, mg, data);
  return mg;
}

cvk_tl_t *test_alloc_tl(cvk_context_t *cvk_ctx, cvk_tl_shape_t shape, cvk_fmt_t fmt, int eu_align) {
  return cvk_ctx->ops->lmem_alloc_tensor(cvk_ctx, shape, fmt, eu_align);
}

void test_free_tl(cvk_context_t *cvk_ctx, const cvk_tl_t *t) {
  cvk_ctx->ops->lmem_free_tensor(cvk_ctx, t);
}

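/*
 * Illustrative sketch (not part of the original file): how the helpers above
 * are meant to compose for a host -> local memory -> host round trip.  The
 * function name, shape, format and eu_align value are assumptions made for
 * this example only.
 */
__attribute__((unused)) static void example_tensor_roundtrip(CVI_RT_HANDLE *bm_ctx,
                                                             cvk_context_t *cvk_ctx) {
  // Hypothetical small int8 tensor; eu_align = 1 asks for EU-aligned local strides.
  cvk_tl_shape_t shape = {.n = 1, .c = 4, .h = 8, .w = 8};
  cvk_tl_t *tl = test_alloc_tl(cvk_ctx, shape, CVK_FMT_I8, /*eu_align=*/1);
  assert(tl && "Expect allocated local tensor");

  uint32_t size = shape.n * shape.c * shape.h * shape.w;
  uint8_t *in = (uint8_t *)malloc(size);
  assert(in);
  for (uint32_t i = 0; i < size; i++)
    in[i] = (uint8_t)i;

  // Host buffer -> global staging buffer -> local memory.
  test_put_tensor_g2l_comp(bm_ctx, cvk_ctx, tl, in);
  // Local memory -> global staging buffer -> freshly malloc'd host buffer.
  uint8_t *out = test_get_tensor_l2g_comp(bm_ctx, cvk_ctx, tl);
  assert(memcmp(in, out, size) == 0 && "Round trip should preserve the data");

  free(in);
  free(out);
  test_free_tl(cvk_ctx, tl);
}
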
#define CNV_SCALAR_C_ALIGN (0x1000)

// Round `length` up to the next multiple of `align`.
static inline uint64_t cnvAlign64(const uint64_t length, const uint64_t align) {
  uint64_t stride = (uint64_t)(length / align) * align;
  if (stride < length) {
    stride += align;
  }
  return stride;
}

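/*
 * For example, with the 0x1000-byte alignment used below:
 *   cnvAlign64(0x2000, 0x1000) == 0x2000   (already aligned, unchanged)
 *   cnvAlign64(0x2001, 0x1000) == 0x3000   (rounded up to the next boundary)
 */
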
uint8_t *test_get_vp_addr(bmctx_t *ctx, AddrInfo *pAddrInfo) {
  if (pAddrInfo->vir_addr) {
    test_free_vp_addr(ctx, pAddrInfo);
  }

  pAddrInfo->mem = bmmem_device_alloc_raw(*ctx, pAddrInfo->size_bytes);
  pAddrInfo->vir_addr = (uint8_t *)bmmem_device_v_addr(pAddrInfo->mem);
  pAddrInfo->phy_addr = bmmem_device_addr(pAddrInfo->mem);

  // Advance both the physical and virtual address to the next
  // CNV_SCALAR_C_ALIGN (4 KiB) boundary and return the aligned virtual address.
  uint64_t new_paddr = cnvAlign64(pAddrInfo->phy_addr, CNV_SCALAR_C_ALIGN);
  uint64_t offset = new_paddr - pAddrInfo->phy_addr;
  pAddrInfo->phy_addr = new_paddr;
  pAddrInfo->vir_addr += offset;
  return pAddrInfo->vir_addr;
}

void test_free_vp_addr(bmctx_t *ctx, AddrInfo *pAddrInfo) {
  bmmem_device_free(*ctx, pAddrInfo->mem);
  pAddrInfo->phy_addr = -1;
  pAddrInfo->vir_addr = NULL;
  // pAddrInfo->size_bytes = 0;
}