Files
SDK_SG200x_V2/cvimath/tests/cvi1835/fp32_bf16.cpp
carbon 83dc4914fe add cvimath
commit ce8705f49da5e5f59c2ddb3253ef88323a0cd9c4
Author: sophgo-forum-service <forum_service@sophgo.com>
Date:   Mon May 13 14:04:10 2024 +0800

    [feat] cvimath opensource for cv18xx soc.

    - 9e8967
2024-05-31 11:54:07 +08:00

128 lines
3.2 KiB
C++

#include <cvimath_internal.h>
#include <sys/time.h>
#include <test_cvikernel_util.h>
// Local shorthand for cvk_tdma_g2g_tensor_copy_param_t. Within this file only
// the src and dst members of the struct are ever written or read.
typedef cvk_tdma_g2g_tensor_copy_param_t param_t;
/**
 * Write a one-line summary of a parameter block to a stream.
 *
 * The line has the form "<tag>: (n, c, h, w) => (n, c, h, w)", where the first
 * tuple is the shape of p->src and the second the shape of p->dst.
 *
 * @param tag label printed at the start of the line
 * @param f   output stream
 * @param p   parameter block; p->src and p->dst must both be set
 */
static void __print_param(const char *tag, FILE *f, param_t *p) {
  fprintf(f, "%s: (%u, %u, %u, %u) => (%u, %u, %u, %u)\n",
          tag,
          // source tensor shape
          p->src->shape.n,
          p->src->shape.c,
          p->src->shape.h,
          p->src->shape.w,
          // destination tensor shape
          p->dst->shape.n,
          p->dst->shape.c,
          p->dst->shape.h,
          p->dst->shape.w);
}
// Calls __print_param() with the name of the enclosing function (__func__) as
// the tag, so each log line identifies where it was emitted from.
#define print_param(f, p) __print_param(__func__, f, p)
// One test case: the pair of tensor shapes handed to test_alloc_tg_mem_comp()
// when allocating the source and destination tensors in test_one_case().
typedef struct {
// Shape used to allocate the source tensor.
cvk_tg_shape_t src_shape;
// Shape used to allocate the destination tensor.
cvk_tg_shape_t dst_shape;
} case_t;
// Format combinations run for every shape case. test_one_case() reads each
// entry through its .src_fmt and .dst_fmt members when allocating tensors.
// NOTE(review): cvk_fmt_type is declared outside this file; the initializer
// presumably maps to {src_fmt, dst_fmt} in that order — confirm. Both values
// are CVK_FMT_BF16 here, so the order does not affect the current run.
static cvk_fmt_type input_fmt[] = {
{CVK_FMT_BF16, CVK_FMT_BF16},
};
// Shape cases executed by main(). Each entry is {src_shape, dst_shape}, and
// every active entry uses the same shape for source and destination.
// NOTE(review): the four numbers are presumably in (n, c, h, w) order —
// confirm against the declaration of cvk_tg_shape_t.
static case_t g_cases[] = {
{
{1, 3, 3, 2},
{1, 3, 3, 2},
},
{
{4, 3, 3, 2},
{4, 3, 3, 2},
},
// Disabled case, kept as found.
// NOTE(review): 1 * 256 * 19 * 19 is odd, so the "/ 2" element count in
// test_param_g2g() would truncate — possibly the reason it is disabled; confirm.
//{
// // YOLOv2 concat layer
// {1, 256, 19, 19},
// {1, 256, 19, 19},
//},
{
{1, 256, 19, 20},
{1, 256, 19, 20},
},
{
{1, 1280, 3, 4},
{1, 1280, 3, 4},
},
{
{1, 159 * 89, 36, 4},
{1, 159 * 89, 36, 4},
},
{
{159, 89, 36, 4},
{159, 89, 36, 4},
},
};
/**
 * Run one cvm_s2s_fp32_bf16() operation and verify its output.
 *
 * Steps:
 *   1. Fill a host buffer with a deterministic 32-bit pattern and upload it
 *      into the source tensor.
 *   2. Emit the cvm_s2s_fp32_bf16() command and submit it, printing the
 *      wall-clock time spent in test_submit_comp().
 *   3. Read back the destination tensor and check that every 16-bit output
 *      equals the upper 16 bits of the corresponding 32-bit source word.
 *
 * The process terminates with exit(-1) on the first mismatch.
 *
 * @param ctx runtime handle forwarded to the test_*_comp helpers
 * @param bmk kernel context used to build and submit the command
 * @param p   parameter block; only p->src and p->dst are used
 */
static void test_param_g2g(CVI_RT_HANDLE *ctx, cvk_context_t *bmk, param_t *p) {
  print_param(stderr, p);

  // The source tensor is allocated with a bf16 (16-bit) format but is filled
  // with 32-bit words, so it holds half as many 32-bit elements as its shape
  // has entries. Widen to 64 bits before multiplying so the product cannot
  // wrap in narrower arithmetic.
  uint64_t size =
      (uint64_t)p->src->shape.n * p->src->shape.c * p->src->shape.h * p->src->shape.w / 2;

  uint32_t *src_data = new uint32_t[size];
  for (uint64_t i = 0; i < size; i++) {
    // Distinct, predictable upper and lower halves per element; the value is
    // intentionally reduced modulo 2^32.
    src_data[i] = (uint32_t)(((0x1234 + i) << 16) + 0x5678 + i);
  }

  test_put_tg_mem_comp(ctx, p->src, (uint8_t *)src_data);

  cvm_s2s_fp32_bf16(bmk, p->src->start_address, p->src->shape, p->dst->start_address,
                    p->dst->shape, CVK_FMT_BF16);

  // Time only the submission of the queued command.
  long elapsed;
  struct timeval t0, t1;
  gettimeofday(&t0, NULL);
  test_submit_comp(ctx, bmk);
  gettimeofday(&t1, NULL);
  elapsed = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec;
  printf("kernel takes %ld us\n", elapsed);

  uint16_t *dst_data = (uint16_t *)test_get_tg_mem_comp(ctx, p->dst);

  for (uint64_t i = 0; i < size; i++) {
    // Expected result is the high halfword of the 32-bit source element.
    uint16_t _src_data = (src_data[i] >> 16) & 0xffff;
    if (dst_data[i] != _src_data) {
      // uint64_t is not `unsigned long` on every target (e.g. 32-bit ARM), so
      // cast explicitly and use %llu instead of the mismatched %lu.
      fprintf(stderr, "comparing failed at dst[%llu], got %x, exp %x\n", (unsigned long long)i,
              (unsigned)dst_data[i], (unsigned)_src_data);
      exit(-1);
    }
  }

  delete[] src_data;
  // dst_data is released with free(), as in the original code.
  // NOTE(review): presumably test_get_tg_mem_comp() allocates with malloc — confirm.
  free(dst_data);
}
/**
 * Release the tensors referenced by a parameter block.
 *
 * Frees p->src first and then p->dst through test_free_tg_mem_comp(). The
 * parameter block itself is not modified, so its pointers must not be used
 * after this call.
 *
 * @param ctx runtime handle forwarded to test_free_tg_mem_comp()
 * @param p   parameter block whose src and dst tensors are released
 */
static void destroy_param_g2g(CVI_RT_HANDLE *ctx, param_t *p)
{
  // Source tensor.
  test_free_tg_mem_comp(ctx, p->src);

  // Destination tensor.
  test_free_tg_mem_comp(ctx, p->dst);
}
/**
 * Run a single shape case once for every entry in input_fmt.
 *
 * For each format entry this allocates a source and a destination tensor with
 * the shapes from the case, runs test_param_g2g() on them, and then releases
 * both tensors via destroy_param_g2g().
 *
 * @param ctx runtime handle forwarded to the helpers
 * @param bmk kernel context forwarded to the helpers
 * @param c   shape case to execute
 */
static void test_one_case(CVI_RT_HANDLE *ctx, cvk_context_t *bmk, case_t *c) {
  const uint32_t fmt_count = sizeof(input_fmt) / sizeof(input_fmt[0]);

  uint32_t fmt_idx = 0;
  while (fmt_idx < fmt_count) {
    param_t param;

    // Allocate the source before the destination, matching the original order.
    cvk_tg_t *src_tg =
        test_alloc_tg_mem_comp(ctx, bmk, c->src_shape, input_fmt[fmt_idx].src_fmt);
    cvk_tg_t *dst_tg =
        test_alloc_tg_mem_comp(ctx, bmk, c->dst_shape, input_fmt[fmt_idx].dst_fmt);

    // Only these two members are consumed by the functions called below.
    param.src = src_tg;
    param.dst = dst_tg;

    test_param_g2g(ctx, bmk, &param);
    destroy_param_g2g(ctx, &param);

    ++fmt_idx;
  }
}
int main() {
CVI_RT_HANDLE ctx;
cvk_context_t *bmk;
test_init(&ctx, &bmk);
uint32_t nr_cases = sizeof(g_cases) / sizeof(g_cases[0]);
for (uint32_t i = 0; i < nr_cases; i++) test_one_case(&ctx, bmk, &g_cases[i]);
test_exit(&ctx, bmk);
return 0;
}