/*
 * Files
 * SDK_SG200x_V2/cviruntime/samples_inner/mt/mt_model.cpp
 * carbon e25f20f7a3 add cviruntime
 * commit 3f4938648950a7f3bf9a19c320ca9fae7c52de20
 * Author: sophgo-forum-service <forum_service@sophgo.com>
 * Date:   Mon May 13 13:44:23 2024 +0800
 *
 *     [feat] cviruntime opensource for cv18xx soc.
 *
 *     - a4b6a3, add cumsum and gatherelements_pt.
 * 2024-05-31 11:51:34 +08:00
 *
 * 220 lines
 * 6.2 KiB
 * C++
 */

#include <stdio.h>
#include <math.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <opencv2/opencv.hpp>
#include "cviruntime.h"
#include "cnpy.h"
#include "mt_model.hpp"
// Returns the fill value (-50) written into masked positions, encoded as the
// upper 16 bits of its 32-bit float representation (plain truncation, no
// rounding).
//
// The float's bytes are copied into two bf16_t-sized halves with memcpy
// instead of being read through a bf16_t pointer: dereferencing a float
// object through an unrelated pointer type violates the strict-aliasing rule
// and lets the optimizer miscompile the access.
static bf16_t mask_val() {
  const float val = -50;
  bf16_t halves[2];
  static_assert(sizeof(halves) == sizeof(val),
                "bf16_t must be exactly half the size of float");
  memcpy(halves, &val, sizeof(val));
  // The most-significant half of the float comes first in memory on
  // big-endian targets and second on little-endian ones.
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return halves[0];
#else
  return halves[1];
#endif
}
/*
static void store_result(std::string name, CVI_TENSOR *tensor) {
std::vector<size_t> shape = {
(size_t)tensor->shape.dim[0], (size_t)tensor->shape.dim[1],
(size_t)tensor->shape.dim[2], (size_t)tensor->shape.dim[3]};
cnpy::npz_t npz;
cnpy::npz_add_array<uint16_t>(npz, tensor->name,
(uint16_t *)CVI_NN_TensorPtr(tensor), shape);
cnpy::npz_save_all(name, npz);
}
*/
// Registers the encoder model from `model_file` and binds its tensors.
//
// On a failed registration or tensor query the error code is printed and the
// process exits with status 1. The model is expected to expose exactly two
// inputs (source sequence, source mask) and one output (encoder output);
// this is only enforced through assert().
Encoder::Encoder(const char *model_file) {
  const int register_rc = CVI_NN_RegisterModel(model_file, &model);
  if (register_rc != CVI_RC_SUCCESS) {
    printf("CVI_NN_RegisterModel failed, err %d\n", register_rc);
    exit(1);
  }

  const int query_rc = CVI_NN_GetInputOutputTensors(
      model, &input_tensors, &input_num, &output_tensors, &output_num);
  if (query_rc != CVI_RC_SUCCESS) {
    printf("CVI_NN_GetInputOutputTensors failed, err %d\n", query_rc);
    exit(1);
  }

  assert(input_num == 2);
  assert(output_num == 1);

  // Inputs are bound by position: 0 -> source sequence, 1 -> source mask.
  src_seq = input_tensors + 0;
  src_mask = input_tensors + 1;
  enc_output = output_tensors + 0;

  // Log the input tensor names for diagnostics.
  int idx = 0;
  while (idx < input_num) {
    std::cout << "input => " << input_tensors[idx].name << "\n";
    ++idx;
  }
}
void Encoder::gen_src_mask(int16_t *seq, int32_t size) {
auto ptr = (bf16_t *)CVI_NN_TensorPtr(src_mask);
assert(CVI_NN_TensorCount(src_mask) == size);
auto filled_val = mask_val();
for (int i = 0; i < size; i++) {
ptr[i] = (seq[i] == 0) ? filled_val : 0;
}
}
// Returns the data pointer of the source-mask tensor, viewed as bf16_t.
bf16_t *Encoder::get_mask() {
  void *raw = CVI_NN_TensorPtr(src_mask);
  return static_cast<bf16_t *>(raw);
}
// Runs the encoder on one source sequence.
//
// seq  - source token ids; the buffer is handed to the source-sequence input
//        tensor via CVI_NN_SetTensorPtr, and is also used to derive the
//        source mask.
// size - number of entries in `seq`.
//
// Returns the data pointer of the encoder output tensor, viewed as bf16_t.
// A failing CVI_NN_Forward is reported on stdout; the pointer is still
// returned so existing callers keep working, but its contents are then not
// the result of this call.
bf16_t *Encoder::run(int16_t *seq, int32_t size) {
  // Input tensor 0: the source sequence.
  CVI_NN_SetTensorPtr(src_seq, seq);
  // Input tensor 1: the mask generated from the sequence.
  gen_src_mask(seq, size);

  // Run inference; the return code used to be dropped silently.
  const int ret = CVI_NN_Forward(model, input_tensors, input_num,
                                 output_tensors, output_num);
  if (ret != CVI_RC_SUCCESS) {
    printf("CVI_NN_Forward failed, err %d\n", ret);
  }

  // Debug aid: store_result("xx_enc_output.npz", enc_output);
  return static_cast<bf16_t *>(CVI_NN_TensorPtr(enc_output));
}
// Creates a decoder by cloning `main_model` and selecting the program that
// matches `max_step`.
//
// main_model - handle of an already registered model to clone.
// max_step   - decoding length this instance serves. Supported values and
//              the program index they select: 0 -> 1, 10 -> 2, 20 -> 3,
//              30 -> 4, 39 -> 5. Any other value selects no program index
//              and is reported with a warning.
//
// On a failed clone or tensor query the error code is printed and the
// process exits with status 1. The selected program is expected to expose
// four inputs and one output; this is only enforced through assert().
Decoder::Decoder(CVI_MODEL_HANDLE main_model, int32_t max_step)
    : max_step(max_step) {
  int ret = CVI_NN_CloneModel(main_model, &model);
  if (ret != CVI_RC_SUCCESS) {
    // Fixed: the message previously named CVI_NN_RegisterModel.
    printf("CVI_NN_CloneModel failed, err %d\n", ret);
    exit(1);
  }

  CVI_NN_SetConfig(model, OPTION_BATCH_SIZE, 1);
  switch (max_step) {
    case 0:
      CVI_NN_SetConfig(model, OPTION_PROGRAM_INDEX, 1);
      break;
    case 10:
      CVI_NN_SetConfig(model, OPTION_PROGRAM_INDEX, 2);
      break;
    case 20:
      CVI_NN_SetConfig(model, OPTION_PROGRAM_INDEX, 3);
      break;
    case 30:
      CVI_NN_SetConfig(model, OPTION_PROGRAM_INDEX, 4);
      break;
    case 39:
      CVI_NN_SetConfig(model, OPTION_PROGRAM_INDEX, 5);
      break;
    default:
      // Previously an unsupported length fell through without any notice.
      printf("Decoder: unsupported max_step %d, no program index selected\n",
             (int)max_step);
      break;
  }

  ret = CVI_NN_GetInputOutputTensors(model, &input_tensors, &input_num,
                                     &output_tensors, &output_num);
  if (ret != CVI_RC_SUCCESS) {
    printf("CVI_NN_GetInputOutputTensors failed, err %d\n", ret);
    exit(1);
  }
  assert(input_num == 4);
  assert(output_num == 1);

  // Inputs are bound by position.
  trg_seq = &input_tensors[0];
  enc_output = &input_tensors[1];
  src_mask = &input_tensors[2];
  trg_mask = &input_tensors[3];
  dec_output = &output_tensors[0];
  // Row length used by argmax() when scanning the output tensor.
  width = dec_output->shape.dim[2];

  for (int i = 0; i < input_num; i++) {
    std::cout << "input => " << input_tensors[i].name << "\n";
  }
  std::cout << max_step << "- Decoder: tensors: "
            << trg_seq->name << ", "
            << trg_mask->name << ", "
            << enc_output->name << ", "
            << src_mask->name << ", "
            << dec_output->name << ", width:"
            << width << "\n";

  // The target mask depends only on max_step, so it is generated once here.
  gen_trg_mask();
}
void Decoder::gen_trg_mask() {
auto filled_val = mask_val();
auto ptr = (bf16_t *)CVI_NN_TensorPtr(trg_mask);
for (int i = 0; i < max_step; i++) {
for (int j = 0; j < max_step; j++) {
ptr[i * max_step + j] = (j > i) ? filled_val : 0;
}
}
}
// Returns the column index of the largest strictly positive int8 score in
// one row of the decoder output.
//
// step - decoding step; the row scanned is step - 1, except that step 0
//        scans row 0.
//
// Ties keep the first (lowest) index. If no score in the row is greater
// than 0, index 0 is returned.
int16_t Decoder::argmax(int step) {
  const int row = (step != 0) ? (step - 1) : 0;
  const int8_t *scores =
      static_cast<int8_t *>(CVI_NN_TensorPtr(dec_output)) + row * width;

  int best_col = 0;
  int8_t best_score = 0;
  for (int col = 0; col < width; ++col) {
    const int8_t score = scores[col];
    // best_score never drops below 0, so negative scores can never win.
    if (score > best_score) {
      best_score = score;
      best_col = col;
    }
  }
  return best_col;
}
int16_t Decoder::run(int step, int16_t *seq, bf16_t *enc, bf16_t *mask) {
// fill data to input tensor
CVI_NN_SetTensorPtr(trg_seq, seq);
CVI_NN_SetTensorPtr(enc_output, enc);
CVI_NN_SetTensorPtr(src_mask, mask);
// run inference
CVI_NN_Forward(model, input_tensors, input_num,
output_tensors, output_num);
// std::string name = "xx_decode_" + std::to_string(step) + "_out.npz";
// store_result(name, dec_output);
return argmax(step);
}
// Translates one source sequence by greedy step-by-step decoding.
//
// seq        - source token ids.
// seq_sz     - number of entries in `seq`.
// gen_seq    - output buffer; cleared, then filled with SOS_IDX followed by
//              the generated tokens.
// gen_seq_sz - capacity of `gen_seq` in entries.
//
// Decoding stops at the first EOS_IDX produced inside the loop, after
// INFER_FIX_LEN entries, or when `gen_seq` is full, whichever comes first.
// Nothing is done when `gen_seq` is null or `gen_seq_sz` is not positive.
//
// NOTE(review): the decoders receive `gen_seq` itself as their target
// sequence input, so the buffer presumably has to hold INFER_FIX_LEN entries
// for the reads to stay in bounds - confirm against the model's input shapes.
void MTrans::run(int16_t *seq, int32_t seq_sz, int16_t *gen_seq, int32_t gen_seq_sz) {
  // A non-positive size would turn into a huge memset length.
  if (gen_seq == nullptr || gen_seq_sz <= 0) {
    return;
  }
  // Clean gen_seq array.
  memset(gen_seq, 0, gen_seq_sz * sizeof(int16_t));

  auto enc_output = encoder->run(seq, seq_sz);
  auto src_mask = encoder->get_mask();

  // First step: decode from a single start token.
  // NOTE(review): the literal 1 presumably equals SOS_IDX - confirm.
  int16_t trg_seq = 1;
  auto best_idx = decoder_0->run(0, &trg_seq, enc_output, src_mask);
  gen_seq[0] = SOS_IDX;
  if (gen_seq_sz < 2) {
    return;  // no room for any generated token
  }
  gen_seq[1] = best_idx;

  // Remaining steps: use the decoder whose length covers the current step.
  // The extra bound on gen_seq_sz keeps writes inside the caller's buffer.
  for (int step = 2; step < INFER_FIX_LEN && step < gen_seq_sz; step++) {
    if (step <= 10) {
      best_idx = decoder_10->run(step, gen_seq, enc_output, src_mask);
    } else if (step <= 20) {
      best_idx = decoder_20->run(step, gen_seq, enc_output, src_mask);
    } else if (step <= 30) {
      best_idx = decoder_30->run(step, gen_seq, enc_output, src_mask);
    } else {
      best_idx = decoder_39->run(step, gen_seq, enc_output, src_mask);
    }
    gen_seq[step] = best_idx;
    if (best_idx == EOS_IDX) {
      break;
    }
  }
}