/*
 * Copyright (C) Cvitek Co., Ltd. 2019-2020. All rights reserved.
 */
#ifndef UNPOOLING_OP_H_
#define UNPOOLING_OP_H_

#include "tpuc/CustomOp.h"
#include "cvikernel/cvikernel.h" // cvk_context_t, cvk_tl_t, ... (assumed path)

#include <cassert>
#include <cstdint>
#include <memory>
#include <vector>

namespace cvi {

class UnPoolingOp : public CustomOp {
public:
  UnPoolingOp(OpParam &param) : CustomOp(param) {}

  void interpretFp32(
      std::vector<std::shared_ptr<std::vector<float>>> &operand_tensors,
      std::vector<std::vector<int64_t>> &operand_shapes,
      std::shared_ptr<std::vector<float>> &result_tensor,
      std::vector<int64_t> &result_shape);

  void interpretInt8(
      std::vector<std::shared_ptr<std::vector<float>>> &operand_tensors,
      std::vector<std::vector<int64_t>> &operand_shapes,
      std::shared_ptr<std::vector<float>> &result_tensor,
      std::vector<int64_t> &result_shape);

  void quantizeInt8();

  void codeGenInt8(void *ctx,
                   std::vector<std::vector<int64_t>> &operand_shapes,
                   std::vector<uint64_t> &operand_gaddrs,
                   std::vector<int64_t> &result_shape, uint64_t result_gaddr,
                   int layer_id);

private:
  // Allocate the local-memory tiles (input, working buffer, output, mask)
  // for one (tiling_c, tiling_h) tile.
  void alloc_lmem(cvk_context_t *ctx, uint32_t tiling_c, uint32_t tiling_h,
                  uint32_t input_c, uint32_t input_h, uint32_t input_w,
                  uint32_t output_c, uint32_t output_h, uint32_t output_w,
                  cvk_fmt_t fmt, int eu_align, cvk_tl_t &tl_ifmap,
                  cvk_tl_t &tl_working, cvk_tl_t &tl_ofmap, cvk_tl_t &tl_mask);

  // DMA one tile from global memory (ga_src) into local memory.
  void tdma_load(cvk_context_t *ctx, cvk_tl_t *tlp, uint64_t ga_src,
                 cvk_tg_stride_t stride, int n_pos, int c_pos, int h_pos);

  // Upsample tl_ifmap by (scale_h, scale_w) and apply tl_mask into tl_ofmap.
  void unpooling_compute(cvk_context_t *ctx, uint32_t layer_id, int scale_h,
                         int scale_w, cvk_tl_t *tl_ifmap, cvk_tl_t *tl_working,
                         cvk_tl_t *tl_mask, cvk_tl_t *tl_ofmap);

  // DMA one tile back to global memory (ga_dst), cropped to (crop_h, crop_w).
  void tdma_store(cvk_context_t *ctx, cvk_tl_t *tlp, uint64_t ga_dst,
                  cvk_tg_stride_t stride, uint32_t n_pos, uint32_t c_pos,
                  uint32_t h_pos, uint32_t crop_h, uint32_t crop_w);

  void unpooling_codegen(cvk_context_t *ctx, uint32_t layer_id,
                         uint64_t data_gaddr, uint64_t mask_gaddr,
                         uint64_t output_gaddr, int input_n, int input_c,
                         int input_h, int input_w, int scale, int unpool_h,
                         int unpool_w);

  // Reference implementation shared by interpretFp32/interpretInt8:
  // nearest-neighbor upsample by `scale`, zero out the positions not flagged
  // by the mask recorded by the matching max-pooling layer, then crop to
  // (unpool_h, unpool_w).
  void unpooling(
      std::vector<std::shared_ptr<std::vector<float>>> &operand_tensors,
      std::vector<std::vector<int64_t>> &operand_shapes,
      std::shared_ptr<std::vector<float>> &result_tensor,
      std::vector<int64_t> &result_shape) {
    int in = operand_shapes[0][0];
    int ic = operand_shapes[0][1];
    int ih = operand_shapes[0][2];
    int iw = operand_shapes[0][3];
    int oh = result_shape[2];
    int ow = result_shape[3];
    float *data = operand_tensors[0]->data();
    float *mask = operand_tensors[1]->data();
    float *output = result_tensor->data();
    auto scale = param.get<int32_t>("scale");
    auto unpool_h = param.get<int32_t>("unpool_h");
    auto unpool_w = param.get<int32_t>("unpool_w");
    assert(oh == unpool_h);
    assert(ow == unpool_w);
    int sh = ih * scale;
    int sw = iw * scale;
    // Always use float storage, even for int8 values.
    std::vector<float> tmp_out(in * ic * sh * sw);
    // Step 1: upsample by nearest neighbor into an (sh, sw) intermediate,
    // keeping only the positions flagged by the pooling mask.
    for (int n = 0; n < in; n++) {
      for (int c = 0; c < ic; c++) {
        for (int h = 0; h < sh; h++) {
          for (int w = 0; w < sw; w++) {
            int isw = w / scale;
            int ish = h / scale;
            int out_idx = ((n * ic + c) * sh + h) * sw + w;
            int in_idx = ((n * ic + c) * ih + ish) * iw + isw;
            tmp_out[out_idx] = data[in_idx] * mask[out_idx];
          }
        }
      }
    }
    // Step 2: crop the (sh, sw) intermediate down to the requested (oh, ow)
    // output.
    for (int n = 0; n < in; n++) {
      for (int c = 0; c < ic; c++) {
        for (int h = 0; h < oh; h++) {
          for (int w = 0; w < ow; w++) {
            int out_idx = ((n * ic + c) * oh + h) * ow + w;
            int in_idx = ((n * ic + c) * sh + h) * sw + w;
            output[out_idx] = tmp_out[in_idx];
          }
        }
      }
    }
  }
};

} // namespace cvi

#endif // UNPOOLING_OP_H_
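
// ---------------------------------------------------------------------------
// Worked example (illustrative sketch, excluded from compilation): the
// standalone program below reproduces the fp32 math of `unpooling` above on
// a 1x1x2x2 input with scale = 2 and a 3x3 (unpool_h x unpool_w) output, so
// the result can be checked by hand. Everything in it is hypothetical test
// scaffolding; nothing comes from the tpuc/cvikernel APIs. Copy it into its
// own translation unit to run it.
#if 0
#include <cassert>
#include <cstdio>
#include <vector>

int main() {
  // Single batch, single channel, to keep the indexing flat.
  const int ih = 2, iw = 2, scale = 2;
  const int sh = ih * scale, sw = iw * scale; // 4x4 intermediate
  const int oh = 3, ow = 3;                   // cropped output
  std::vector<float> data = {1, 2,
                             3, 4};
  // Pooling mask: 1 where the max pool took its value, 0 elsewhere.
  std::vector<float> mask = {1, 0, 0, 1,
                             0, 0, 0, 0,
                             0, 1, 0, 0,
                             0, 0, 0, 0};
  std::vector<float> tmp(sh * sw), out(oh * ow);
  // Step 1: nearest-neighbor upsample, gated by the mask.
  for (int h = 0; h < sh; h++)
    for (int w = 0; w < sw; w++)
      tmp[h * sw + w] =
          data[(h / scale) * iw + (w / scale)] * mask[h * sw + w];
  // Step 2: crop the 4x4 intermediate to the 3x3 output.
  for (int h = 0; h < oh; h++)
    for (int w = 0; w < ow; w++)
      out[h * ow + w] = tmp[h * sw + w];
  // Expected:
  //   1 0 0
  //   0 0 0
  //   0 3 0
  // (the mask's 1 at position (0,3) falls outside the crop and is dropped)
  assert(out[0] == 1.0f && out[7] == 3.0f);
  for (int h = 0; h < oh; h++) {
    for (int w = 0; w < ow; w++)
      std::printf("%g ", out[h * ow + w]);
    std::printf("\n");
  }
  return 0;
}
#endif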