本文主要是介绍[RV1109/RV1126系列]-4.RGA、DRM对图像Resize加速,希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!
RGA、DRM对图像Resize加速
上文中,我们已经完成了RKNN基本转换、部署工具的安装,下面我们对视觉算法中常用的 Resize 操作进行优化提速。因RV1126/1109采用Arm A7处理器,我们在使用 OpenCV 进行resize操作时,发现会有较大的耗时。针对这一场景,瑞芯微为我们提供了硬件加速方案,通过RGA和DRM可以成倍地降低resize操作的耗时。
1.测试结果
图像大小 | resize_opencv | resize_rga |
---|---|---|
640*480 | 22ms | 3ms |
1280*720 | 30ms | 5ms |
PS:测试硬件为RV1126,从上表分析可知,提速比可达到6-7倍左右!!!
2. RGA部分代码(使用瑞芯微官方源码)
rga_func.h
#ifndef __RGA_FUNC_H__
#define __RGA_FUNC_H__#include <dlfcn.h>#include "RgaApi.h"#ifdef __cplusplus
extern "C" {
#endiftypedef int (*FUNC_RGA_INIT)();
typedef void (*FUNC_RGA_DEINIT)();
typedef int (*FUNC_RGA_BLIT)(rga_info_t *, rga_info_t *, rga_info_t *);typedef struct _rga_context {void *rga_handle;FUNC_RGA_INIT init_func;FUNC_RGA_DEINIT deinit_func;FUNC_RGA_BLIT blit_func;
} rga_context;int RGA_init(rga_context *rga_ctx);void img_resize_fast(rga_context *rga_ctx, int src_fd, int src_w, int src_h,uint64_t dst_phys, int dst_w, int dst_h);void img_resize_slow(rga_context *rga_ctx, void *src_virt, int src_w, int src_h,void *dst_virt, int dst_w, int dst_h);int RGA_deinit(rga_context *rga_ctx);#ifdef __cplusplus
}
#endif
#endif /*__RGA_FUNC_H__*/
rga_func.c
#include "rga_func.h"

/* Adjust to the actual location of librga on the target. */
#define LIBRGAFILE "../lib/librga.so"

/**
 * Load librga and resolve the init / deinit / blit entry points.
 *
 * On any failure the library is unloaded again and rga_ctx->rga_handle is
 * left NULL, so the resize functions become no-ops instead of calling
 * through unresolved pointers.
 *
 * @return 0 on success, -1 on failure.
 */
int RGA_init(rga_context *rga_ctx) {
  int ret;

  if (rga_ctx == NULL) {
    printf("rga context is NULL\n");
    return -1;
  }

  rga_ctx->rga_handle = dlopen(LIBRGAFILE, RTLD_LAZY);
  if (!rga_ctx->rga_handle) {
    printf("dlopen %s failed\n", LIBRGAFILE);
    return -1;
  }

  rga_ctx->init_func =
      (FUNC_RGA_INIT)dlsym(rga_ctx->rga_handle, "c_RkRgaInit");
  rga_ctx->deinit_func =
      (FUNC_RGA_DEINIT)dlsym(rga_ctx->rga_handle, "c_RkRgaDeInit");
  rga_ctx->blit_func =
      (FUNC_RGA_BLIT)dlsym(rga_ctx->rga_handle, "c_RkRgaBlit");

  if (!rga_ctx->init_func || !rga_ctx->deinit_func || !rga_ctx->blit_func) {
    printf("dlsym failed to resolve librga entry points in %s\n", LIBRGAFILE);
    goto unload;
  }

  ret = rga_ctx->init_func();
  if (ret) {
    printf("c_RkRgaInit failed, ret=%d\n", ret);
    goto unload;
  }
  return 0;

unload:
  dlclose(rga_ctx->rga_handle);
  rga_ctx->rga_handle = NULL;
  rga_ctx->init_func = NULL;
  rga_ctx->deinit_func = NULL;
  rga_ctx->blit_func = NULL;
  return -1;
}

/* Describe both images as full-frame RGB888 rectangles and run the blit. */
static void rga_blit_rgb888(rga_context *rga_ctx, rga_info_t *src, int src_w,
                            int src_h, rga_info_t *dst, int dst_w, int dst_h) {
  int ret;

  dst->nn.nn_flag = 0;
  rga_set_rect(&src->rect, 0, 0, src_w, src_h, src_w, src_h,
               RK_FORMAT_RGB_888);
  rga_set_rect(&dst->rect, 0, 0, dst_w, dst_h, dst_w, dst_h,
               RK_FORMAT_RGB_888);

  ret = rga_ctx->blit_func(src, dst, NULL);
  if (ret) {
    printf("c_RkRgaBlit error : %s\n", strerror(errno));
  }
}

/**
 * RGA resize: source is addressed by buffer fd, destination by physical
 * address. Both images are RGB888. No-op when the library is not loaded.
 */
void img_resize_fast(rga_context *rga_ctx, int src_fd, int src_w, int src_h,
                     uint64_t dst_phys, int dst_w, int dst_h) {
  rga_info_t src, dst;

  if (rga_ctx == NULL || !rga_ctx->rga_handle || !rga_ctx->blit_func) {
    return;
  }

  memset(&src, 0, sizeof(rga_info_t));
  src.fd = src_fd;
  src.mmuFlag = 1;

  memset(&dst, 0, sizeof(rga_info_t));
  dst.fd = -1;
  dst.mmuFlag = 0;
  /* Going through uintptr_t narrows to the native pointer width, which on
   * 32-bit ARM is the same truncation the explicit uint32_t cast performed. */
  dst.phyAddr = (void *)(uintptr_t)dst_phys;

  rga_blit_rgb888(rga_ctx, &src, src_w, src_h, &dst, dst_w, dst_h);
}

/**
 * RGA resize: source and destination are both addressed by virtual address.
 * Both images are RGB888. No-op when the library is not loaded.
 */
void img_resize_slow(rga_context *rga_ctx, void *src_virt, int src_w, int src_h,
                     void *dst_virt, int dst_w, int dst_h) {
  rga_info_t src, dst;

  if (rga_ctx == NULL || !rga_ctx->rga_handle || !rga_ctx->blit_func) {
    return;
  }

  memset(&src, 0, sizeof(rga_info_t));
  src.fd = -1;
  src.mmuFlag = 1;
  src.virAddr = src_virt;

  memset(&dst, 0, sizeof(rga_info_t));
  dst.fd = -1;
  dst.mmuFlag = 1;
  dst.virAddr = dst_virt;

  rga_blit_rgb888(rga_ctx, &src, src_w, src_h, &dst, dst_w, dst_h);
}

/**
 * Undo RGA_init: call the library's deinit entry point (when resolved),
 * unload the library and clear the context.
 *
 * @return 0 always; safe to call on a context that was never initialised
 *         as long as it was zero-filled.
 */
int RGA_deinit(rga_context *rga_ctx) {
  if (rga_ctx == NULL) {
    return 0;
  }

  if (rga_ctx->rga_handle) {
    if (rga_ctx->deinit_func) {
      rga_ctx->deinit_func();
    }
    dlclose(rga_ctx->rga_handle);
    rga_ctx->rga_handle = NULL;
  }
  rga_ctx->init_func = NULL;
  rga_ctx->deinit_func = NULL;
  rga_ctx->blit_func = NULL;
  return 0;
}
3. DRM部分代码(使用瑞芯微官方源码)
drm_func.h
#ifndef __DRM_FUNC_H__
#define __DRM_FUNC_H__
#include <errno.h>
#include <linux/input.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/fcntl.h> // open function
#include <sys/mman.h>
#include <unistd.h> // close function

#include "libdrm/drm_fourcc.h"
#include "xf86drm.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Signature of libdrm's drmIoctl, resolved at run time with dlsym(). */
typedef int (*FUNC_DRM_IOCTL)(int fd, unsigned long request, void *arg);

/* Handle to the dynamically loaded libdrm plus its resolved ioctl wrapper. */
typedef struct _drm_context {
  void *drm_handle;       /* dlopen() handle, NULL when not loaded */
  FUNC_DRM_IOCTL io_func; /* drmIoctl */
} drm_context;

/**
 * Open /dev/dri/card0, load libdrm and resolve drmIoctl into drm_ctx.
 * @return the DRM device file descriptor on success, -1 on failure.
 */
int drm_init(drm_context *drm_ctx);

/**
 * Create a dumb buffer of TexWidth x TexHeight at bpp bits per pixel and map
 * it into this process.
 *
 * @param fd          optional out: file descriptor exported for the buffer
 * @param handle      optional out: buffer handle
 * @param actual_size optional out: size in bytes reported by the driver
 * @return the mapped virtual address, or NULL on failure.
 */
void *drm_buf_alloc(drm_context *drm_ctx, int drm_fd, int TexWidth,
                    int TexHeight, int bpp, int *fd, unsigned int *handle,
                    size_t *actual_size);

/**
 * Unmap a buffer obtained from drm_buf_alloc, destroy its dumb buffer and
 * close buf_fd.
 * @return the result of the destroy ioctl, or -1 when drm_buf is NULL.
 */
int drm_buf_destroy(drm_context *drm_ctx, int drm_fd, int buf_fd, int handle,
                    void *drm_buf, size_t size);

/** Unload libdrm (if loaded) and close the DRM device file descriptor. */
void drm_deinit(drm_context *drm_ctx, int drm_fd);

#ifdef __cplusplus
}
#endif
#endif /*__DRM_FUNC_H__*/
drm_func.c
#include "drm_func.h"

#include <dlfcn.h>

/* Adjust to the actual location of libdrm on the target. */
#define LIBDRMFILE "../lib/libdrm.so"

/**
 * Open the DRM device, load libdrm and resolve drmIoctl.
 *
 * @return the DRM device file descriptor on success, -1 on failure (in which
 *         case nothing stays open and drm_ctx->drm_handle is NULL).
 */
int drm_init(drm_context *drm_ctx) {
  static const char *card = "/dev/dri/card0";
  int drm_fd;

  if (drm_ctx == NULL) {
    printf("drm context is NULL\n");
    return -1;
  }

  drm_fd = open(card, O_RDWR);
  if (drm_fd < 0) {
    printf("failed to open %s\n", card);
    return -1;
  }

  drm_ctx->drm_handle = dlopen(LIBDRMFILE, RTLD_LAZY);
  if (!drm_ctx->drm_handle) {
    printf("failed to dlopen %s \n", LIBDRMFILE);
    drm_deinit(drm_ctx, drm_fd);
    return -1;
  }

  drm_ctx->io_func = (FUNC_DRM_IOCTL)dlsym(drm_ctx->drm_handle, "drmIoctl");
  if (drm_ctx->io_func == NULL) {
    printf("failed to dlsym drmIoctl\n");
    drm_deinit(drm_ctx, drm_fd); /* unloads the library and closes drm_fd */
    return -1;
  }
  return drm_fd;
}

/** Unload libdrm (if loaded) and close the DRM device file descriptor. */
void drm_deinit(drm_context *drm_ctx, int drm_fd) {
  if (drm_ctx != NULL && drm_ctx->drm_handle) {
    dlclose(drm_ctx->drm_handle);
    drm_ctx->drm_handle = NULL;
    drm_ctx->io_func = NULL; /* would dangle once the library is unloaded */
  }
  /* 0 is a valid descriptor; only negative values mean "not open". */
  if (drm_fd >= 0) {
    close(drm_fd);
  }
}

/**
 * Create a dumb buffer, export it as a file descriptor and map it.
 *
 * The out-parameters are written only on success; on failure every resource
 * acquired so far is released and they are left untouched.
 *
 * @param fd          optional out: exported buffer fd (the descriptor is
 *                    closed right away when the caller passes NULL)
 * @param handle      optional out: buffer handle
 * @param actual_size optional out: buffer size in bytes reported by the driver
 * @return the mapped virtual address, or NULL on failure.
 */
void *drm_buf_alloc(drm_context *drm_ctx, int drm_fd, int TexWidth,
                    int TexHeight, int bpp, int *fd, unsigned int *handle,
                    size_t *actual_size) {
  struct drm_mode_create_dumb alloc_arg;
  struct drm_prime_handle fd_args;
  struct drm_mode_map_dumb mmap_arg;
  struct drm_mode_destroy_dumb destory_arg;
  void *vir_addr;
  int prime_fd = -1;
  int ret;

  if (drm_ctx == NULL || drm_ctx->io_func == NULL) {
    printf("drm context is unvalid\n");
    return NULL;
  }

  /* Create the buffer: yields its handle and size. */
  memset(&alloc_arg, 0, sizeof(alloc_arg));
  alloc_arg.bpp = bpp;
  alloc_arg.width = TexWidth;
  alloc_arg.height = TexHeight;
  // alloc_arg.flags = ROCKCHIP_BO_CONTIG;
  ret = drm_ctx->io_func(drm_fd, DRM_IOCTL_MODE_CREATE_DUMB, &alloc_arg);
  if (ret) {
    printf("failed to create dumb buffer: %s\n", strerror(errno));
    return NULL;
  }

  /* Export the handle as a file descriptor. */
  memset(&fd_args, 0, sizeof(fd_args));
  fd_args.fd = -1;
  fd_args.handle = alloc_arg.handle;
  fd_args.flags = 0;
  ret = drm_ctx->io_func(drm_fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &fd_args);
  if (ret) {
    printf("rk-debug handle_to_fd failed ret=%d,err=%s, handle=%x \n", ret,
           strerror(errno), fd_args.handle);
    goto destory_dumb;
  }
  prime_fd = fd_args.fd;

  /* Obtain the mmap offset, then map the buffer to get a virtual address. */
  memset(&mmap_arg, 0, sizeof(mmap_arg));
  mmap_arg.handle = alloc_arg.handle;
  ret = drm_ctx->io_func(drm_fd, DRM_IOCTL_MODE_MAP_DUMB, &mmap_arg);
  if (ret) {
    printf("failed to create map dumb: %s\n", strerror(errno));
    goto close_prime_fd;
  }

  vir_addr = mmap(0, alloc_arg.size, PROT_READ | PROT_WRITE, MAP_SHARED,
                  drm_fd, mmap_arg.offset);
  if (vir_addr == MAP_FAILED) {
    printf("failed to mmap buffer: %s\n", strerror(errno));
    goto close_prime_fd;
  }

  /* Success: publish the results. */
  if (handle != NULL) {
    *handle = alloc_arg.handle;
  }
  if (actual_size != NULL) {
    *actual_size = alloc_arg.size;
  }
  if (fd != NULL) {
    *fd = prime_fd;
  } else {
    close(prime_fd); /* nobody can close it later, so do not leak it */
  }
  return vir_addr;

close_prime_fd:
  close(prime_fd);
destory_dumb:
  memset(&destory_arg, 0, sizeof(destory_arg));
  destory_arg.handle = alloc_arg.handle;
  ret = drm_ctx->io_func(drm_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &destory_arg);
  if (ret) {
    printf("failed to destory dumb %d\n", ret);
  }
  return NULL;
}

/**
 * Release a buffer obtained from drm_buf_alloc: unmap it, destroy the dumb
 * buffer and close its exported file descriptor.
 *
 * @return the result of the destroy ioctl; -1 when drm_buf is NULL or the
 *         context cannot issue ioctls.
 */
int drm_buf_destroy(drm_context *drm_ctx, int drm_fd, int buf_fd, int handle,
                    void *drm_buf, size_t size) {
  struct drm_mode_destroy_dumb destory_arg;
  int ret = -1;

  if (drm_buf == NULL) {
    printf("drm buffer is NULL\n");
    return -1;
  }

  if (munmap(drm_buf, size) != 0) {
    printf("failed to munmap buffer: %s\n", strerror(errno));
  }

  if (drm_ctx != NULL && drm_ctx->io_func != NULL) {
    memset(&destory_arg, 0, sizeof(destory_arg));
    destory_arg.handle = handle;
    ret = drm_ctx->io_func(drm_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &destory_arg);
    if (ret) {
      printf("failed to destory dumb %d, error=%s\n", ret, strerror(errno));
    }
  } else {
    printf("drm context is unvalid\n");
  }

  /* 0 is a valid descriptor; only negative values mean "not open". */
  if (buf_fd >= 0) {
    close(buf_fd);
  }
  return ret;
}
4. 封装接口代码
image_util.h
#ifndef _IMAGE_UTIL_H
#define _IMAGE_UTIL_H

#include "drm_func.h"
#include "opencv2/opencv.hpp"
#include "rga_func.h"

/**
 * Process-wide helper that resizes images with the RGA hardware block,
 * staging the source pixels in a single DRM buffer.
 *
 * Obtain the only instance through getInstance(); the class cannot be
 * constructed, copied or moved by callers.
 */
class ImageUtil {
 private:
  ImageUtil() { init(); }
  virtual ~ImageUtil() { release(); }
  ImageUtil(const ImageUtil &) = delete;
  ImageUtil(ImageUtil &&) = delete;
  ImageUtil &operator=(const ImageUtil &) = delete;
  ImageUtil &operator=(ImageUtil &&) = delete;

  void *drm_buf = NULL;     // mapped address of the DRM staging buffer
  int drm_fd = -1;          // DRM device file descriptor
  int buf_fd = -1;          // converted from buffer handle
  unsigned int handle = 0;  // handle of the DRM staging buffer
  size_t actual_size = 0;   // size in bytes of the DRM staging buffer
  rga_context rga_ctx;
  drm_context drm_ctx;

  // Sets up the DRM device, the staging buffer and the RGA library.
  void init(void);
  // Releases everything acquired by init().
  void release(void);

 public:
  static ImageUtil &getInstance();

  /**
   * Scale src to `size` and write the result to dstPtr.
   *
   * @param src    source image; expected to be 8-bit, 3 channels, and no
   *               larger than the staging buffer allocated in init()
   * @param size   target width and height in pixels
   * @param dstPtr destination memory; the output is RGB888, so it needs
   *               size.width * size.height * 3 bytes
   */
  void resize(const cv::Mat &src, const cv::Size &size, void *dstPtr);
};

#endif
image_util.cpp
#include "image_util.h"void ImageUtil::init(void) {memset(&rga_ctx, 0, sizeof(rga_context));memset(&drm_ctx, 0, sizeof(drm_context));drm_fd = drm_init(&drm_ctx);drm_buf = drm_buf_alloc(&drm_ctx, drm_fd, 1920, 1080, 24, &buf_fd, &handle,&actual_size);RGA_init(&rga_ctx);
}
void ImageUtil::release(void) {drm_buf_destroy(&drm_ctx, drm_fd, buf_fd, handle, drm_buf, actual_size);drm_deinit(&drm_ctx, drm_fd);RGA_deinit(&rga_ctx);
}void ImageUtil::resize(const cv::Mat& src, const cv::Size& size, void* dstPtr) {if (src.empty()) {printf("src is empty!\n");return;}int img_width = src.cols;int img_height = src.rows;memcpy(drm_buf, src.data, img_width * img_height * 3);img_resize_slow(&rga_ctx, drm_buf, img_width, img_height, dstPtr, size.width,size.height);
}ImageUtil& ImageUtil::getInstance() {static ImageUtil util;return util;
}
至此,前置准备工作基本已准备就绪,下一步将对大家感兴趣的各类代码,进行移植与测试,欢迎各位粉丝们的积极留言与探讨。
这篇关于[RV1109/RV1126系列]-4.RGA、DRM对图像Resize加速的文章就介绍到这儿,希望我们推荐的文章对编程师们有所帮助!