dx11硬件解码传递给opencl并行处理

本文主要是介绍dx11硬件解码传递给opencl并行处理，希望对大家解决编程问题提供一定的参考价值，需要的开发者们随着小编来一起学习吧！

directx11 解码

使用ffmpeg进行directx11 解码
要用OpenCL处理D3D11 Texture中解码得到的NV12数据，需要经过几个步骤：首先，确保D3D11 Texture已正确创建，并且OpenCL上下文与该D3D11设备关联；然后，将NV12数据按两个平面（Y平面和UV平面）分别转换为OpenCL对象；最后，在OpenCL内核中处理这些数据。

将d3d11 texture转换成opencl对象

OpenCL本身可以与DX9、DX11等进行互操作，这里正是利用这一点，用OpenCL对解码后的数据进行并行处理。

bool CreateSharedSurface(void* data,  int nView, bool bIsReadOnly,cl_mem* y, cl_mem* uv)
{mfxHDLPair mid_pair = { 0 };mfxStatus sts = m_pAlloc->GetHDL(m_pAlloc->pthis, mid, reinterpret_cast<mfxHDL*>(&mid_pair));if (sts) return 0;ID3D11Texture2D *surf = (ID3D11Texture2D*)data;cl_int error = CL_SUCCESS;cl_mem memY = clCreateFromD3D11Texture2DKHR(m_clcontext, bIsReadOnly ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE,data, 0, &error);if (error) {log.error() << "clCreateFromD3D11Texture2DKHR failed. Error code: " << error << endl;return -1;}cl_mem memUV = clCreateFromD3D11Texture2DKHR(m_clcontext, bIsReadOnly ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE,data, 1, &error);if (error) {log.error() << "clCreateFromD3D11Texture2DKHR failed. Error code: " << error << endl;return -1;}*y = memY;*uv = memUV;return 0;
}

以上是转换过程

编写OpenCL内核处理NV12数据

和cuda一样，需要写核函数来处理

// Example kernel: reads one luma sample and the matching chroma pair from an
// NV12 frame (given as two images) and writes an RGBA pixel. The conversion
// is only a sample; replace it with the processing the application needs.
//
// yImage      - luma plane, one sample per output pixel
// uvImage     - interleaved chroma plane, sampled at half the luma coordinates
// outputImage - destination image, written at the work-item position
//
// NOTE(review): read_imageui/write_imageui require unsigned-integer channel
// types. If the shared NV12 planes are exposed as normalized (UNORM) images,
// read_imagef with rescaling is needed instead - confirm the image format.
// NOTE(review): the coefficients below are kept from the sample; verify them
// against the colour matrix and range of the decoded stream.
__kernel void processNV12(__read_only image2d_t yImage,
                          __read_only image2d_t uvImage,
                          __write_only image2d_t outputImage)
{
    const int2 pos = (int2)(get_global_id(0), get_global_id(1));

    // Ignore work-items that fall outside the luma image.
    const int2 dim = get_image_dim(yImage);
    if (pos.x >= dim.x || pos.y >= dim.y)
        return;

    // read_imageui yields uint4; there is no implicit vector conversion to
    // uchar4/uchar2, so keep the native type.
    const uint4 yPixel = read_imageui(yImage, pos);       // luma sample
    const uint4 uvPixel = read_imageui(uvImage, pos / 2); // chroma plane is half the luma size

    const float y = (float)yPixel.x;
    const float u = (float)uvPixel.x - 128.0f;
    const float v = (float)uvPixel.y - 128.0f;

    const float r = y + 1.14f * v;
    const float g = y - 0.395f * u - 0.581f * v;
    const float b = y + 2.032f * u;

    // Clamp to the 8-bit range before converting; write_imageui takes uint4.
    const uint4 outPixel = convert_uint4(clamp((float4)(r, g, b, 255.0f), 0.0f, 255.0f));
    write_imageui(outputImage, pos, outPixel);
}

cpp执行OpenCL内核

// 假设outputImage是目标OpenCL图像对象，已根据需要创建
size_t globalWorkSize[2] = { width, height }; // width和height分别为处理图像的宽度和高度
cl_kernel kernel = clCreateKernel(program, "processNV12", &err); // program为编译好的OpenCL程序
clSetKernelArg(kernel, 0, sizeof(cl_mem), &clYImage);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &clUVImage);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &outputImage);err = clEnqueueNDRangeKernel(commandQueue, kernel, 2, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr);
if (err != CL_SUCCESS) { /* 处理错误 */ }// 同步、读回数据、清理等操作

初始化过程

void initializeContextFromD3D11Device(ID3D11Device* pD3D11Device)
{
CV_UNUSED(pD3D11Device);
cl_uint numPlatforms;
cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);
if (status != CL_SUCCESS)
CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get number of platforms");
if (numPlatforms == 0)
CV_Error(cv::Error::OpenCLInitError, "OpenCL: No available platforms");std::vector<cl_platform_id> platforms(numPlatforms);
status = clGetPlatformIDs(numPlatforms, &platforms[0], NULL);
if (status != CL_SUCCESS)CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get number of platforms");// TODO Filter platforms by name from OPENCV_OPENCL_DEVICEint found = -1;
cl_device_id device = NULL;
cl_uint numDevices = 0;
cl_context context = NULL;// try with CL_PREFERRED_DEVICES_FOR_D3D11_NV
for (int i = 0; i < (int)numPlatforms; i++)
{clGetDeviceIDsFromD3D11NV_fn clGetDeviceIDsFromD3D11NV = (clGetDeviceIDsFromD3D11NV_fn)clGetExtensionFunctionAddressForPlatform(platforms[i], "clGetDeviceIDsFromD3D11NV");if (!clGetDeviceIDsFromD3D11NV)continue;device = NULL;numDevices = 0;status = clGetDeviceIDsFromD3D11NV(platforms[i], CL_D3D11_DEVICE_NV, pD3D11Device,CL_PREFERRED_DEVICES_FOR_D3D11_NV, 1, &device, &numDevices);//CL_PREFERRED_DEVICES_FOR_D3D11_NVif (status != CL_SUCCESS)continue;if (numDevices > 0){cl_context_properties properties[] ={CL_CONTEXT_D3D11_DEVICE_NV, (cl_context_properties)pD3D11Device,CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[i],//CL_CONTEXT_INTEROP_USER_SYNC, CL_FALSE,0};context = clCreateContext(properties, 1, &device, NULL, NULL, &status);if (status != CL_SUCCESS){clReleaseDevice(device);}else{found = i;break;}}
}
if (found < 0)
{// try with CL_ALL_DEVICES_FOR_D3D11_NVfor (int i = 0; i < (int)numPlatforms; i++){clGetDeviceIDsFromD3D11NV_fn clGetDeviceIDsFromD3D11NV = (clGetDeviceIDsFromD3D11NV_fn)clGetExtensionFunctionAddressForPlatform(platforms[i], "clGetDeviceIDsFromD3D11NV");if (!clGetDeviceIDsFromD3D11NV)continue;device = NULL;numDevices = 0;status = clGetDeviceIDsFromD3D11NV(platforms[i], CL_D3D11_DEVICE_NV, pD3D11Device,CL_ALL_DEVICES_FOR_D3D11_NV, 1, &device, &numDevices);//CL_ALL_DEVICES_FOR_D3D11_NVif (status != CL_SUCCESS)continue;if (numDevices > 0){cl_context_properties properties[] ={CL_CONTEXT_D3D11_DEVICE_NV, (cl_context_properties)pD3D11Device,CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[i],//CL_CONTEXT_INTEROP_USER_SYNC, CL_FALSE,0};context = clCreateContext(properties, 1, &device, NULL, NULL, &status);if (status != CL_SUCCESS){clReleaseDevice(device);}else{found = i;break;}}}if (found < 0)CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't create context for DirectX interop");
}//cv::ocl::Context& ctx = cv::ocl::Context::getDefault(false);
//cv::ocl::initializeContextFromHandle(ctx, platforms[found], context, device);
///As you say,`initializeContextFromHandle()` symbol is not in the exported list. So I use attachContext() instead for test.
cv::ocl::attachContext("NVIDIA CUDA", platforms[found], context, device);//Test passed//return ctx;}