本文主要介绍 CUDA 编程中的硬件信息查询与错误处置,希望能为大家解决编程问题提供一定的参考,需要的开发者可以跟随小编一起学习!
一、硬件信息查询:
#include <stdio.h>

/*
 * Enumerates the CUDA devices reported by the runtime and prints, for each
 * one, its name, memory clock rate, memory bus width and a peak memory
 * bandwidth figure derived from those two values.
 *
 * Return codes of the CUDA calls are intentionally not inspected in this
 * listing; the next listing in the article adds that check.
 *
 * Returns 0 always.
 */
int main() {
    // Start from zero so the variable has a defined value (and the loop is
    // skipped) even if the query below fails without writing to it.
    int nDevices = 0;
    cudaGetDeviceCount(&nDevices);

    for (int i = 0; i < nDevices; i++) {
        cudaDeviceProp prop;
        cudaGetDeviceProperties(&prop, i);

        printf("Device Number: %d\n", i);
        printf(" Device name: %s\n", prop.name);
        printf(" Memory Clock Rate (KHz): %d\n", prop.memoryClockRate);
        printf(" Memory Bus Width (bits): %d\n", prop.memoryBusWidth);

        // clock (kHz) * bus width in bytes (bits / 8) gives KB/s per
        // transfer; dividing by 1e6 converts to GB/s. The factor 2.0
        // presumably accounts for double-data-rate memory.
        printf(" Peak Memory Bandwidth (GB/s): %f\n\n",
               2.0 * prop.memoryClockRate * (prop.memoryBusWidth / 8) / 1.0e6);
    }
    return 0;
}
二、错误处置:
1、代码段一:
#include <stdio.h>

/*
 * Enumerates the CUDA devices reported by the runtime and prints, for each
 * one, its name, memory clock rate, memory bus width and a peak memory
 * bandwidth figure derived from those two values.
 *
 * The status returned by cudaGetDeviceCount is checked: on failure the
 * runtime's error string is printed, no devices are listed, and the process
 * exits with a non-zero status.
 *
 * Returns 0 on success, 1 if the device count could not be obtained.
 */
int main() {
    // Start from zero so that a failed query leaves the loop below with
    // nothing to iterate over instead of reading an indeterminate value.
    int nDevices = 0;

    cudaError_t err = cudaGetDeviceCount(&nDevices);
    if (err != cudaSuccess)
        printf("%s\n", cudaGetErrorString(err));

    for (int i = 0; i < nDevices; i++) {
        cudaDeviceProp prop;
        cudaGetDeviceProperties(&prop, i);

        printf("Device Number: %d\n", i);
        printf(" Device name: %s\n", prop.name);
        printf(" Memory Clock Rate (KHz): %d\n", prop.memoryClockRate);
        printf(" Memory Bus Width (bits): %d\n", prop.memoryBusWidth);

        // clock (kHz) * bus width in bytes (bits / 8) gives KB/s per
        // transfer; dividing by 1e6 converts to GB/s. The factor 2.0
        // presumably accounts for double-data-rate memory.
        printf(" Peak Memory Bandwidth (GB/s): %f\n\n",
               2.0 * prop.memoryClockRate * (prop.memoryBusWidth / 8) / 1.0e6);
    }

    // Propagate the failure to the caller/shell instead of always reporting 0.
    return (err == cudaSuccess) ? 0 : 1;
}
与第一部分的查询代码相比,这段代码的改动在于增加了对 cudaGetDeviceCount 返回值的检查:
// Keep the status code returned by the device-count query instead of discarding it.
cudaError_t err = cudaGetDeviceCount(&nDevices);
// Anything other than cudaSuccess is reported as the runtime's error string.
// Only a message is printed here; execution continues past this check.
if (err != cudaSuccess)
printf("%s\n", cudaGetErrorString(err));
2、代码段二:
#include <iostream>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * SAXPY kernel: y[i] = a * x[i] + y[i] for every i in [0, n).
 *
 * Expects a 1D launch with at least n threads in total; each thread handles
 * one element and threads whose index falls past n do nothing.
 * x and y must be device pointers to at least n floats each.
 */
__global__
void saxpy(int n, float a, float *x, float *y)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) y[i] = a * x[i] + y[i];
}

/*
 * Runs SAXPY on 1M elements with explicit host and device buffers, times the
 * kernel with CUDA events, reports launch and execution errors separately,
 * verifies the result on the host and prints the effective bandwidth.
 *
 * Returns 0 on success, 1 if host allocation fails or the kernel reports an
 * error.
 */
int main(void)
{
    int N = 1 << 20;  // 1M elements
    size_t bytes = N * sizeof(float);
    int status = 0;

    // Separate host (x, y) and device (d_x, d_y) buffers; data is moved
    // between them with explicit cudaMemcpy calls.
    float *x, *y, *d_x, *d_y;
    x = (float*)malloc(bytes);
    y = (float*)malloc(bytes);
    if (x == NULL || y == NULL) {
        fprintf(stderr, "Host allocation failed\n");
        free(x);
        free(y);
        return 1;
    }
    cudaMalloc(&d_x, bytes);
    cudaMalloc(&d_y, bytes);

    // Initialize x and y arrays on the host.
    for (int i = 0; i < N; i++) {
        x[i] = 1.0f;
        y[i] = 2.0f;
    }

    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);

    cudaMemcpy(d_x, x, bytes, cudaMemcpyHostToDevice);
    cudaMemcpy(d_y, y, bytes, cudaMemcpyHostToDevice);

    cudaEventRecord(start);
    saxpy<<<(N + 255) / 256, 256>>>(N, 2.0f, d_x, d_y);

    // Launch-time problems (e.g. an invalid configuration) are reported
    // immediately through the last-error state.
    cudaError_t errSync = cudaGetLastError();

    // Record the stop event before blocking on the device so the measured
    // interval covers the kernel and not the host-side wait.
    cudaEventRecord(stop);

    // Errors raised while the kernel executes only surface once the host
    // synchronizes with the device.
    cudaError_t errAsync = cudaDeviceSynchronize();

    if (errSync != cudaSuccess) {
        printf("Sync kernel error: %s\n", cudaGetErrorString(errSync));
        status = 1;
    }
    if (errAsync != cudaSuccess) {
        printf("Async kernel error: %s\n", cudaGetErrorString(errAsync));
        status = 1;
    }

    cudaMemcpy(y, d_y, bytes, cudaMemcpyDeviceToHost);

    cudaEventSynchronize(stop);
    float milliseconds = 0;
    cudaEventElapsedTime(&milliseconds, start, stop);

    // Check for errors (all values should be 4.0f: 2.0f * 1.0f + 2.0f).
    float maxError = 0.0f;
    for (int i = 0; i < N; i++)
        maxError = fmaxf(maxError, fabsf(y[i] - 4.0f));
    printf("Max error: %f . \n", maxError);

    // N elements * 4 bytes * 3 accesses (read x, read y, write y);
    // bytes / ms / 1e6 yields GB/s.
    printf("Effective Bandwidth (GB/s): %f .\n", N * 4 * 3 / milliseconds / 1e6);

    // Host buffers came from malloc and must go back through free();
    // only the device buffers belong to cudaFree.
    free(x);
    free(y);
    cudaFree(d_x);
    cudaFree(d_y);
    cudaEventDestroy(start);
    cudaEventDestroy(stop);

    return status;
}
这篇关于 CUDA 编程中硬件信息查询与错误处置的文章就介绍到这儿,希望对各位开发者有所帮助!