To get up to speed quickly, I am reading through "CUDA Application Design and Development". One thing I am very interested in, but which the first chapter does not cover, is how to time the execution of a kernel. I found some information on Stack Overflow about CUDA events (the cudaEvent_t type). Here is a simple example:
#include <iostream>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/reduce.h>
#include <thrust/sequence.h>
using namespace std;
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char* what) {
    if (status == cudaSuccess) return true;
    std::cerr << "CUDA error in " << what << ": "
              << cudaGetErrorString(status) << std::endl;
    return false;
}

// Times a Thrust fill + reduction on the device with CUDA events and checks
// the device result against a sum computed on the host.
//
// Returns 0 when the sums match and every CUDA call succeeded, 1 otherwise.
int main() {
    const int N = 50000;
    float elapsedTime = 0.0f;

    cudaEvent_t start, stop;
    if (!cudaOk(cudaEventCreate(&start), "cudaEventCreate(start)")) return 1;
    if (!cudaOk(cudaEventCreate(&stop), "cudaEventCreate(stop)")) {
        cudaEventDestroy(start);
        return 1;
    }

    // Start the event timer. Note that the timed interval covers the
    // device_vector construction as well as the sequence and reduce calls.
    bool ok = cudaOk(cudaEventRecord(start, 0), "cudaEventRecord(start)");

    thrust::device_vector<int> a(N);
    thrust::sequence(a.begin(), a.end(), 0);
    int sumA = thrust::reduce(a.begin(), a.end(), 0);

    ok = cudaOk(cudaEventRecord(stop, 0), "cudaEventRecord(stop)") && ok;
    // Device work runs asynchronously, so wait until the stop event has
    // actually been reached before reading the elapsed time.
    ok = cudaOk(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)") && ok;
    // Elapsed time between the two events, reported in milliseconds.
    ok = cudaOk(cudaEventElapsedTime(&elapsedTime, start, stop),
                "cudaEventElapsedTime") && ok;

    // Host reference: 0 + 1 + ... + (N-1). For N = 50000 this fits in an int.
    int sumCheck = 0;
    for (int i = 0; i < N; i++) sumCheck += i;

    const bool passed = ok && (sumCheck == sumA);
    if (passed)
        std::cout << "Test Succeeded in " << elapsedTime << " milliseconds!" << std::endl;
    else
        std::cout << "Test FAILED" << std::endl;

    ok = cudaOk(cudaEventDestroy(start), "cudaEventDestroy(start)") && ok;
    ok = cudaOk(cudaEventDestroy(stop), "cudaEventDestroy(stop)") && ok;

    return (passed && ok) ? 0 : 1;
}
No comments:
Post a Comment