Numerical Error in simple CUDA code_问答_开发者

开发者 https://www.devze.com 2023-01-14 06:08 出处：网络

I just started experimenting cuda with the following cude #include \"macro.hpp\" #include <algorithm>

相关专题：c gpu numerical

I just started experimenting cuda with the following cude

#include "macro.hpp"
#include <algorithm>
#include <iostream>
#include <cstdlib>

//#define double float
//#define double int

int RandomNumber(){return static_cast<double>(rand() % 1000);}

__global__ void sum3(double const* a,
             double const* b,
             double const* c,
             double * resu开发者_JS百科lt, 
             unsigned const* n)
{    
   unsigned i = blockIdx.x;
   while(i < (*n))
{
  result[i] = (a[i] + b[i] + c[i]);
}
};


int main()
{

  static unsigned size = 1e2;
  srand(0);
  double* a = new double[size];
  double* b = new double[size];
  double* c = new double[size];
  double* result = new double[size];

  std::generate(a, a+size, RandomNumber);
  std::generate(b, b+size, RandomNumber);
  std::generate(c, c+size, RandomNumber);

  double* ad, *bd,* cd;
  double* resultd;

  unsigned * sized;
  std::cout << cudaMalloc((void**) &ad, size*sizeof(double)) << std::endl;
  std::cout << cudaMalloc((void**) &bd, size*sizeof(double)) << std::endl;
  std::cout << cudaMalloc((void**) &cd, size*sizeof(double)) << std::endl;
  std::cout << cudaMalloc((void**) &resultd, size*sizeof(double)) << std::endl;
  std::cout << cudaMalloc((void**) &sized, sizeof(unsigned)) << std::endl;

  cudaMemcpy((void**) &sized, &size, sizeof(unsigned), cudaMemcpyHostToDevice);

  //  print_array(a, size);
  for(int i = 0; i < 1000; ++i)
    {
      cudaMemcpy(ad, a, size*sizeof(double), cudaMemcpyHostToDevice);
      cudaMemcpy(bd, b, size*sizeof(double), cudaMemcpyHostToDevice);
      cudaMemcpy(cd, c, size*sizeof(double), cudaMemcpyHostToDevice);      
      sum3<<<size, 1>>>(ad, bd, cd, resultd, sized);
      cudaMemcpy(result, resultd, size*sizeof(double), cudaMemcpyDeviceToHost);
    }

#ifdef PRINT
  for( int i = 0; i < size; ++i)
    {
      std::cout << a[i] << ", "<< b[i] <<"," << c[i] << "," << result[i]<< std::endl;
    }
#endif

  cudaFree(ad);
  cudaFree(bd);
  cudaFree(cd);
  cudaFree(resultd);

  delete[] a;
  delete[] b;
  delete[] c;
  delete[] result;

  return 0;
}

Compile this on mac book pro without any problem. However when I try to run this I get

930, 22,538,899
691, 832,205,23
415, 655,148,120
872, 876,481,985
761, 909,583,619
841, 104,466,917
610, 635,911,52
//more useless numbers

I have compared my samples with the one in Cuda By Example and I dont see any major difference except type. Any pointer on this problem is appreciated.

while(i < (*n))
{
  result[i] = (a[i] + b[i] + c[i]);
}

is wrong (infinite)

this is wrong

cudaMemcpy((void**) &sized, &size, sizeof(unsigned), cudaMemcpyHostToDevice);

&sized is address of pointer variable, not pointer value

Single number can be passed to device on the stack, so use

unsigned size

check return status of your cuda functions, http://www.drdobbs.com/high-performance-computing/207603131

you wrote:

double* a = new double[size];

so, "a" is a pointer to an array of doubles, then you say

  std::generate(a, a+size, RandomNumber);
  std::generate(b, b+size, RandomNumber);
  std::generate(c, c+size, RandomNumber);

which is wrong, you should say

  std::generate(*a, (*a)+size, RandomNumber);
  std::generate(*b, (*b)+size, RandomNumber);
  std::generate(*c, (*c)+size, RandomNumber);

Would be easier to help you if you state what do you want your program to do.

Also, you put

 unsigned * sized;
 std::cout << cudaMalloc((void**) &ad, size*sizeof(double)) << std::endl;

but you could do

 unsigned * sized;
 std::cout << cudaMalloc((void*) ad, size*sizeof(double)) << std::endl;

depending on what you are trying to do.

Numerical Error in simple CUDA code

精彩评论

关注公众号

热门标签

图文推荐

Numerical Error in simple CUDA code

更多 问答 相关资讯：

精彩评论

关注公众号

热门标签

图文推荐

更多问答相关资讯：