CUDA errors: identifier "(global/device)" is undefined, no suitable conversion, _问答_开发者

I'm doing a project on photon mapping. I coded the ray tracer part and it ran successfully on the CPU. Now I'm doing the same on the GPU (through ssh).

I'm getting the following errors:

    nvcc -c -lSDL -lGL -lGLU AntTweakBar.a gpuRayTracer.cu
gpuRayTracer.cu(44): error: identifier "raytracer" is undefined

gpuRayTracer.cu(53): error: no suitable conversion function from

"Float3" to "void *" exists

gpuRayTracer.cu(55): error: no suitable conversion function from

"Float3" to "void *" exists

gpuRayTracer.cu(76): error: identifier "GPUsub" is undefined

gpuRayTracer.cu(77): error: identifier "GPUnormalize" is undefined

gpuRayTracer.cu(78): error: identifier "GPUcross" is undefined

gpuRayTracer.cu(80): error: calling a host function from a

__device__/__global__ function is not allowed

gpuRayTracer.cu(90): error: identifier "GPUmul" is undefined

gpuRayTracer.cu(95): error: calling a host function from a

__device__/__global__ function is not allowed

gpuRayTracer.cu(95): error: identifier "GPUadd" is undefined

gpuRayTracer.cu(192): error: calling a host function from a

__device__/__global__ function is not allowed

15 errors detected in the compilation of

"/tmp/tmpxft_0000432c_00000000-4_gpuRayTracer.cpp1.ii".
make: *** [gpuRayTracer.o] Error 2

The statements that trigger the errors reported for gpuRayTracer.cu lines 44, 53 and 55 are marked in the code below.

Float3 used below is a structure containing 3 float variables(x,y,z coordinates)

// Host-side driver for the GPU ray tracer: allocates device copies of the
// scene's eye/lookAt points, screen dimensions, object count, pixel buffer and
// object list, launches the `raytracer` kernel, then copies the data back
// into the Scene members.
//
// NOTE(review): none of the cudaMalloc/cudaMemcpy return codes are checked,
// nothing allocated here is released with cudaFree, and no error query follows
// the kernel launch.
void Scene::GPUrayTracer(){

// NOTE(review): this is a host array of OBJ_MAX pointers, not a single device
// pointer. The cudaMalloc below writes through &d_objectList, and the later
// cudaMemcpy calls pass the array itself (a host address) as if it were device
// memory. A single `Object *` was probably intended -- confirm against the
// declaration of Scene::objectList.
Object *d_objectList[OBJ_MAX];
GLubyte         * d_pixels;
int *d_Width, *d_Height;
Float3 *d_eye,*d_lookAt;
int *d_objectCount;
// Byte counts reused by the allocations and copies below.
size_t size1=sizeof(Float3);
size_t size2=sizeof(int);
// NOTE(review): size of ONE GLubyte; it is used below as the size of the whole
// pixel buffer, which is presumably much larger -- confirm the framebuffer size.
size_t size3=sizeof(GLubyte);
//size_t size4=sizeof(Object);

// Device copies of the eye and lookAt points.
cudaMalloc(&d_eye,size1);
cudaMalloc(&d_lookAt,size1);
cudaMemcpy(d_eye,&this->eye,size1,cudaMemcpyHostToDevice);

cudaMemcpy(d_lookAt,&this->lookAt,size1,cudaMemcpyHostToDevice);


// Device copy of the object count.
cudaMalloc(&d_objectCount,size2);
cudaMemcpy(d_objectCount,&this->objectCount,size2,cudaMemcpyHostToDevice);



// Device copies of the screen dimensions.
cudaMalloc(&d_Width,size2);
cudaMalloc(&d_Height,size2);
cudaMemcpy(d_Width,&this->screenWidth,size2,cudaMemcpyHostToDevice);

cudaMemcpy(d_Height,&this->screenHeight,size2,cudaMemcpyHostToDevice);


// Device pixel buffer (allocated with size3, i.e. a single byte -- see the note
// on size3 above).
// NOTE(review): the copy source is &this->pixels; if `pixels` is a pointer
// member this copies from the pointer variable rather than from the buffer it
// points to -- confirm the member's type.
cudaMalloc(&d_pixels,size3);
cudaMemcpy(d_pixels,&this->pixels,size3,cudaMemcpyHostToDevice);


// Device copy of the object list (see the note on d_objectList above).
cudaMalloc((void **)&d_objectList,
(sizeof(this->objectList)));
cudaMemcpy(d_objectList,
&this->objectList,
sizeof(this->objectList),cudaMemcpyHostToDevice);

// Kernel launch: one block of 500 threads.
// NOTE(review): `raytracer` is defined after this function and no earlier
// declaration is visible here, which matches the reported
// 'identifier "raytracer" is undefined'. The call also passes 6 arguments,
// while the kernel defined later in this file declares 7 parameters and takes
// the width, height, eye, lookAt and object count by value, not by pointer.
line 44:raytracer<<<1,500>>>(d_pixels,d_Width,d_Height,
d_objectList,d_eye,d_lookAt);

// Copy results back to the host.
// NOTE(review): every copy below passes the ADDRESS of the local pointer
// variable (&d_xxx, a host address) as the source instead of the device
// pointer it holds.
cudaMemcpy((this->objectList),&d_objectList,sizeof(this-
>objectList),cudaMemcpyDeviceToHost);
cudaMemcpy(this->pixels,&d_pixels,size3,cudaMemcpyDeviceToHost);


// NOTE(review): `(int *)this->screenWidth` casts the member's value to a
// pointer; the host-to-device copies above used `&this->screenWidth`, so the
// address-of form is presumably what is wanted here too. Same for the
// screenHeight and objectCount copies.
cudaMemcpy((int *)this->screenWidth,&d_Width,size2,cudaMemcpyDeviceToHost);

cudaMemcpy((int *)this->screenHeight,&d_Height,size2,cudaMemcpyDeviceToHost);

cudaMemcpy((int *)this->objectCount,&d_objectCount,size2,cudaMemcpyDeviceToHost);

// NOTE(review): the compiler reports that `this->eye` / `this->lookAt` are
// Float3 objects, which cannot be converted to void*; the destination needs
// the member's address. sizeof(d_eye) / sizeof(d_lookAt) is the size of a
// pointer, not of a Float3.
cudaMemcpy(


line:53   (void *)this->eye,
(void *)&d_eye,sizeof(d_eye),cudaMemcpyDeviceToHost);
line:55  cudaMemcpy(this->lookAt,(void *)&d_lookAt,sizeof(d_lookAt),c开发者_运维知识库udaMemcpyDeviceToHost);


}



// Ray-tracing kernel entry point. The per-pixel body was elided in the
// original post; only the thread-to-pixel mapping is shown.
//
// Parameters:
//   out_data      output byte buffer (presumably the framebuffer in device
//                 memory; its layout is not shown here -- confirm)
//   screenWidth   image width, passed by value
//   screenHeight  image height, passed by value
//   objectList    scene objects (presumably a device pointer -- confirm)
//   eye, lookAt   camera points, passed by value
//   objectCount   number of entries in objectList, passed by value
//
// Each thread derives one (x, y) coordinate from its block and thread index,
// so the launch configuration decides which pixels are covered.
__global__ void raytracer( unsigned char *out_data,const int screenWidth,const int screenHeight,Object * objectList,Float3 eye,Float3 lookAt,int objectCount)
{
    // Global coordinates: block offset plus the thread's position inside the
    // block. The block offset must come from blockIdx; multiplying blockDim by
    // a constant yields the same offset for every block.
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid rarely divides the image evenly; threads past the image edge
    // have nothing to do.
    if (x >= screenWidth || y >= screenHeight)
        return;

    // code goes here
}

// Device-side minimum of two floats.
// Returns `a` only when it is strictly less than `b`; in every other case
// (including when the comparison is false because of a NaN) returns `b`.
__device__ float GPUffminf(float a, float b){
    return (a < b) ? a : b;
}



// Device-side maximum of two floats.
// Returns `a` only when it is strictly greater than `b`; in every other case
// (including when the comparison is false because of a NaN) returns `b`.
__device__ float GPUffmaxf(float a, float b){
    return (a > b) ? a : b;
}





// Euclidean length of a 3-component vector: sqrt(x^2 + y^2 + z^2).
__device__ float GPUmag(Float3 a){
    const float lengthSquared = a.x*a.x+a.y*a.y+a.z*a.z;

    // Single-precision square root, spelled out explicitly.
    return sqrtf(lengthSquared);
}



// Returns `a` scaled to unit length.
// A vector whose length is exactly zero cannot be normalized and is returned
// unchanged.
__device__ Float3 GPUnormalize(Float3 a){
    // Use the device-side length helper: a host-only function cannot be
    // called from __device__ code.
    const float magn = GPUmag(a);

    if (magn == 0.0f)
        return a;

    // One reciprocal, then three multiplications; the float literal keeps the
    // arithmetic in single precision.
    const float inv = 1.0f / magn;

    Float3 res;
    res.x = a.x * inv;
    res.y = a.y * inv;
    res.z = a.z * inv;
    return res;
}

// Cross product a x b of two 3-component vectors.
__device__ Float3 GPUcross(Float3 a ,Float3 b){
    Float3 product;

    product.x = a.y*b.z-a.z*b.y;
    product.y = a.z*b.x-a.x*b.z;
    product.z = a.x*b.y-a.y*b.x;

    return product;
}

// Dot product of two 3-component vectors.
__device__  float GPUdot(Float3 a,Float3 b){
    const float sum = a.x*b.x + a.y*b.y + a.z*b.z;
    return sum;
}



// Component-wise difference a - b.
__device__  Float3 GPUsub(Float3 a,Float3 b){
    Float3 diff;

    diff.x = a.x - b.x;
    diff.y = a.y - b.y;
    diff.z = a.z - b.z;

    return diff;
}

// Component-wise sum a + b.
__device__ Float3 GPUadd(Float3 a,Float3 b){
    Float3 total;

    total.x = a.x + b.x;
    total.y = a.y + b.y;
    total.z = a.z + b.z;

    return total;
}





// Scales every component of vector `a` by the scalar `b`.
__device__ Float3 GPUmul(Float3 a,float b){
    Float3 scaled;

    scaled.x = a.x * b;
    scaled.y = a.y * b;
    scaled.z = a.z * b;

    return scaled;
}

What's wrong in the code?

Apart from this, I have a few questions:

* Does the order in which the .cu/.cpp files are compiled matter?
* Should the kernel be invoked only from main.cpp?
* If so, should a .cu file consist of only `__global__`/`__device__` functions?

Okay, first of all, you can put ordinary C/C++ functions in .cu files alongside `__global__`/`__device__` functions. The order of compilation does not matter either.

For this error: no suitable conversion function from "Float3" to "void *" exists

you need to do (void**)

instead of

(void*)

For errors like these: gpuRayTracer.cu(76): error: identifier

"GPUsub" is undefined

you need to define GPUsub function before the functions that calls it in the .cu file. Just move the function definition on top of the file.

For errors like this: calling a host function from a `__device__`/`__global__` function is not allowed

Okay, you can't call any function that executes on the CPU (i.e. one that has no `__device__` or `__global__` qualifier) from a `__device__` or `__global__` function.

Here's what you need to do to make life easy.

Define each function in a separate .cu file and use a header file for their declarations. You should have one HOST function that executes the whole pipeline; it can call GPU as well as CPU functions.