Following the Programming Giude of CUDA 4.0, I call cudaGLSetGLDevice before any other runtime calls. But the next cuda call, cudaMalloc, return "all CUDA-capable devices are busy or unavailable."
Also, in the NVIDIA forum (http://forums.nvidia.com/index.php?showtopic=186399) an user said that: "In multi-GPU systems though you're going to encounter even larger flaws in CUDA... a) You can't do CUDA/GL interop when the CUDA context and the OpenGL context are on different devices (undocumented, and unsupported in my experience) b) You can't do GL device affinity on non-windows machines. c) You can't do GL device affinity on consumer devices (Quadro/Tesla only)"
Is this true? My final work must run on a linux multi-gpu system. I开发者_如何转开发 have to change the graphic library to use? And in this case, what are you suggestions?
OS: Opensuse 11.4 64 bit
Graphic Card: GeForce 9600M GT
DRIVER: 275.21
See Cuda and OpenGL Interop
I had to replace a simple cudaMalloc() by a burden of gl* things.
Nevertheless, it works pretty well.
// The lattice as a GL Buffer
GLuint gridVBO = 0;
struct cudaGraphicsResource *gridVBO_CUDA = NULL;
// Ask for GL memory buffers
glGenBuffers(1, &gridVBO);
glBindBuffer(GL_ARRAY_BUFFER, gridVBO);
const size_t size = L * L * sizeof(unsigned char);
glBufferData(GL_ARRAY_BUFFER, size, NULL, GL_DYNAMIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, gridVBO);
glBindBuffer(GL_ARRAY_BUFFER, 0);
cutilSafeCall(cudaGraphicsGLRegisterBuffer(&gridVBO_CUDA, gridVBO, cudaGraphicsMapFlagsWriteDiscard));
// Map the GL buffer to a device pointer
unsigned char *grid = NULL;
cutilSafeCall(cudaGraphicsMapResources(1, &gridVBO_CUDA, 0));
size_t num_bytes = 0;
cutilSafeCall(cudaGraphicsResourceGetMappedPointer((void **) &grid,
&num_bytes, gridVBO_CUDA));
// Execution configuration
dim3 dimBlock(TILE_X, TILE_Y);
dim3 dimGrid(L/TILE_X, L/TILE_Y);
// Kernel call
kernel<<<dimGrid, dimBlock>>>(grid);
cutilCheckMsg("Kernel launch failed");
// Unmap buffer object
cutilSafeCall(cudaGraphicsUnmapResources(1, &gridVBO_CUDA, 0));
精彩评论