开发者

OpenCL mapped memory doesn't work

开发者 https://www.devze.com 2023-02-25 12:29 出处:网络
I am trying to use memory-mapped buffers in my OpenCL program, but it doesn't work! Here is my kernel code:

I am trying to use memory-mapped buffers in my OpenCL program, but it doesn't work! Here is my kernel code:

/*
 * Kernel from the question; per the accompanying text it is meant to turn
 * the lowercase letters supplied by the host into uppercase.
 *
 * NOTE(review): as written it never reads `in`, and it does not use a
 * work-item index, so every work-item executes the same statements.
 */
__kernel void update(__global char *in, __global char *out)
{
    size_t i;
    /* NOTE(review): the ';' terminates the for statement, so the loop body is
       empty and the loop only counts i up to 10. */
    for (i = 0; i < 10; i++);
        /* Despite the indentation this runs exactly once, after the loop, with
           i == 10 -- one element past a 10-element buffer. */
        out[i] += 'A' - 'a';
}

Here is the host code:

/* Platform and device selected for the run. */
cl_platform_id platformId = NULL;
cl_device_id deviceId = NULL;

/* Context and command queue created on that device. */
cl_context context = NULL;
cl_command_queue commandQueue = NULL;

/* Buffer objects and the host pointers obtained by mapping them. */
cl_mem cmPinnedBufIn = NULL;
cl_mem cmPinnedBufOut = NULL;
/* NOTE(review): cmDevBufIn and cmDevBufOut are never assigned in the visible
   code, yet they are passed to clReleaseMemObject at the end -- confirm they
   are needed. */
cl_mem cmDevBufIn = NULL;
cl_mem cmDevBufOut = NULL;
unsigned char *cDataIn = NULL;
unsigned char *cDataOut = NULL;

/* Program/kernel handles and the status values returned by the OpenCL calls. */
cl_program program = NULL;
cl_kernel kernel = NULL;
cl_uint retNumDevices;
cl_uint retNumPlatforms;
cl_int ret;

/* Event attached to the kernel launch, and the profiling timestamps read from it. */
cl_event event;
cl_ulong start;
cl_ulong end;

/* GLOBAL_ITEM_SIZE and LOCAL_ITEM_SIZE are defined outside this excerpt;
   presumably this is the number of work-groups. It is only used as the
   repeat count of the display loop. */
size_t group_size = GLOBAL_ITEM_SIZE / LOCAL_ITEM_SIZE;

/* Kernel source file and the buffer it is read into. */
FILE *fp;
const char fileName[] = "./update.cl";
size_t sourceSize;
char *sourceStr;

/* Host-side copy of the 10 result bytes. */
unsigned char tt[10];

/* Load kernel source file */
if ( !(fp = fopen(fileName, "r")) )
    quitErr("Failed to load kernel.", EXIT_FAILURE);

/* Read at most MAX_SOURCE_SIZE bytes; sourceSize receives the number of bytes
   actually read.
   NOTE(review): the malloc result is not checked before fread writes to it. */
sourceStr = (char *)malloc(MAX_SOURCE_SIZE);
sourceSize = fread(sourceStr, 1, MAX_SOURCE_SIZE, fp);
fclose(fp);

/* Get Platform/Device Information: ask for one platform and one GPU device on it */
ret = clGetPlatformIDs(1, &platformId, &retNumPlatforms);
assert(ret == CL_SUCCESS);
ret = clGetDeviceIDs(platformId, CL_DEVICE_TYPE_GPU, 1, &deviceId, &retNumDevices);
assert(ret == CL_SUCCESS);

/* Create OpenCL Context
   NOTE(review): unlike the calls above, ret is not checked after
   clCreateContext or after clCreateCommandQueue below. */
context = clCreateContext( NULL, retNumDevices, &deviceId, NULL, NULL, &ret);

/* Create command queue with profiling enabled so event timestamps can be read later */
commandQueue = clCreateCommandQueue(context, deviceId, CL_QUEUE_PROFILING_ENABLE, &ret);

/* Create buffer objects: two 10-byte read/write buffers requested with
   CL_MEM_ALLOC_HOST_PTR and no initial host data */
size_t memSize = 10 * sizeof(unsigned char);
cmPinnedBufIn = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, memSize, NULL, &ret);
assert(ret == CL_SUCCESS);
cmPinnedBufOut = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, memSize, NULL, &ret);
assert(ret == CL_SUCCESS);

/* Map the input buffer for writing to obtain a host pointer to it.
   NOTE(review): this mapping is never released with clEnqueueUnmapMemObject
   before the kernel is enqueued; the OpenCL specification expects the unmap
   to happen before the device uses the buffer -- confirm. */
cDataIn = (unsigned char *)clEnqueueMapBuffer(commandQueue, cmPinnedBufIn, CL_TRUE, CL_MAP_WRITE, 0, memSize, 0, NULL, NULL, &ret);
assert(ret == CL_SUCCESS);

/* Initialize data: fill the mapped input with 'a', 'b', ... one byte per index.
   The output buffer is not written by the host here. */
for (size_t w = 0; w < memSize; w++)
    cDataIn[w] = 'a' + w;

/* Create kernel program from the source text read earlier (one string of sourceSize bytes) */
program = clCreateProgramWithSource(context, 1, (const char **)&sourceStr, (const size_t *)&sourceSize, &ret);
assert(ret == CL_SUCCESS);
ret = clBuildProgram(program, 1, &deviceId, NULL, NULL, NULL);
if (ret != CL_SUCCESS) {  
    /* On a build failure, fetch the build log (up to 10240 bytes) and hand it to quitErr */
    error("\nFail to build the program\n");
    char buffer[10240];
    clGetProgramBuildInfo(program, deviceId, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL);
    quitErr(buffer, EXIT_FAILURE);
}

/* Create the kernel object for the function named "update" */
kernel = clCreateKernel(program, "update", &ret);
assert(ret == CL_SUCCESS);

/* Set OpenCL kernel arguments: argument 0 is the input buffer, argument 1 the output buffer */
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&cmPinnedBufIn);
assert(ret == CL_SUCCESS);

ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&cmPinnedBufOut);
assert(ret == CL_SUCCESS);

/* Work sizes come from macros defined outside this excerpt */
size_t global_item_size = GLOBAL_ITEM_SIZE;
size_t local_item_size = LOCAL_ITEM_SIZE;

/* Execute OpenCL kernel as data parallel: one-dimensional launch whose
   completion event is stored in `event` */
ret = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, &global_item_size, &local_item_size, 0, NULL, &event);
if (ret == CL_INVALID_WORK_GROUP_SIZE)
    quitErr("Invalid work group size: error when compute group size.", EXIT_FAILURE);
assert(ret == CL_SUCCESS);

/* Kernel timing: when preformanceMeas is set, wait for the kernel event and
   print the difference of its end and start timestamps. The 1.0e-6 factor
   converts to milliseconds on the assumption that the timestamps are in
   nanoseconds, as the OpenCL profiling documentation states. */
if (preformanceMeas) {
    clWaitForEvents(1, &event);
    clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL);
    clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL);
    printf("Kernels execution time: %10.6f [ms]\n", (end - start) * 1.0e-6f);
}

/* Map the output buffer for reading to obtain a host pointer to the result */
cDataOut = (unsigned char *)clEnqueueMapBuffer(commandQueue, cmPinnedBufOut, CL_TRUE, CL_MAP_READ, 0, memSize, 0, NULL, NULL, &ret);
assert(ret == CL_SUCCESS);

/* Transfer result to host: copy the mapped bytes into the local array tt */
memcpy(tt, cDataOut, memSize);

/* Transfer timing.
   NOTE(review): the map call above passes NULL for its event argument, so
   `event` still refers to the kernel launch; this block therefore prints the
   kernel's timestamps again rather than the duration of the map. */
if (preformanceMeas) {
    //clWaitForEvents(1, &event);
    clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL);
    clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL);
    printf("Memory x buffer read: %10.6f [ms]\n", (end - start) * 1.0e-6f);
}

/* Display Results: print the memSize bytes of tt, repeated group_size times,
   followed by a single newline.
   NOTE(review): `i` is an int compared against the size_t group_size. */
int i;
for (i = 0; i < group_size; i++)
    for (size_t x = 0; x < memSize; x++)
        printf("%c", tt[x]);
printf("\n"); 

/* Finalization: flush and finish the command queue, then release the OpenCL
   objects and the kernel source buffer. */
clFlush(commandQueue);
clFinish(commandQueue);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseMemObject(cmPinnedBufIn);
clReleaseMemObject(cmPinnedBufOut);
/* NOTE(review): cmDevBufIn and cmDevBufOut are initialized to NULL and never
   assigned in the visible host code, so these two calls receive NULL handles;
   their return values are ignored. */
clReleaseMemObject(cmDevBufIn);
clReleaseMemObject(cmDevBufOut);
clReleaseCommandQueue(commandQueue);
clReleaseContext(context);

free(sourceStr);

return EXIT_SUCCESS;

The kernel is supposed to change the lowercase characters to uppercase, but my output is empty. When I assign the characters statically in the kernel, like this:

/*
 * Diagnostic variant of the kernel: ignores `in` and stores the ten
 * consecutive letters starting at 'A' into out[0..9].
 */
__kernel void update(__global char *in, __global char *out)
{
    size_t pos = 0;

    while (pos < 10) {
        out[pos] = 'A' + pos;
        ++pos;
    }
}

then the result is OK. From that I conclude that the input data are not being transferred correctly to device memory, but why? Can anyone help me, please?


The for loop in the kernel ends with a stray ";", so its body is empty and the statement that follows runs only once, after the loop has finished.

for (i = 0; i < 10; i++);   /* the trailing ';' is the entire loop body */


You must call clEnqueueUnmapMemObject after you have written the input into the mapped buffer. See the OpenCL 1.1 specification, section 5.4.2.1.

Your kernels do not read from in, and they do not depend on the work-item index get_global_id(0). You probably want something like:

/* Each work-item handles the single element selected by its global index. */
size_t i = get_global_id(0);
char c = in[i];
/* Shift only 'a'..'z' to uppercase; copy every other byte through unchanged. */
out[i] = (c>='a' && c<='z')?(c + 'A' - 'a'):c;

To write to char arrays in OpenCL 1.0, you need to enable the cl_khr_byte_addressable_store extension.

0

精彩评论

暂无评论...
验证码 换一张
取 消

关注公众号