开发者

convert u_int64_t to u_char on CUDA 2.3 nvopencc

开发者 https://www.devze.com 2022-12-09 05:08 出处:网络
CUDA 2.3 V0.2.1221 / 32bit linux Hi, I have a problem with the following code: __device__ void put_u64(void *vp, u_int64_t v) {

CUDA 2.3 V0.2.1221 / 32bit linux

Hi, I have a problem with the following code:

// Writes the 64-bit value v into the 8 bytes starting at vp, most significant
// byte first: p[0] receives bits 63..56 and p[7] receives bits 7..0.
// Declared __device__, so it is only callable from device code.
// NOTE(review): u_char and u_int64_t are not defined in this snippet;
// presumably they are the BSD-style typedefs from <sys/types.h> - confirm.
__device__ void put_u64(void *vp, u_int64_t v) {
u_char *p = (u_char *) vp;

// The cast binds tighter than '&', so each statement first converts the
// shifted value to u_char and only then masks the result with 0xff.
p[0] = (u_char) (v >> 56) & 0xff;
p[1] = (u_char) (v >> 48) & 0xff;
p[2] = (u_char) (v >> 40) & 0xff;
p[3] = (u_char) (v >> 32) & 0xff;
p[4] = (u_char) (v >> 24) & 0xff;
p[5] = (u_char) (v >> 16) & 0xff;
p[6] = (u_char) (v >> 8) & 0xff;
p[7] = (u_char) v & 0xff; }

With `make emu=1` (device emulation mode) the code compiles and works.

-

Without emulation I get the error:

### Assertion failure at line 1923 of ../../be/cg/cgemit.cxx

### incorrect register class for operand 0

nvopencc INTERNAL ERROR ... status 1

-

Can someone help, please? I just need a working way to convert a u_int64_t to an unsigned char[8].


I tried a simple test case and, unfortunately, it compiled fine, so I could not reproduce the problem. How are u_char and u_int64_t defined?

I would definitely agree that you should report this to NVIDIA, but they will need more code to be able to reproduce the problem. It may also help to look at the PTX code that is generated.

I used the following code:

#include <algorithm>
#include <iostream>
#include <iterator>

#include <cuda.h>

// Serializes v into the 8 bytes at vp in big-endian order: the byte at
// index 0 holds bits 63..56 of v and the byte at index 7 holds bits 7..0.
// Device-only helper; vp must point to at least 8 writable bytes.
__device__ void put_u64(void *vp, unsigned long long v)
{
    unsigned char *out = static_cast<unsigned char *>(vp);

    // Byte i takes the bits starting at position 56 - 8*i; the conversion to
    // unsigned char keeps only the low 8 bits of the shifted value.
    for (int i = 0; i < 8; ++i) {
        out[i] = static_cast<unsigned char>(v >> (56 - 8 * i));
    }
}

// Kernel wrapper around put_u64: writes the 8 big-endian bytes of `input`
// to `output`. No thread or block index is used, so every launched thread
// stores the same bytes to the same locations.
__global__ void test(unsigned char *output, unsigned long long input)
{
    put_u64(static_cast<void *>(output), input);
}

// Reports a failed CUDA runtime call on stderr.
// Returns true when err is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t err, const char *what)
{
    if (err == cudaSuccess) {
        return true;
    }
    std::cerr << what << " failed: " << cudaGetErrorString(err) << std::endl;
    return false;
}

// Runs the `test` kernel with a single thread on a fixed 64-bit input, copies
// the 8 resulting bytes back to the host and prints them in hexadecimal.
// Returns 0 on success and 1 if any CUDA call fails.
int main(void)
{
    unsigned char result[8];
    unsigned char *d_result = 0;
    unsigned long long input = 0x1212343456567878ULL;

    if (!cudaOk(cudaMalloc((void **)&d_result, sizeof(result)), "cudaMalloc")) {
        return 1;
    }

    test<<<1,1>>>(d_result, input);
    // A kernel launch returns no status itself; launch errors are fetched here.
    bool ok = cudaOk(cudaGetLastError(), "kernel launch");
    // The blocking copy also surfaces errors raised while the kernel executed.
    ok = ok && cudaOk(cudaMemcpy(result, d_result, sizeof(result),
                                 cudaMemcpyDeviceToHost), "cudaMemcpy");
    // Release the device buffer on both the success and the failure path.
    ok = cudaOk(cudaFree(d_result), "cudaFree") && ok;
    if (!ok) {
        return 1;
    }

    // Each byte is printed as an int in hex, with no separator or padding.
    std::cout << std::hex;
    std::copy(result, result + 8, std::ostream_iterator<int>(std::cout));
    std::cout << std::endl;
    return 0;
}
0

精彩评论

暂无评论...
验证码 换一张
取 消