This code only works for square matrices such as a[4][4], b[4][4], result2[4][4]; it does not work for non-square ones such as a[4][10], b[10][10], result2[4][10].
//Init
for(r=0;r<numrowsa;r++){
for(c=0;c<numcolsa;c++){
a[r][c]=rand()%101;
}
}
for(r=0;r<numrowsb;r++){
for(c=0;c<numcolsb;c++){
b[r][c]=rand()%101;
}
}
for(r=0;r<numrowsr;r++){
for(c=0;c<numcolsr;c++){
result[r][c]=0;
}
}
for(r=0;r<numrowsr2;r++){
for(c=0;c<numcolr2;c++){
result2[r][c]=0;
}
}
//end init
// Timed section 1: parallel attempt at result = a * b.
t1 = clock();
// Four threads are requested and each thread id is mapped to one hard-coded
// row of result (thread k computes row k), so only rows 0..3 are produced.
// NOTE(review): only j is listed in private(...). c is declared outside the
// region and is therefore shared, yet every branch uses it as its loop counter.
#pragma omp parallel num_threads(4) private(j)
{
    int thr = omp_get_thread_num();
    if (thr == 0)
        for (c = 0; c < numcolsr; c++) {
            for (j = 0; j < numcolsa; j++)
                result[0][c] += a[0][j] * b[j][c];
        }
    else if (thr == 1)
        for (c = 0; c < numcolsr; c++) {
            for (j = 0; j < numcolsa; j++)
                result[1][c] += a[1][j] * b[j][c];
        }
    else if (thr == 2)
        for (c = 0; c < numcolsr; c++) {
            // The inner bound is numcolsa, exactly as in the other branches.
            for (j = 0; j < numcolsa; j++)
                result[2][c] += a[2][j] * b[j][c];
        }
    else if (thr == 3)
        for (c = 0; c < numcolsr; c++) {
            for (j = 0; j < numcolsa; j++)
                result[3][c] += a[3][j] * b[j][c];
        }
}
t2 = clock();
// clock() counts in units of 1/CLOCKS_PER_SEC; the "ms" label is only exact
// on platforms where CLOCKS_PER_SEC equals 1000.
cout << endl << "Time" << t2 - t1 << "ms" << endl;
t1 = clock();
//trying serial calculation
for(r=0;r<numrowsr2;r++){
for(c=0;c<numcolr2;c++){
for(i=0 ; i<numcolsa ; i++)
result2[r][c]+=a[r][i]*b[i][c];
}
}
t2 = clock();
/*
for(r=0;r<numrows;r++){
for(c=0;c<numcols;c++){
cout <<result[r][c]<<" ";
if(c == numcols-1)
cout << endl;
}
}
*/
cout <<endl<< "Time"<<t2-t1<<"ms"<<endl;
The error is:
Run-time check failure: stack around the variable 'b' was corrupted.
What's the problem?
ouch.
# pragma omp parallel
int t = omp_get_thread_num();
int nt = ...;
for(int i=t ; i<M ; i += nt) {
for(int j=0 ; j<N ; ++j) {
for(int k=0 ; k<K ; ++k) {
....
}
}
}
You wrote:
// Excerpt of the per-thread dispatch: each thread id gets its own branch and
// its own fixed row, but every branch loops over the same variables c and j.
if(thr == 0)
for(c=0;c<numcolsr;c++){
for(j=0 ;j<numcolsa ; j++)
result[0][c]+=a[0][j]*b[j][c];
}
else if (thr ==1 )
for(c=0;c<numcolsr;c++){
for(j=0 ; j<numcolsa ; j++)
result[1][c]+=a[1][j]*b[j][c];
}
// and so on
I don't know the details of OpenMP, but am I correct in thinking that `c` and `j` will be shared among the threads? All the loops use the same `c` and the same `j`.
In that case, various race conditions could happen. E.g. thread 1 could execute `c++` just before thread 0 executes `result[0][c]+=a[0][j]*b[j][c]`, resulting in an out-of-bounds read/write.
精彩评论