% Write every "a.b.c:d.e.f:g.h.i:j.k.l" combination to sequence2.txt, one per
% line, in lexicographic order.
%
% Each of the four dot-separated groups steps through the same triples:
%   first field 0, second 0..19, third 0..19
%   first field 1, second 0..4,  third 0..19
%   first field 1, second 5,     third 0..11
%
% Every statement ends in a semicolon so nothing is echoed to the console
% inside the loops, and each line is emitted with a single fprintf call.
s = 1;  % running count of lines written, plus one
r = m = n = o = p = q = u = t = 19;
myfile = fopen("sequence2.txt", "w", "ieee-le");
if (myfile < 0)
  error("could not open sequence2.txt for writing");
endif
for a = 0:1
  if (a == 1)
    r = 5;
  endif
  for b = 0:r
    if (a == 1 && b == 5)
      m = 11;
    endif
    for c = 0:m
      % The bounds of the second group start fresh for every first-group triple.
      n = o = 19;
      for d = 0:1
        if (d == 1)
          n = 5;
        endif
        for e = 0:n
          if (d == 1 && e == 5)
            o = 11;
          endif
          for f = 0:o
            p = q = 19;
            for g = 0:1
              if (g == 1)
                p = 5;
              endif
              for h = 0:p
                if (g == 1 && h == 5)
                  q = 11;
                endif
                for i = 0:q
                  t = u = 19;
                  for j = 0:1
                    if (j == 1)
                      t = 5;
                    endif
                    for k = 0:t
                      if (j == 1 && k == 5)
                        u = 11;
                      endif
                      for l = 0:u
                        s = s + 1;
                        fprintf(myfile, "%d.%d.%d:%d.%d.%d:%d.%d.%d:%d.%d.%d\n", ...
                                a, b, c, d, e, f, g, h, i, j, k, l);
                      end
                    end
                  end
                end
              end
            end
          end
        end
      end
    end
  end
end
% Closing the file flushes any buffered output to disk.
fclose(myfile);
The above code in Octave generates a number sequence and writes it to a text file. It will take days to complete execution, since it generates around 2^36 numbers. Can anyone please let us know how to parallelise this code on an HPC system?
You may not need to parallelize this; you can speed this up by about 10000x by moving to a compiled language. (Seriously; see below.) Octave or even matlab are going to be slow as molasses running this. They're great for big matrix operations, but tonnes of nested loops with if statements in them is going to run slow slow slow. Normally I'd suggest moving Octave/Matlab code to FORTRAN, but since you've already got the file I/O written essentially with C statements anyway, the C equivalent of this code almost writes itself:
#include <stdio.h>
/*
 * Writes every "a.b.c:d.e.f:g.h.i:j.k.l" combination to sequence2-c.txt,
 * one per line, in lexicographic order.
 *
 * Each of the four dot-separated groups steps through the same 512 triples:
 *   first field 0, second 0..19, third 0..19   (400 triples)
 *   first field 1, second 0..4,  third 0..19   (100 triples)
 *   first field 1, second 5,     third 0..11   ( 12 triples)
 * so a complete run produces 512^4 = 2^36 lines.
 *
 * Returns 0 on success, 1 if the file cannot be opened or a write fails.
 */
int main(int argc, char **argv) {
  int a, b, c, d, e, f, g, h, i, j, k, l;
  int r, m, n, o, p, q, u, t;
  FILE *myfile;

  (void)argc;
  (void)argv;

  myfile = fopen("sequence2-c.txt", "w");
  if (myfile == NULL) {
    perror("sequence2-c.txt");
    return 1;
  }

  r = m = 19;
  for (a = 0; a <= 1; a++) {
    if (a == 1) {
      r = 5;
    }
    for (b = 0; b <= r; b++) {
      if (a == 1 && b == 5) {
        m = 11;
      }
      for (c = 0; c <= m; c++) {
        /* Bounds of the next group start fresh for every outer triple. */
        n = o = 19;
        for (d = 0; d <= 1; d++) {
          if (d == 1) {
            n = 5;
          }
          for (e = 0; e <= n; e++) {
            if (d == 1 && e == 5) {
              o = 11;
            }
            for (f = 0; f <= o; f++) {
              p = q = 19;
              for (g = 0; g <= 1; g++) {
                if (g == 1) {
                  p = 5;
                }
                for (h = 0; h <= p; h++) {
                  if (g == 1 && h == 5) {
                    q = 11;
                  }
                  for (i = 0; i <= q; i++) {
                    t = u = 19;
                    for (j = 0; j <= 1; j++) {
                      if (j == 1) {
                        t = 5;
                      }
                      for (k = 0; k <= t; k++) {
                        if (j == 1 && k == 5) {
                          u = 11;
                        }
                        for (l = 0; l <= u; l++) {
                          /* Stop at the first failed write (e.g. disk full)
                           * instead of looping on for hours. */
                          if (fprintf(myfile,
                                      "%d.%d.%d:%d.%d.%d:%d.%d.%d:%d.%d.%d\n",
                                      a, b, c, d, e, f, g, h, i, j, k, l) < 0) {
                            goto write_failed;
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }

  /* fclose flushes the remaining buffered output, so its result matters. */
  if (fclose(myfile) != 0) {
    perror("sequence2-c.txt");
    return 1;
  }
  return 0;

write_failed:
  perror("sequence2-c.txt");
  fclose(myfile);
  return 1;
}
Running your octave code above and this C code (compiled with -O3) for one minute each, the octave code got through about 2,163 items in the sequence, and the compiled C code got through 23,299,068. So that's good.
In terms of parallelization, breaking this up into independent pieces is easy, but they won't be especially well load-balanced. If you start (say) 26 processes, and give them (a=0,b=0), (a=0,b=1)...,(a=0,b=19),(a=1,b=0), (a=1,b=1),.. (a=1,b=5), they can all run independently and you can concatenate the results when they're all done. The only downside is that the a=0 jobs will run somewhat slower than the a=1 jobs, but maybe that's good enough to start.
精彩评论