how to parallelise this code in hpc?_问答_开发者

开发者 https://www.devze.com 2023-02-17 03:46 出处：网络

s=1 r=m=n=o=p=q=u=t=19 myfile = fopen (\"sequence2.txt\", \"w\", \"ieee-le\"); for a=0:1 if(a==1) r=5 endif

% Writes every record of the form  a.b.c:d.e.f:g.h.i:j.k.l  to sequence2.txt,
% one record per line.
%
% Each of the four dot-separated triples independently runs over the same
% 512 values:
%   0 . 0..19 . 0..19     (400 values)
%   1 . 0..4  . 0..19     (100 values)
%   1 . 5     . 0..11     ( 12 values)
% so the file ends up with 512^4 = 2^36 lines.
%
% Every statement is terminated with ';' on purpose: an unterminated
% assignment makes Octave echo the value to the console, and doing that
% inside the innermost loop dominates the run time.

s = 1;
r = m = n = o = p = q = u = t = 19;

myfile = fopen ("sequence2.txt", "w", "ieee-le");
if (myfile < 0)
  error ("could not open sequence2.txt for writing");
endif

for a = 0:1
  if (a == 1)
    r = 5;                      % second digit only goes up to 5 when a == 1
  endif
  for b = 0:r
    if (a == 1 && b == 5)
      m = 11;                   % last allowed prefix is 1.5, capped at 1.5.11
    endif
    for c = 0:m
      n = o = 19;               % reset the limits of the second triple
      for d = 0:1
        if (d == 1)
          n = 5;
        endif
        for e = 0:n
          if (d == 1 && e == 5)
            o = 11;
          endif
          for f = 0:o
            p = q = 19;         % reset the limits of the third triple
            for g = 0:1
              if (g == 1)
                p = 5;
              endif
              for h = 0:p
                if (g == 1 && h == 5)
                  q = 11;
                endif
                for i = 0:q
                  t = u = 19;   % reset the limits of the fourth triple
                  for j = 0:1
                    if (j == 1)
                      t = 5;
                    endif
                    for k = 0:t
                      if (j == 1 && k == 5)
                        u = 11;
                      endif
                      for l = 0:u
                        s = s + 1;
                        % One formatted write per record instead of 23
                        % separate fputs/num2str calls; same bytes on disk.
                        fprintf (myfile, "%d.%d.%d:%d.%d.%d:%d.%d.%d:%d.%d.%d\n", a, b, c, d, e, f, g, h, i, j, k, l);
                      endfor
                    endfor
                  endfor
                endfor
              endfor
            endfor
          endfor
        endfor
      endfor
    endfor
  endfor
endfor

% Closing the file flushes any buffered output.
fclose (myfile);

The above code, written in Octave, generates a number sequence and writes it to a text file. It will take days to finish executing, since it generates around 2^36 numbers. Can anyone please let us know how to parallelise this code on an HPC system?

You may not need to parallelize this; you can speed this up by about 10000x by moving to a compiled language. (Seriously; see below.) Octave or even matlab are going to be slow as molasses running this. They're great for big matrix operations, but tonnes of nested loops with if statements in them is going to run slow slow slow. Normally I'd suggest moving Octave/Matlab code to FORTRAN, but since you've already got the file I/O written essentially with C statements anyway, the C equivalent of this code almost writes itself:

#include <stdio.h>

/*
 * Writes every record of the form "a.b.c:d.e.f:g.h.i:j.k.l" to
 * sequence2-c.txt, one record per line.
 *
 * Each of the four dot-separated triples independently runs over the same
 * 512 values:
 *   0 . 0..19 . 0..19     (400 values)
 *   1 . 0..4  . 0..19     (100 values)
 *   1 . 5     . 0..11     ( 12 values)
 * so the file ends up with 512^4 = 2^36 lines.
 *
 * Command-line arguments are ignored.
 * Returns 0 on success, 1 if the output file cannot be opened, written,
 * or closed.
 */
int main(int argc, char **argv) {
    int a, b, c, d, e, f, g, h, i, j, k, l;
    int r, m, n, o, p, q, u, t;
    /* Record counter: reaches about 2^36, which does not fit in an int. */
    unsigned long long s;
    FILE *myfile;

    (void)argc;
    (void)argv;

    s = 1;
    r = m = n = o = p = q = u = t = 19;

    myfile = fopen("sequence2-c.txt", "w");
    if (myfile == NULL) {
        perror("sequence2-c.txt");
        return 1;
    }

    for (a = 0; a <= 1; a++) {
        if (a == 1) {
            r = 5;      /* second digit only goes up to 5 when a == 1 */
        }

        for (b = 0; b <= r; b++) {
            if (a == 1 && b == 5) {
                m = 11; /* last allowed prefix is 1.5, capped at 1.5.11 */
            }

            for (c = 0; c <= m; c++) {
                n = o = 19;     /* reset the limits of the second triple */

                for (d = 0; d <= 1; d++) {
                    if (d == 1) {
                        n = 5;
                    }

                    for (e = 0; e <= n; e++) {
                        if (d == 1 && e == 5) {
                            o = 11;
                        }

                        for (f = 0; f <= o; f++) {
                            p = q = 19;     /* reset the third triple */

                            for (g = 0; g <= 1; g++) {
                                if (g == 1) {
                                    p = 5;
                                }

                                for (h = 0; h <= p; h++) {
                                    if (g == 1 && h == 5) {
                                        q = 11;
                                    }

                                    for (i = 0; i <= q; i++) {
                                        t = u = 19; /* reset the fourth triple */

                                        for (j = 0; j <= 1; j++) {
                                            if (j == 1) {
                                                t = 5;
                                            }

                                            for (k = 0; k <= t; k++) {
                                                if (j == 1 && k == 5) {
                                                    u = 11;
                                                }

                                                for (l = 0; l <= u; l++) {
                                                    s++;
                                                    fprintf(myfile, "%d.%d.%d:%d.%d.%d:%d.%d.%d:%d.%d.%d\n",
                                                            a, b, c, d, e, f, g, h, i, j, k, l);
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    /* A failed write (e.g. disk full) sets the stream's error indicator. */
    if (ferror(myfile)) {
        perror("sequence2-c.txt");
        fclose(myfile);
        return 1;
    }

    /* fclose flushes the remaining buffered output, so it can fail too. */
    if (fclose(myfile) != 0) {
        perror("sequence2-c.txt");
        return 1;
    }

    return 0;
}

Running your octave code above and this C code (compiled with -O3) for one minute each, the octave code got through about 2,163 items in the sequence, and the compiled C code got through 23,299,068. So that's good.

In terms of parallelization, breaking this up into independent pieces is easy, but they won't be especially well load-balanced. If you start (say) 26 processes, and give them (a=0,b=0), (a=0,b=1)...,(a=0,b=19),(a=1,b=0), (a=1,b=1),.. (a=1,b=5), they can all run independently and you can concatenate the results when they're all done. The only downside is that the a=0 jobs will run somewhat slower than the a=1 jobs, but maybe that's good enough to start.