! MPI Fortran 90 example: the three outer loops of four nested loops are parallelized

! to compile, use

! mpif90 parallelize_outer_of_triple_sum.mpi.f90

! to run, use

! mpirun -np 4 ./a.out   (the process count must equal loop1Limit*loop2Limit*loop3Limit = 2*2*1)

program exampleParallel
  ! Distribute the three outer loops of a four-level loop nest over MPI ranks.
  !
  ! Each (loop1Indx, loop2Indx, loop3Indx) combination is handled by exactly
  ! one rank. That rank runs the innermost loop (loop4Indx) serially and adds
  ! up the flattened, 1-based position of every iteration it executes. The
  ! per-rank partial sums are combined on rank 0 with MPI_REDUCE.
  !
  ! Because every flattened position 1..n is visited exactly once, the total
  ! printed by rank 0 is n*(n+1)/2 with
  ! n = loop1Limit*loop2Limit*loop3Limit*loop4Limit (136 for the limits below).
  !
  ! The program must be started with exactly
  ! loop1Limit*loop2Limit*loop3Limit MPI processes; anything else is rejected.
  use, intrinsic :: iso_fortran_env, only: error_unit
  implicit none
  include 'mpif.h'

  ! Trip counts of the four nested loops (loop1 outermost, loop4 innermost).
  integer, parameter :: loop1Limit = 2
  integer, parameter :: loop2Limit = 2
  integer, parameter :: loop3Limit = 1
  integer, parameter :: loop4Limit = 4
  ! Number of (loop1, loop2, loop3) combinations = required number of ranks.
  integer, parameter :: num_tasks = loop1Limit*loop2Limit*loop3Limit

  integer :: num_cpu, rank, ierr
  integer :: loop1Indx, loop2Indx, loop3Indx, loop4Indx
  integer :: block_indx
  integer :: current_val, summd_val, all_summd

  summd_val = 0
  all_summd = 0

  call MPI_INIT(ierr)
  call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)
  call MPI_COMM_SIZE(MPI_COMM_WORLD, num_cpu, ierr)

  write(*,*) 'Number of CPUs=',num_cpu,' My rank=',rank

  ! One rank per outer-loop combination: with fewer ranks part of the sum
  ! would be missing, with more ranks some would have no combination to work on.
  if (num_cpu /= num_tasks) then
    if (rank == 0) then
      write(error_unit,*) 'need exactly', num_tasks, &
                          'MPI processes (loop1Limit*loop2Limit*loop3Limit), got', num_cpu
    end if
    call MPI_ABORT(MPI_COMM_WORLD, 1, ierr)
    stop 1
  end if

  ! Decompose the rank into the three outer loop indices. loop1Indx varies
  ! fastest with the rank, then loop2Indx, then loop3Indx:
  !   rank = (loop1Indx-1) + loop1Limit*((loop2Indx-1) + loop2Limit*(loop3Indx-1))
  loop1Indx  = mod(rank, loop1Limit) + 1
  block_indx = rank/loop1Limit
  loop2Indx  = mod(block_indx, loop2Limit) + 1
  loop3Indx  = block_indx/loop2Limit + 1

  ! now loop1Indx, loop2Indx and loop3Indx are set by the rank
  write(*,*) 'a=',loop1Indx,'b=',loop2Indx,'on CPU',rank

  ! Innermost loop runs serially on every rank. current_val is the 1-based
  ! position of this iteration in the original serial loop nest
  ! (loop1 outermost ... loop4 innermost).
  do loop4Indx = 1, loop4Limit
    current_val = loop4Indx &
                + (loop3Indx-1)*loop4Limit &
                + (loop2Indx-1)*loop3Limit*loop4Limit &
                + (loop1Indx-1)*loop2Limit*loop3Limit*loop4Limit
    write(*,*) current_val, ' from CPU ', rank
    summd_val = summd_val + current_val
  end do

  ! MPI_REDUCE is itself collective, so this barrier is not needed for the
  ! reduction to be correct; it only makes every rank finish its loop before
  ! any rank enters the reduction.
  call MPI_BARRIER(MPI_COMM_WORLD, ierr)
  call abort_on_mpi_error(ierr, 'mpi_barrier')

  ! Sum the per-rank partial sums; only rank 0 receives the result.
  call MPI_REDUCE(summd_val, all_summd, 1, MPI_INTEGER, MPI_SUM, 0, MPI_COMM_WORLD, ierr)
  call abort_on_mpi_error(ierr, 'mpi_reduce')

  if (rank == 0) then
    write(*,*) 'all summed value is ', all_summd
  end if

  call MPI_FINALIZE(ierr)

contains

  ! Terminate the whole MPI job if an MPI call reported an error.
  ! A plain STOP on one rank could leave the remaining ranks blocked in a
  ! collective call, so MPI_ABORT is used instead.
  ! NOTE(review): with the default MPI_ERRORS_ARE_FATAL handler MPI aborts
  ! before returning an error code, so this only triggers if the error
  ! handler has been changed to MPI_ERRORS_RETURN.
  subroutine abort_on_mpi_error(ierror, what)
    integer, intent(in)          :: ierror   ! status returned by the MPI call
    character(len=*), intent(in) :: what     ! name of the MPI call, for the message
    integer :: abort_ierr

    if (ierror /= MPI_SUCCESS) then
      write(error_unit,*) what, ' failed, ierr=', ierror, 'rank=', rank
      call MPI_ABORT(MPI_COMM_WORLD, 1, abort_ierr)
      stop 1
    end if
  end subroutine abort_on_mpi_error

end program exampleParallel