http://www.nag-j.co.jp/openMP/
https://web.kudpc.kyoto-u.ac.jp/Archives/PDF/NewsLetter/2003-6_openmp.pdf
http://exp.cs.kobe-u.ac.jp/wiki/comp_practice/index.php?plugin=attach&refer=4.OpenMP%A4%F2%CD%D1%A4%A4%A4%BF%CA%C2%CE%F3%B7%D7%BB%BB&openfile=openmp1_yaguchi_2012.pdf
http://www.cc.kyushu-u.ac.jp/scp/system/library/OpenMP/openmp0209.pdf
[2015年 5月 2日 土曜日 19:11:57 JST]
[~/OpenMP]
[am@aofd165]
$ cat compile.run.sh
#!/bin/sh
# Compile a single Fortran/OpenMP source file with ifort and run the result.
#
# usage : compile.run.sh prog_name
#   prog_name  base name of the program; prog_name.f90 must exist in the
#              current directory, and prog_name.exe is (re)built and executed.
# Exit status is 1 on a usage error, a missing source file, or a failed compile.

usage(){
cat <<EOF
usage : $0 prog_name
EOF
}

# Exactly one argument is required.
if [ $# -ne 1 ]; then
  # Without "echo" the message text itself would be executed as a command.
  echo Error in $0 : Wrong number of argument.
  usage
  exit 1
fi

prog=$1
src=${prog}.f90
exe=${prog}.exe

if [ ! -f "$src" ]; then
  echo Error in $0 : No such file, $src
  usage
  exit 1
fi

# Show the source that is about to be compiled.
echo
cat "$src"
echo

ifort -openmp "${src}" -o "${exe}"
# Stop here on a compile error so that a stale executable is never run.
if [ $? -ne 0 ]; then
  echo
  echo COMPILE ERROR!
  echo
  exit 1
fi

export OMP_NUM_THREADS=32
echo "OMP_NUM_THREADS= $OMP_NUM_THREADS"
./"${exe}"
[2015年 5月 2日 土曜日 19:13:00 JST]
[~/OpenMP]
[am@aofd165]
$ compile.run.sh hello
! Minimal OpenMP check: every thread of the parallel region prints the
! team size and its own thread number between a START and an END line.
program helloOpenMP
  !$ use omp_lib
  implicit none

  print *, "START"

  ! The print below is executed once by each thread, in no particular order.
  !$omp parallel
  print *, "Hello! N =", omp_get_num_threads(), " and I am ", omp_get_thread_num()
  !$omp end parallel

  print *, "END"
end program helloOpenMP
OMP_NUM_THREADS= 32
START
Hello! N = 32 and I am 16
Hello! N = 32 and I am 19
Hello! N = 32 and I am 5
Hello! N = 32 and I am 17
Hello! N = 32 and I am 11
Hello! N = 32 and I am 1
Hello! N = 32 and I am 4
Hello! N = 32 and I am 10
Hello! N = 32 and I am 30
Hello! N = 32 and I am 2
Hello! N = 32 and I am 29
Hello! N = 32 and I am 8
Hello! N = 32 and I am 7
Hello! N = 32 and I am 6
Hello! N = 32 and I am 18
Hello! N = 32 and I am 14
Hello! N = 32 and I am 25
Hello! N = 32 and I am 31
Hello! N = 32 and I am 9
Hello! N = 32 and I am 28
Hello! N = 32 and I am 15
Hello! N = 32 and I am 13
Hello! N = 32 and I am 3
Hello! N = 32 and I am 24
Hello! N = 32 and I am 26
Hello! N = 32 and I am 20
Hello! N = 32 and I am 23
Hello! N = 32 and I am 12
Hello! N = 32 and I am 21
Hello! N = 32 and I am 22
Hello! N = 32 and I am 27
Hello! N = 32 and I am 0
END
[2015年 5月 2日 土曜日 19:13:04 JST]
[~/OpenMP]
[am@aofd165]
$ compile.run.sh simpleDo
! Serial baseline: fills x(i) = i/N for i = 1..N and prints the sum.
program simpleDo
  implicit none
  integer, parameter :: N = 100
  integer            :: i
  double precision   :: x(N)

  do i = 1, N
    x(i) = dble(i)/N
  end do

  print *, sum(x)
end program simpleDo
OMP_NUM_THREADS= 32
50.5000000000000
[2015年 5月 2日 土曜日 20:20:43 JST]
[~/OpenMP]
[am@aofd165]
$ compile.run.sh simpleDo.2
! Fills a 1000 x 1000 x 100 double-precision array with 1000*i + 100*j + k,
! prints its sum and, when compiled with OpenMP, the elapsed wall-clock time.
!
! NOTE(review): this listing contains no "!$omp" work-sharing directive, so
! the triple loop is executed by a single thread whatever OMP_NUM_THREADS is
! set to; only the omp_get_wtime() timer comes from OpenMP.
program simpleDo2
  !$ use omp_lib
  implicit none
  integer, parameter :: im = 1000, jm = 1000, km = 100
  integer            :: i, j, k
  double precision   :: x(im,jm,km)
  !$ double precision st, en

  !$ st = omp_get_wtime()

  ! Innermost loop runs over the first (contiguous) index.
  do k = 1, km
    do j = 1, jm
      do i = 1, im
        x(i,j,k) = dble(1000*i+100*j+k)
      end do ! i
    end do ! j
  end do ! k

  print *, sum(x)

  ! The timed interval includes the sum and the print above.
  !$ en = omp_get_wtime()
  !$ print *, "Elapsed time :", en-st
end program simpleDo2
OMP_NUM_THREADS= 1
55060050000000.0
Elapsed time : 2.43593597412109
real 0m3.093s
user 0m2.665s
sys 0m0.428s
OMP_NUM_THREADS= 2
55060050000000.0
Elapsed time : 2.40606188774109
real 0m3.062s
user 0m2.644s
sys 0m0.418s
OMP_NUM_THREADS= 4
55060050000000.0
Elapsed time : 2.31341409683228
real 0m2.970s
user 0m2.540s
sys 0m0.430s
OMP_NUM_THREADS= 8
55060050000000.0
Elapsed time : 2.48756599426270
real 0m3.105s
user 0m2.696s
sys 0m0.409s
OMP_NUM_THREADS= 32
55060050000000.0
Elapsed time : 2.31031107902527
real 0m2.963s
user 0m2.544s
sys 0m0.419s
$ compile.run.sh doTest
! Work-sharing test: a(i) = i is filled by an OpenMP-parallel loop and the
! whole array is printed afterwards.
program doTest
  implicit none
  integer, parameter :: N = 50
  integer            :: i, a(N)

  ! Combined form of "parallel" + "do"; each iteration writes its own a(i).
  !$omp parallel do
  do i = 1, N
    a(i) = i
  end do
  !$omp end parallel do

  print *, a
end program doTest
OMP_NUM_THREADS= 32
1 2 3 4 5 6
7 8 9 10 11 12
13 14 15 16 17 18
19 20 21 22 23 24
25 26 27 28 29 30
31 32 33 34 35 36
37 38 39 40 41 42
43 44 45 46 47 48
49 50
[2015年 5月 2日 土曜日 19:16:04 JST]
[~/OpenMP]
[am@aofd165]
$ compile.run.sh kadaiTime
! Timing exercise: a(i) is the mean of log(i+j) over j = 1..M, computed for
! i = 1..N in an OpenMP-parallel loop. Prints the rounded sum of a and, when
! compiled with OpenMP, the elapsed wall-clock time of the loop.
program kadaiTime
  !$ use omp_lib
  implicit none
  integer, parameter :: N = 1000*1000, M = 100
  integer            :: i, j
  double precision, allocatable :: a(:)
  double precision   :: x
  !$ double precision st, en

  allocate(a(N))

  !$ st = omp_get_wtime()

  ! x is the per-iteration accumulator, so each thread needs its own copy.
  !$omp parallel do private(x)
  do i = 1, N
    x = 0
    do j = 1, M
      x = x + log(dble(i+j))
    end do
    a(i) = x/M
  end do
  !$omp end parallel do

  !$ en = omp_get_wtime()

  print *, nint(sum(a))
  !$ print *, "Elapsed time :", en-st
end program kadaiTime
OMP_NUM_THREADS= 2
12816056
Elapsed time : 0.736011981964111
[2015年 5月 2日 土曜日 19:16:21 JST]
[~/OpenMP]
[am@aofd165]
$ compile.run.sh kadaiTime
! Timing exercise: a(i) is the mean of log(i+j) over j = 1..M, computed for
! i = 1..N in an OpenMP-parallel loop. Prints the rounded sum of a and, when
! compiled with OpenMP, the elapsed wall-clock time of the loop.
program kadaiTime
  !$ use omp_lib
  implicit none
  integer, parameter :: N = 1000*1000, M = 100
  integer            :: i, j
  double precision, allocatable :: a(:)
  double precision   :: x
  !$ double precision st, en

  allocate(a(N))

  !$ st = omp_get_wtime()

  ! x is the per-iteration accumulator, so each thread needs its own copy.
  !$omp parallel do private(x)
  do i = 1, N
    x = 0
    do j = 1, M
      x = x + log(dble(i+j))
    end do
    a(i) = x/M
  end do
  !$omp end parallel do

  !$ en = omp_get_wtime()

  print *, nint(sum(a))
  !$ print *, "Elapsed time :", en-st
end program kadaiTime
OMP_NUM_THREADS= 32
12816056
Elapsed time : 0.251950979232788
[2015年 5月 2日 土曜日 19:21:38 JST]
[~/OpenMP]
[am@aofd165]
$ compile.run.sh kadaiSections
! Sections exercise: two independent arrays of random numbers are sorted in
! separate OpenMP sections, then the four smallest values of each are printed.
program kadaiSections
  implicit none
  integer, parameter :: N = 20000
  real, allocatable  :: a(:), b(:)

  allocate( a(N), b(N) )
  call random_number(a)
  call random_number(b)

  ! Combined form of "parallel" + "sections"; the two sorts touch different
  ! arrays, so they can run on different threads.
  !$omp parallel sections
  !$omp section
  call sort(a)
  !$omp section
  call sort(b)
  !$omp end parallel sections

  print '("first 4 numbers in a : ",4f10.7)', a(1:4)
  print '("first 4 numbers in b : ",4f10.7)', b(1:4)

contains

  ! In-place ascending exchange sort of x.
  ! The loop bounds use the host constant N rather than size(x), so x is
  ! expected to hold N elements.
  subroutine sort(x)
    real    :: x(:), tmp
    integer :: i, j

    do i = 1, N - 1
      do j = i + 1, N
        if (x(i)>x(j)) then
          tmp = x(i)
          x(i) = x(j)
          x(j) = tmp
        end if
      end do
    end do
  end subroutine sort

end program kadaiSections
OMP_NUM_THREADS= 1
first 4 numbers in a : 0.0000004 0.0000211 0.0000626 0.0000764
first 4 numbers in b : 0.0000067 0.0000323 0.0001214 0.0001603
real 0m1.569s
user 0m1.565s
sys 0m0.004s
[2015年 5月 2日 土曜日 19:21:06 JST]
[~/OpenMP]
[am@aofd165]
$ compile.run.sh kadaiSections
! Sections exercise: two independent arrays of random numbers are sorted in
! separate OpenMP sections, then the four smallest values of each are printed.
program kadaiSections
  implicit none
  integer, parameter :: N = 20000
  real, allocatable  :: a(:), b(:)

  allocate( a(N), b(N) )
  call random_number(a)
  call random_number(b)

  ! Combined form of "parallel" + "sections"; the two sorts touch different
  ! arrays, so they can run on different threads.
  !$omp parallel sections
  !$omp section
  call sort(a)
  !$omp section
  call sort(b)
  !$omp end parallel sections

  print '("first 4 numbers in a : ",4f10.7)', a(1:4)
  print '("first 4 numbers in b : ",4f10.7)', b(1:4)

contains

  ! In-place ascending exchange sort of x.
  ! The loop bounds use the host constant N rather than size(x), so x is
  ! expected to hold N elements.
  subroutine sort(x)
    real    :: x(:), tmp
    integer :: i, j

    do i = 1, N - 1
      do j = i + 1, N
        if (x(i)>x(j)) then
          tmp = x(i)
          x(i) = x(j)
          x(j) = tmp
        end if
      end do
    end do
  end subroutine sort

end program kadaiSections
OMP_NUM_THREADS= 3
first 4 numbers in a : 0.0000004 0.0000211 0.0000626 0.0000764
first 4 numbers in b : 0.0000067 0.0000323 0.0001214 0.0001603
real 0m1.012s
user 0m2.144s
sys 0m0.028s
[2015年 5月 2日 土曜日 19:34:23 JST]
[~/OpenMP]
[am@aofd165]
$ compile.run.sh kadaiDataSharing
! Data-sharing exercise: fills a with 1..N, copies it into b in reverse order
! inside an OpenMP work-shared loop, then prints both sums and (when compiled
! with OpenMP) the elapsed wall-clock time.
program kadaiDataSharing
!$ use omp_lib
implicit none
integer i, j
integer,parameter :: N=1000000
integer,allocatable :: a(:), b(:)
!$ double precision st, en
allocate( a(N), b(N) )
! The timer starts before the serial initialisation of a and b, so the
! reported time covers the initialisation as well as the parallel loop.
!$ st = omp_get_wtime()
a = (/(i,i=1,N)/)
b = 0
! default(none) requires every variable used in the region to be listed:
! i and j are per-thread, a and b are shared.
!$omp parallel default(none), private(i,j), shared(a,b)
!$omp do
do i=1, N
! Mirror index: a(1) goes to b(N), ..., a(N) goes to b(1); every iteration
! writes a different element of b.
j = N - i + 1
b(j) = a(i)
end do
!$omp end do
!$omp end parallel
!$ en = omp_get_wtime()
! NOTE(review): 1+2+...+N is 500000500000 for N = 1000000, which does not fit
! in a 4-byte integer. If the default integer kind is 4 bytes, both printed
! sums are wrapped values (500000500000 mod 2**32 = 1784293664); they still
! compare equal because both sides wrap identically.
print *, sum(a), "=", sum(b)
!$ print *, "Elapsed time :", en-st
end program
OMP_NUM_THREADS= 1
1784293664 = 1784293664
Elapsed time : 8.723974227905273E-003
real 0m0.013s
user 0m0.006s
sys 0m0.007s
[2015年 5月 2日 土曜日 19:34:51 JST]
[~/OpenMP]
[am@aofd165]
$ compile.run.sh kadaiDataSharing
! Data-sharing exercise: fills a with 1..N, copies it into b in reverse order
! inside an OpenMP work-shared loop, then prints both sums and (when compiled
! with OpenMP) the elapsed wall-clock time.
program kadaiDataSharing
!$ use omp_lib
implicit none
integer i, j
integer,parameter :: N=1000000
integer,allocatable :: a(:), b(:)
!$ double precision st, en
allocate( a(N), b(N) )
! The timer starts before the serial initialisation of a and b, so the
! reported time covers the initialisation as well as the parallel loop.
!$ st = omp_get_wtime()
a = (/(i,i=1,N)/)
b = 0
! default(none) requires every variable used in the region to be listed:
! i and j are per-thread, a and b are shared.
!$omp parallel default(none), private(i,j), shared(a,b)
!$omp do
do i=1, N
! Mirror index: a(1) goes to b(N), ..., a(N) goes to b(1); every iteration
! writes a different element of b.
j = N - i + 1
b(j) = a(i)
end do
!$omp end do
!$omp end parallel
!$ en = omp_get_wtime()
! NOTE(review): 1+2+...+N is 500000500000 for N = 1000000, which does not fit
! in a 4-byte integer. If the default integer kind is 4 bytes, both printed
! sums are wrapped values (500000500000 mod 2**32 = 1784293664); they still
! compare equal because both sides wrap identically.
print *, sum(a), "=", sum(b)
!$ print *, "Elapsed time :", en-st
end program
OMP_NUM_THREADS= 32
1784293664 = 1784293664
Elapsed time : 0.121893882751465
real 0m0.413s
user 0m6.048s
sys 0m0.010s
[2015年 5月 2日 土曜日 19:48:46 JST]
[~/OpenMP]
[am@aofd165]
$ compile.run.sh pi
! http://jp.xlsoft.com/documents/intel/compiler/527J-001.pdf
!
! Midpoint-rule estimate of pi: sums 4/(1+x*x) at the midpoints of num_steps
! equal sub-intervals of [0,1] and multiplies by the step width. The sum is
! accumulated with an OpenMP reduction; the thread count and the estimate
! are written to unit 6.
program omp
  !$ use omp_lib
  implicit none
  ! i and nthread were implicitly typed before; declared explicitly now.
  integer num_steps, i, nthread
  real*8 step,x,pi,sum

  num_steps = 100000000
  step = 1.0D0 / dble(num_steps)
  sum = 0.0D0

  !$OMP PARALLEL PRIVATE(X)

  ! nthread is shared: let exactly one thread store the team size instead of
  ! having every thread write the same variable without synchronization.
  !$OMP SINGLE
  nthread = OMP_GET_NUM_THREADS()
  !$OMP END SINGLE

  ! Each thread accumulates a private partial sum, combined at the end.
  !$OMP DO REDUCTION(+:SUM)
  do i = 1, num_steps
    x = (dble(i)-0.5d0)*step
    sum = sum + 4.0D0 / (1.0D0 + x * x)
  end do
  !$OMP END DO

  !$OMP END PARALLEL

  pi = step * sum
  write (6,*) nthread,' Threads',' PI = ',pi
end program omp
OMP_NUM_THREADS= 8
8 Threads PI = 3.14159265358981
real 0m0.608s
user 0m4.035s
sys 0m0.003s
[2015年 5月 2日 土曜日 19:56:41 JST]
[~/OpenMP]
[am@aofd165]
$ compile.run.sh pi
! http://jp.xlsoft.com/documents/intel/compiler/527J-001.pdf
!
! Midpoint-rule estimate of pi: sums 4/(1+x*x) at the midpoints of num_steps
! equal sub-intervals of [0,1] and multiplies by the step width. The sum is
! accumulated with an OpenMP reduction; the thread count and the estimate
! are written to unit 6.
program omp
  !$ use omp_lib
  implicit none
  ! i and nthread were implicitly typed before; declared explicitly now.
  integer num_steps, i, nthread
  real*8 step,x,pi,sum

  num_steps = 100000000
  step = 1.0D0 / dble(num_steps)
  sum = 0.0D0

  !$OMP PARALLEL PRIVATE(X)

  ! nthread is shared: let exactly one thread store the team size instead of
  ! having every thread write the same variable without synchronization.
  !$OMP SINGLE
  nthread = OMP_GET_NUM_THREADS()
  !$OMP END SINGLE

  ! Each thread accumulates a private partial sum, combined at the end.
  !$OMP DO REDUCTION(+:SUM)
  do i = 1, num_steps
    x = (dble(i)-0.5d0)*step
    sum = sum + 4.0D0 / (1.0D0 + x * x)
  end do
  !$OMP END DO

  !$OMP END PARALLEL

  pi = step * sum
  write (6,*) nthread,' Threads',' PI = ',pi
end program omp
OMP_NUM_THREADS= 32
32 Threads PI = 3.14159265358981
real 0m0.408s
user 0m9.673s
sys 0m0.184s
[2015年 6月 5日 金曜日 09:44:44 JST]
[~/OpenMP]
[am@aofd165]
$ cat omp_parallel_do.sh
#!/bin/bash
# 4. Parallelizing do loops
# http://www.nag-j.co.jp/openMP/openMPDoDirective.html
#
# Generates <script name>.f90 from the here-document below, compiles it with
# ifort -openmp, and runs it with OMP_NUM_THREADS=5.
# Exits with status 1 if the compile fails, 0 otherwise.

exe=$(basename $0 .sh)
src=$(basename $0 .sh).f90

# The here-document is unquoted so that ${exe} is expanded by the shell;
# the "$" of the OpenMP sentinels is escaped so it reaches the file literally.
cat <<EOF > $src
program ${exe}
!\$ use omp_lib
implicit none
integer,parameter :: N = 10
integer i
real a(N),sum
!\$OMP PARALLEL DO &
!\$OMP PRIVATE ( i )
do i=1,N
  a(i) = float(i)
  print *, "Num_threads =", omp_get_num_threads(), ".I am ", omp_get_thread_num(),"a(i)=",a(i)
end do
!\$OMP END PARALLEL DO
print *,'a(i):'
print *, a
print *
! The accumulator must start from zero; it was previously used uninitialized.
sum = 0.0
do i=1,N
  sum=sum+a(i)
enddo
print *,'sum=',sum
end program ${exe}
EOF

ifort -o ${exe} -openmp ${src}
if [ $? -ne 0 ]; then
  echo
  echo COMPILE ERROR!
  echo
  exit 1
fi

nt=5
export OMP_NUM_THREADS=$nt
echo
echo "OMP_NUM_THREADS= "$nt
echo
./$exe
exit 0
$ omp_parallel_do.sh
OMP_NUM_THREADS= 5
Num_threads = 5 .I am 0 a(i)= 1.000000
Num_threads = 5 .I am 0 a(i)= 2.000000
Num_threads = 5 .I am 2 a(i)= 5.000000
Num_threads = 5 .I am 2 a(i)= 6.000000
Num_threads = 5 .I am 4 a(i)= 9.000000
Num_threads = 5 .I am 4 a(i)= 10.00000
Num_threads = 5 .I am 3 a(i)= 7.000000
Num_threads = 5 .I am 3 a(i)= 8.000000
Num_threads = 5 .I am 1 a(i)= 3.000000
Num_threads = 5 .I am 1 a(i)= 4.000000
a(i):
1.000000 2.000000 3.000000 4.000000 5.000000
6.000000 7.000000 8.000000 9.000000 10.00000
sum= 55.00000
$ compile.run.sh tama
!
!http://tama.green.gifu-u.ac.jp/~tama/Memo/openmp.html
!
! Each thread takes a band of neach = N/OMP_NUM_THREADS rows, fills x for its
! band, computes y as the (2*NN+1) x (2*NN+1) box average of x, and writes
! its rows of y to its own file "OUT-<thread>".
!
! NOTE(review): issues visible in this listing (behavior left unchanged):
!  * x and y are allocated as (neach, M), i.e. rows 1..neach, but are indexed
!    with the global row numbers istart..iend (and i+i1 in the stencil). For
!    every thread except thread 0 these rows lie outside the allocated
!    bounds, which is consistent with the segmentation fault in the run.
!  * The stencil reads rows up to NN outside the thread's own band, which the
!    private x never holds.
!  * The file name format uses I1, which cannot represent thread numbers of
!    10 or more.
!  * Integer division in neach leaves rows beyond neach*OMP_NUM_THREADS
!    unprocessed when N is not a multiple of the thread count.
implicit none
integer, parameter :: N=1000, M=1000, NN=10, MAX_THREAD=10
integer :: i, j, i1, j1
real, allocatable :: x(:,:), y(:,:)
! NOTE(review): the next line has no blank after "!$", so it appears to be an
! ordinary comment rather than a conditionally compiled "use" statement;
! omp_get_thread_num is declared external below instead.
!$use omp_lib
character(len=1024) :: omp_num_threads
integer, external :: omp_get_thread_num
integer :: i_omp_num_threads, i_thread
integer :: ifile
character(len=1024) :: filename
integer :: neach, istart, iend

! The thread count is read from the environment, not from the OpenMP runtime.
call getenv("OMP_NUM_THREADS", omp_num_threads)
read(omp_num_threads, *) i_omp_num_threads
write(0,*) 'OMP_NUM_THREADS ', i_omp_num_threads

!allocate( x(N,M) )
!allocate( y(N,M) )

neach = N/i_omp_num_threads

!$omp parallel private(i_thread, istart, iend, filename, ifile, i, j, x, y )
i_thread = omp_get_thread_num()
write(*,*) 'thread num = ', i_thread

! Row band and output unit of this thread.
istart = neach * i_thread + 1
iend = neach * (i_thread+1)
ifile = 10+i_thread
write(filename, '("OUT-", I1)') i_thread

allocate( x(neach,M) )
allocate( y(neach,M) )

do i=istart, iend
  do j=1, M
    x(i,j) = log(abs( sin(0.2*i*j)))
  end do
end do

open(unit=ifile, file=trim(filename))

! Keep the stencil centre at least NN rows away from the global edges.
if( istart < NN+1 ) istart = NN+1
if( iend > N-NN ) iend = N-NN

do i=istart, iend
  do j=NN+1, M-NN
    y(i,j) = 0
    do i1=-NN, NN
      do j1 = -NN, NN
        y(i, j) = y(i,j) + x(i+i1, j+j1)
      end do
    end do
    y(i,j) = y(i,j) / ((2*NN+1)*(2*NN+1))
  end do
  write(ifile, *) i, (y(i,j), j=NN+1, M-NN)
end do

close(ifile)
!$omp end parallel
end
OMP_NUM_THREADS= 32
OMP_NUM_THREADS 32
thread num = 14
thread num = 30
thread num = 21
thread num = 13
thread num = 2
thread num = 7
thread num = 9
thread num = 17
thread num = 1
thread num = 22
thread num = 11
thread num = 5
thread num = 20
thread num = 12
thread num = 8
thread num = 31
thread num = 3
thread num = 4
thread num = 10
thread num = 16
thread num = 24
thread num = 27
thread num = 26
thread num = 15
thread num = 23
thread num = 18
thread num = 25
thread num = 0
thread num = 28
thread num = 19
thread num = 6
thread num = 29
forrtl: 致命的なエラー (174): SIGSEGV、segmentation fault occurred
Image PC Routine Line Source
tama.exe 0000000000418875 Unknown Unknown Unknown
tama.exe 00000000004343B4 Unknown Unknown Unknown
tama.exe 0000000000404061 Unknown Unknown Unknown
libiomp5.so 00002B6F6E14F4F3 Unknown Unknown Unknown
$ export OMP_STACKSIZE=64000000
[2015年 5月 2日 土曜日 19:43:13 JST]
[~/OpenMP]
[am@aofd165]
$ ulimit -s unlimited
[2015年 5月 2日 土曜日 19:43:22 JST]
[~/OpenMP]
[am@aofd165]
$ compile.run.sh tama
!
!http://tama.green.gifu-u.ac.jp/~tama/Memo/openmp.html
!
! Each thread takes a band of neach = N/OMP_NUM_THREADS rows, fills x for its
! band, computes y as the (2*NN+1) x (2*NN+1) box average of x, and writes
! its rows of y to its own file "OUT-<thread>".
!
! NOTE(review): issues visible in this listing (behavior left unchanged):
!  * x and y are allocated as (neach, M), i.e. rows 1..neach, but are indexed
!    with the global row numbers istart..iend (and i+i1 in the stencil). For
!    every thread except thread 0 these rows lie outside the allocated
!    bounds, which is consistent with the segmentation fault in the run.
!  * The stencil reads rows up to NN outside the thread's own band, which the
!    private x never holds.
!  * The file name format uses I1, which cannot represent thread numbers of
!    10 or more.
!  * Integer division in neach leaves rows beyond neach*OMP_NUM_THREADS
!    unprocessed when N is not a multiple of the thread count.
implicit none
integer, parameter :: N=1000, M=1000, NN=10, MAX_THREAD=10
integer :: i, j, i1, j1
real, allocatable :: x(:,:), y(:,:)
! NOTE(review): the next line has no blank after "!$", so it appears to be an
! ordinary comment rather than a conditionally compiled "use" statement;
! omp_get_thread_num is declared external below instead.
!$use omp_lib
character(len=1024) :: omp_num_threads
integer, external :: omp_get_thread_num
integer :: i_omp_num_threads, i_thread
integer :: ifile
character(len=1024) :: filename
integer :: neach, istart, iend

! The thread count is read from the environment, not from the OpenMP runtime.
call getenv("OMP_NUM_THREADS", omp_num_threads)
read(omp_num_threads, *) i_omp_num_threads
write(0,*) 'OMP_NUM_THREADS ', i_omp_num_threads

!allocate( x(N,M) )
!allocate( y(N,M) )

neach = N/i_omp_num_threads

!$omp parallel private(i_thread, istart, iend, filename, ifile, i, j, x, y )
i_thread = omp_get_thread_num()
write(*,*) 'thread num = ', i_thread

! Row band and output unit of this thread.
istart = neach * i_thread + 1
iend = neach * (i_thread+1)
ifile = 10+i_thread
write(filename, '("OUT-", I1)') i_thread

allocate( x(neach,M) )
allocate( y(neach,M) )

do i=istart, iend
  do j=1, M
    x(i,j) = log(abs( sin(0.2*i*j)))
  end do
end do

open(unit=ifile, file=trim(filename))

! Keep the stencil centre at least NN rows away from the global edges.
if( istart < NN+1 ) istart = NN+1
if( iend > N-NN ) iend = N-NN

do i=istart, iend
  do j=NN+1, M-NN
    y(i,j) = 0
    do i1=-NN, NN
      do j1 = -NN, NN
        y(i, j) = y(i,j) + x(i+i1, j+j1)
      end do
    end do
    y(i,j) = y(i,j) / ((2*NN+1)*(2*NN+1))
  end do
  write(ifile, *) i, (y(i,j), j=NN+1, M-NN)
end do

close(ifile)
!$omp end parallel
end
OMP_NUM_THREADS= 32
OMP_NUM_THREADS 32
thread num = 10
thread num = 21
thread num = 23
thread num = 24
thread num = 27
thread num = 26
thread num = 1
thread num = 2
thread num = 4
thread num = 20
thread num = 13
thread num = 19
thread num = 16
thread num = 17
thread num = 14
thread num = 3
thread num = 29
thread num = 0
thread num = 30
forrtl: 致命的なエラー (174): SIGSEGV、segmentation fault occurred
Image PC Routine Line Source
tama.exe 0000000000493D6F Unknown Unknown Unknown
tama.exe 000000000046C94F Unknown Unknown Unknown
tama.exe 0000000000463E77 Unknown Unknown Unknown
tama.exe 0000000000434714 Unknown Unknown Unknown
tama.exe 0000000000404061 Unknown Unknown Unknown
libiomp5.so 00002AD4857B44F3 Unknown Unknown Unknown
thread num = 18
thread num = 22
thread num = 28
thread num = 7
thread num = 31
thread num = 15
thread num = 6
thread num = 12
thread num = 8
thread num = 25
thread num = 5
thread num = 11
thread num = 9
real 0m0.041s
user 0m0.163s
sys 0m0.226s