サンプル集
http://www.nag-j.co.jp/openMP/
https://web.kudpc.kyoto-u.ac.jp/Archives/PDF/NewsLetter/2003-6_openmp.pdf
http://exp.cs.kobe-u.ac.jp/wiki/comp_practice/index.php?plugin=attach&refer=4.OpenMP%A4%F2%CD%D1%A4%A4%A4%BF%CA%C2%CE%F3%B7%D7%BB%BB&openfile=openmp1_yaguchi_2012.pdf
http://www.cc.kyushu-u.ac.jp/scp/system/library/OpenMP/openmp0209.pdf
[2015年 5月 2日 土曜日 19:11:57 JST]
[~/OpenMP]
[am@aofd165]
$ cat compile.run.sh
#!/bin/sh
# compile.run.sh -- print, compile (ifort + OpenMP) and run one Fortran sample.
#
# usage : compile.run.sh prog_name
#   prog_name  base name of the sample; prog_name.f90 is compiled into
#              prog_name.exe and executed with OMP_NUM_THREADS=32.
# Exit status: 1 on a usage error, a missing source file or a failed
#              compilation; otherwise the status of the executed program.

# Print the one-line usage message.
usage(){
    echo "usage : $0 prog_name"
}

if [ $# -ne 1 ]; then
    # The message has to go through echo; a bare line is run as a command.
    echo "Error in $0 : Wrong number of argument."
    usage
    exit 1
fi

prog=$1
src=${prog}.f90
exe=${prog}.exe

if [ ! -f "$src" ]; then
    echo "Error in $0 : No such file, $src"
    usage
    exit 1
fi

# Show the source so that the log records exactly what was compiled.
echo
cat "$src"
echo

# Stop on a failed build instead of running a stale or missing executable.
if ! ifort -openmp "$src" -o "$exe"; then
    echo
    echo COMPILE ERROR!
    echo
    exit 1
fi

export OMP_NUM_THREADS=32
echo "OMP_NUM_THREADS= $OMP_NUM_THREADS"
"./$exe"
[2015年 5月 2日 土曜日 19:13:00 JST]
[~/OpenMP]
[am@aofd165]
$ compile.run.sh hello
program helloOpenMP
  ! Minimal OpenMP sample: between a serial START and END line, every
  ! thread of the team prints the team size and its own thread number.
  !$ use omp_lib
  implicit none

  write (*, *) "START"

  ! The statement inside the region is executed once by each thread.
  !$omp parallel
  write (*, *) "Hello! N =", omp_get_num_threads(), " and I am ", omp_get_thread_num()
  !$omp end parallel

  write (*, *) "END"
end program helloOpenMP
OMP_NUM_THREADS= 32
START
Hello! N = 32 and I am 16
Hello! N = 32 and I am 19
Hello! N = 32 and I am 5
Hello! N = 32 and I am 17
Hello! N = 32 and I am 11
Hello! N = 32 and I am 1
Hello! N = 32 and I am 4
Hello! N = 32 and I am 10
Hello! N = 32 and I am 30
Hello! N = 32 and I am 2
Hello! N = 32 and I am 29
Hello! N = 32 and I am 8
Hello! N = 32 and I am 7
Hello! N = 32 and I am 6
Hello! N = 32 and I am 18
Hello! N = 32 and I am 14
Hello! N = 32 and I am 25
Hello! N = 32 and I am 31
Hello! N = 32 and I am 9
Hello! N = 32 and I am 28
Hello! N = 32 and I am 15
Hello! N = 32 and I am 13
Hello! N = 32 and I am 3
Hello! N = 32 and I am 24
Hello! N = 32 and I am 26
Hello! N = 32 and I am 20
Hello! N = 32 and I am 23
Hello! N = 32 and I am 12
Hello! N = 32 and I am 21
Hello! N = 32 and I am 22
Hello! N = 32 and I am 27
Hello! N = 32 and I am 0
END
[2015年 5月 2日 土曜日 19:13:04 JST]
[~/OpenMP]
[am@aofd165]
$ compile.run.sh simpleDo
program simpleDo
  ! Serial reference: fill x(k) = k/N for k = 1..N and print the total.
  implicit none
  integer, parameter :: N = 100
  integer :: k
  double precision :: x(N)

  ! Implied-do constructor in place of an explicit loop; same values.
  x = [(dble(k)/N, k = 1, N)]

  print *, sum(x)
end program simpleDo
OMP_NUM_THREADS= 32
50.5000000000000
[2015年 5月 2日 土曜日 20:20:43 JST]
[~/OpenMP]
[am@aofd165]
$ compile.run.sh simpleDo.2
program simpleDo2
  ! Fills an im x jm x km array with 1000*i + 100*j + k and prints its sum.
  ! When built with OpenMP the elapsed wall-clock time is printed as well.
  ! There is no worksharing directive around the loops, so they are
  ! executed by a single thread whatever OMP_NUM_THREADS is set to.
  !$ use omp_lib
  implicit none
  integer, parameter :: im = 1000, jm = 1000, km = 100
  integer :: i, j, k, jk
  double precision :: x(im, jm, km)
  !$ double precision :: st, en

  !$ st = omp_get_wtime()
  do k = 1, km
    do j = 1, jm
      ! Part of the stored value that does not depend on i.
      jk = 100*j + k
      ! The leftmost index runs in the innermost loop.
      do i = 1, im
        x(i, j, k) = dble(1000*i + jk)
      end do
    end do
  end do
  print *, sum(x)
  !$ en = omp_get_wtime()

  !$ print *, "Elapsed time :", en-st
end program simpleDo2
OMP_NUM_THREADS= 1
55060050000000.0
Elapsed time : 2.43593597412109
real 0m3.093s
user 0m2.665s
sys 0m0.428s
OMP_NUM_THREADS= 2
55060050000000.0
Elapsed time : 2.40606188774109
real 0m3.062s
user 0m2.644s
sys 0m0.418s
OMP_NUM_THREADS= 4
55060050000000.0
Elapsed time : 2.31341409683228
real 0m2.970s
user 0m2.540s
sys 0m0.430s
OMP_NUM_THREADS= 8
55060050000000.0
Elapsed time : 2.48756599426270
real 0m3.105s
user 0m2.696s
sys 0m0.409s
OMP_NUM_THREADS= 32
55060050000000.0
Elapsed time : 2.31031107902527
real 0m2.963s
user 0m2.544s
sys 0m0.419s
$ compile.run.sh doTest
program doTest
  ! Fills a(k) = k for k = 1..N inside an OpenMP loop and prints the array.
  implicit none
  integer, parameter :: N = 50
  integer :: k, a(N)

  ! Combined construct: a parallel region that contains exactly one do.
  !$omp parallel do
  do k = 1, N
    a(k) = k
  end do
  !$omp end parallel do

  print *, a
end program doTest
OMP_NUM_THREADS= 32
1 2 3 4 5 6
7 8 9 10 11 12
13 14 15 16 17 18
19 20 21 22 23 24
25 26 27 28 29 30
31 32 33 34 35 36
37 38 39 40 41 42
43 44 45 46 47 48
49 50
[2015年 5月 2日 土曜日 19:16:04 JST]
[~/OpenMP]
[am@aofd165]
$ compile.run.sh kadaiTime
program kadaiTime
  ! Timing exercise: a(i) is the mean of log(i+j) over j = 1..M, computed
  ! for i = 1..N in an OpenMP loop. Prints nint(sum(a)) and, when built
  ! with OpenMP, the wall-clock time spent in the loop.
  !$ use omp_lib
  implicit none
  integer, parameter :: N = 1000*1000, M = 100
  integer :: i, j
  double precision, allocatable :: a(:)
  double precision :: acc
  !$ double precision :: st, en

  allocate (a(N))

  !$ st = omp_get_wtime()
  ! acc is a scratch accumulator, so each thread needs its own copy.
  !$omp parallel do private(acc)
  do i = 1, N
    acc = 0
    do j = 1, M
      acc = acc + log(dble(i + j))
    end do
    a(i) = acc/M
  end do
  !$omp end parallel do
  !$ en = omp_get_wtime()

  print *, nint(sum(a))
  !$ print *, "Elapsed time :", en-st
end program kadaiTime
OMP_NUM_THREADS= 2
12816056
Elapsed time : 0.736011981964111
[2015年 5月 2日 土曜日 19:16:21 JST]
[~/OpenMP]
[am@aofd165]
$ compile.run.sh kadaiTime
program kadaiTime
  ! Timing exercise: a(i) is the mean of log(i+j) over j = 1..M, computed
  ! for i = 1..N in an OpenMP loop. Prints nint(sum(a)) and, when built
  ! with OpenMP, the wall-clock time spent in the loop.
  !$ use omp_lib
  implicit none
  integer, parameter :: N = 1000*1000, M = 100
  integer :: i, j
  double precision, allocatable :: a(:)
  double precision :: acc
  !$ double precision :: st, en

  allocate (a(N))

  !$ st = omp_get_wtime()
  ! acc is a scratch accumulator, so each thread needs its own copy.
  !$omp parallel do private(acc)
  do i = 1, N
    acc = 0
    do j = 1, M
      acc = acc + log(dble(i + j))
    end do
    a(i) = acc/M
  end do
  !$omp end parallel do
  !$ en = omp_get_wtime()

  print *, nint(sum(a))
  !$ print *, "Elapsed time :", en-st
end program kadaiTime
OMP_NUM_THREADS= 32
12816056
Elapsed time : 0.251950979232788
[2015年 5月 2日 土曜日 19:21:38 JST]
[~/OpenMP]
[am@aofd165]
$ compile.run.sh kadaiSections
program kadaiSections
  ! Sections exercise: two independent arrays of random numbers are sorted
  ! in separate OpenMP sections, then the first four (smallest) values of
  ! each array are printed.
  implicit none
  integer, parameter :: N = 20000
  real, allocatable :: a(:), b(:)

  allocate (a(N), b(N))
  call random_number(a)
  call random_number(b)

  ! One section per array.
  !$omp parallel
  !$omp sections
  !$omp section
  call sort(a)
  !$omp section
  call sort(b)
  !$omp end sections
  !$omp end parallel

  print '("first 4 numbers in a : ",4f10.7)', a(1:4)
  print '("first 4 numbers in b : ",4f10.7)', b(1:4)

contains

  ! Sort x into ascending order in place (simple exchange sort, O(n**2)).
  !   x : array to sort. Its own extent, size(x), bounds the loops, so the
  !       routine is correct for any length and does not rely on the host
  !       constant N matching the argument.
  subroutine sort(x)
    real, intent(inout) :: x(:)
    real :: tmp
    integer :: i, j, n_x

    n_x = size(x)
    do i = 1, n_x - 1
      do j = i + 1, n_x
        if (x(i) > x(j)) then
          tmp = x(i)
          x(i) = x(j)
          x(j) = tmp
        end if
      end do
    end do
  end subroutine sort
end program kadaiSections
OMP_NUM_THREADS= 1
first 4 numbers in a : 0.0000004 0.0000211 0.0000626 0.0000764
first 4 numbers in b : 0.0000067 0.0000323 0.0001214 0.0001603
real 0m1.569s
user 0m1.565s
sys 0m0.004s
[2015年 5月 2日 土曜日 19:21:06 JST]
[~/OpenMP]
[am@aofd165]
$ compile.run.sh kadaiSections
program kadaiSections
  ! Sections exercise: two independent arrays of random numbers are sorted
  ! in separate OpenMP sections, then the first four (smallest) values of
  ! each array are printed.
  implicit none
  integer, parameter :: N = 20000
  real, allocatable :: a(:), b(:)

  allocate (a(N), b(N))
  call random_number(a)
  call random_number(b)

  ! One section per array.
  !$omp parallel
  !$omp sections
  !$omp section
  call sort(a)
  !$omp section
  call sort(b)
  !$omp end sections
  !$omp end parallel

  print '("first 4 numbers in a : ",4f10.7)', a(1:4)
  print '("first 4 numbers in b : ",4f10.7)', b(1:4)

contains

  ! Sort x into ascending order in place (simple exchange sort, O(n**2)).
  !   x : array to sort. Its own extent, size(x), bounds the loops, so the
  !       routine is correct for any length and does not rely on the host
  !       constant N matching the argument.
  subroutine sort(x)
    real, intent(inout) :: x(:)
    real :: tmp
    integer :: i, j, n_x

    n_x = size(x)
    do i = 1, n_x - 1
      do j = i + 1, n_x
        if (x(i) > x(j)) then
          tmp = x(i)
          x(i) = x(j)
          x(j) = tmp
        end if
      end do
    end do
  end subroutine sort
end program kadaiSections
OMP_NUM_THREADS= 3
first 4 numbers in a : 0.0000004 0.0000211 0.0000626 0.0000764
first 4 numbers in b : 0.0000067 0.0000323 0.0001214 0.0001603
real 0m1.012s
user 0m2.144s
sys 0m0.028s
[2015年 5月 2日 土曜日 19:34:23 JST]
[~/OpenMP]
[am@aofd165]
$ compile.run.sh kadaiDataSharing
program kadaiDataSharing
  ! Data-sharing exercise: b receives a in reverse order inside an OpenMP
  ! loop that uses default(none); the two sums printed at the end are
  ! expected to agree.
  !$ use omp_lib
  implicit none
  integer, parameter :: N = 1000000
  integer :: i
  integer, allocatable :: a(:), b(:)
  !$ double precision :: st, en

  allocate (a(N), b(N))

  ! The timed span starts before the initialisation of a and b.
  !$ st = omp_get_wtime()
  a = [(i, i = 1, N)]
  b = 0

  ! Element i of a goes to the mirrored position N-i+1 of b.
  !$omp parallel do default(none), private(i), shared(a, b)
  do i = 1, N
    b(N - i + 1) = a(i)
  end do
  !$omp end parallel do
  !$ en = omp_get_wtime()

  ! NOTE(review): 1+2+...+N is 500000500000, which does not fit a 32-bit
  ! default integer, so the printed sums are wrapped values. Both sides
  ! wrap alike, so the comparison still works; a 64-bit kind would be
  ! needed to print the true total.
  print *, sum(a), "=", sum(b)
  !$ print *, "Elapsed time :", en-st
end program kadaiDataSharing
OMP_NUM_THREADS= 1
1784293664 = 1784293664
Elapsed time : 8.723974227905273E-003
real 0m0.013s
user 0m0.006s
sys 0m0.007s
[2015年 5月 2日 土曜日 19:34:51 JST]
[~/OpenMP]
[am@aofd165]
$ compile.run.sh kadaiDataSharing
program kadaiDataSharing
  ! Data-sharing exercise: b receives a in reverse order inside an OpenMP
  ! loop that uses default(none); the two sums printed at the end are
  ! expected to agree.
  !$ use omp_lib
  implicit none
  integer, parameter :: N = 1000000
  integer :: i
  integer, allocatable :: a(:), b(:)
  !$ double precision :: st, en

  allocate (a(N), b(N))

  ! The timed span starts before the initialisation of a and b.
  !$ st = omp_get_wtime()
  a = [(i, i = 1, N)]
  b = 0

  ! Element i of a goes to the mirrored position N-i+1 of b.
  !$omp parallel do default(none), private(i), shared(a, b)
  do i = 1, N
    b(N - i + 1) = a(i)
  end do
  !$omp end parallel do
  !$ en = omp_get_wtime()

  ! NOTE(review): 1+2+...+N is 500000500000, which does not fit a 32-bit
  ! default integer, so the printed sums are wrapped values. Both sides
  ! wrap alike, so the comparison still works; a 64-bit kind would be
  ! needed to print the true total.
  print *, sum(a), "=", sum(b)
  !$ print *, "Elapsed time :", en-st
end program kadaiDataSharing
OMP_NUM_THREADS= 32
1784293664 = 1784293664
Elapsed time : 0.121893882751465
real 0m0.413s
user 0m6.048s
sys 0m0.010s
[2015年 5月 2日 土曜日 19:48:46 JST]
[~/OpenMP]
[am@aofd165]
$ compile.run.sh pi
! http://jp.xlsoft.com/documents/intel/compiler/527J-001.pdf
program omp
  ! Approximates pi with the midpoint rule applied to the integral of
  ! 4/(1+x**2) over [0,1]; the summation is parallelised with an OpenMP
  ! reduction. Writes the team size and the result to unit 6.
  !$ use omp_lib
  implicit none
  integer :: num_steps, i, nthread
  double precision :: step, x, pi, total

  num_steps = 100000000
  step = 1.0D0 / dble(num_steps)
  total = 0.0D0
  ! Value reported when the program is built without OpenMP.
  nthread = 1

  !$OMP PARALLEL PRIVATE(X)
  ! nthread is shared: let exactly one thread store the team size rather
  ! than having every thread write the same variable at once (data race).
  !$OMP SINGLE
  !$ nthread = omp_get_num_threads()
  !$OMP END SINGLE
  !$OMP DO REDUCTION(+:total)
  do i = 1, num_steps
    ! Midpoint of the i-th sub-interval.
    x = (dble(i)-0.5d0)*step
    total = total + 4.0D0 / (1.0D0 + x * x)
  end do
  !$OMP END DO
  !$OMP END PARALLEL

  pi = step * total
  write (6,*) nthread,' Threads',' PI = ',pi
end program omp
OMP_NUM_THREADS= 8
8 Threads PI = 3.14159265358981
real 0m0.608s
user 0m4.035s
sys 0m0.003s
[2015年 5月 2日 土曜日 19:56:41 JST]
[~/OpenMP]
[am@aofd165]
$ compile.run.sh pi
! http://jp.xlsoft.com/documents/intel/compiler/527J-001.pdf
program omp
  ! Approximates pi with the midpoint rule applied to the integral of
  ! 4/(1+x**2) over [0,1]; the summation is parallelised with an OpenMP
  ! reduction. Writes the team size and the result to unit 6.
  !$ use omp_lib
  implicit none
  integer :: num_steps, i, nthread
  double precision :: step, x, pi, total

  num_steps = 100000000
  step = 1.0D0 / dble(num_steps)
  total = 0.0D0
  ! Value reported when the program is built without OpenMP.
  nthread = 1

  !$OMP PARALLEL PRIVATE(X)
  ! nthread is shared: let exactly one thread store the team size rather
  ! than having every thread write the same variable at once (data race).
  !$OMP SINGLE
  !$ nthread = omp_get_num_threads()
  !$OMP END SINGLE
  !$OMP DO REDUCTION(+:total)
  do i = 1, num_steps
    ! Midpoint of the i-th sub-interval.
    x = (dble(i)-0.5d0)*step
    total = total + 4.0D0 / (1.0D0 + x * x)
  end do
  !$OMP END DO
  !$OMP END PARALLEL

  pi = step * total
  write (6,*) nthread,' Threads',' PI = ',pi
end program omp
OMP_NUM_THREADS= 32
32 Threads PI = 3.14159265358981
real 0m0.408s
user 0m9.673s
sys 0m0.184s
[2015年 6月 5日 金曜日 09:44:44 JST]
[~/OpenMP]
[am@aofd165]
$ cat omp_parallel_do.sh
#!/bin/bash
# 4. Parallelising a do loop
# http://www.nag-j.co.jp/openMP/openMPDoDirective.html
#
# Generates <script name>.f90 from the here-document below, compiles it
# with ifort -openmp and runs the result with OMP_NUM_THREADS=5.
# Exit status: 1 if the compilation fails, 0 otherwise.

exe=$(basename "$0" .sh)
src=${exe}.f90

# The here-document is left unquoted on purpose so that ${exe} is expanded;
# every dollar sign that belongs to the Fortran text is therefore escaped.
cat <<EOF > "$src"
program ${exe}
  !\$ use omp_lib
  implicit none
  integer,parameter :: N = 10
  integer i
  real a(N),total

  ! Each iteration stores a(i) and reports which thread executed it.
  !\$OMP PARALLEL DO &
  !\$OMP PRIVATE ( i )
  do i=1,N
    a(i) = real(i)
    print *, "Num_threads =", omp_get_num_threads(), ".I am ", omp_get_thread_num(),"a(i)=",a(i)
  end do
  !\$OMP END PARALLEL DO

  print *,'a(i):'
  print *, a
  print *

  ! Start the accumulation from a defined value; with an uninitialised
  ! accumulator the printed total is undefined.
  total = 0.0
  do i=1,N
    total=total+a(i)
  enddo
  print *,'sum=',total
end program ${exe}
EOF

# Do not run anything if the build failed.
if ! ifort -o "${exe}" -openmp "${src}"; then
    echo
    echo COMPILE ERROR!
    echo
    exit 1
fi

nt=5
export OMP_NUM_THREADS=$nt

echo
echo "OMP_NUM_THREADS= "$nt
echo

"./$exe"

exit 0
$ omp_parallel_do.sh
OMP_NUM_THREADS= 5
Num_threads = 5 .I am 0 a(i)= 1.000000
Num_threads = 5 .I am 0 a(i)= 2.000000
Num_threads = 5 .I am 2 a(i)= 5.000000
Num_threads = 5 .I am 2 a(i)= 6.000000
Num_threads = 5 .I am 4 a(i)= 9.000000
Num_threads = 5 .I am 4 a(i)= 10.00000
Num_threads = 5 .I am 3 a(i)= 7.000000
Num_threads = 5 .I am 3 a(i)= 8.000000
Num_threads = 5 .I am 1 a(i)= 3.000000
Num_threads = 5 .I am 1 a(i)= 4.000000
a(i):
1.000000 2.000000 3.000000 4.000000 5.000000
6.000000 7.000000 8.000000 9.000000 10.00000
sum= 55.00000
$ compile.run.sh tama
!
!http://tama.green.gifu-u.ac.jp/~tama/Memo/openmp.html
!
! Per-thread smoothing sample. Reads the thread count from the
! OMP_NUM_THREADS environment variable, assigns each thread a band of
! neach rows, fills x(i,j) = log|sin(0.2*i*j)| on that band, averages x
! over a (2*NN+1) x (2*NN+1) window into y, and writes the rows of y to a
! per-thread file whose name is built from "OUT-" and the thread number.
implicit none
! NOTE(review): MAX_THREAD is declared but never referenced below.
integer, parameter :: N=1000, M=1000, NN=10, MAX_THREAD=10
integer :: i, j, i1, j1
real, allocatable :: x(:,:), y(:,:)
! NOTE(review): the next line has no blank after the sentinel, so it is
! presumably treated as a plain comment rather than a conditional "use";
! omp_get_thread_num is declared external below instead.
!$use omp_lib
character(len=1024) :: omp_num_threads
integer, external :: omp_get_thread_num
integer :: i_omp_num_threads, i_thread
integer :: ifile
character(len=1024) :: filename
integer :: neach, istart, iend
! The thread count is parsed from the environment, not queried from the
! OpenMP runtime.
call getenv("OMP_NUM_THREADS", omp_num_threads)
read(omp_num_threads, *) i_omp_num_threads
write(0,*) 'OMP_NUM_THREADS ', i_omp_num_threads
!allocate( x(N,M) )
!allocate( y(N,M) )
! Integer division: when N is not a multiple of the thread count the
! last N - neach*i_omp_num_threads rows belong to no thread
! (e.g. 32 threads -> neach = 31, rows 993..1000 are never processed).
neach = N/i_omp_num_threads
!$omp parallel private(i_thread, istart, iend, filename, ifile, i, j, x, y )
i_thread = omp_get_thread_num()
write(*,*) 'thread num = ', i_thread
! istart..iend are GLOBAL row numbers of this thread's band.
istart = neach * i_thread + 1
iend = neach * (i_thread+1)
ifile = 10+i_thread
! NOTE(review): the I1 edit descriptor is one character wide, so thread
! numbers of 10 or more do not fit the field.
write(filename, '("OUT-", I1)') i_thread
! x and y are private here and get neach rows with lower bound 1 ...
allocate( x(neach,M) )
allocate( y(neach,M) )
! ... but are indexed with the global row number i. For every thread
! other than 0, istart is already larger than neach, so these stores are
! outside the allocated array.
! NOTE(review): this is the likely cause of the SIGSEGV recorded after
! this listing.
do i=istart, iend
do j=1, M
x(i,j) = log(abs( sin(0.2*i*j)))
end do
end do
open(unit=ifile, file=filename(1:len_trim(filename)))
! Clip the band so that the averaging window stays inside rows 1..N.
if( istart < NN+1 ) istart = NN+1
if( iend > N-NN ) iend = N-NN
do i=istart, iend
do j=NN+1, M-NN
y(i,j) = 0
! The window reads rows i-NN..i+NN of x, i.e. up to NN rows beyond the
! band this thread filled; the private x holds only neach rows, so those
! neighbouring rows are not available to this thread.
do i1=-NN, NN
do j1 = -NN, NN
y(i, j) = y(i,j) + x(i+i1, j+j1)
end do
end do
! Divide by the number of points in the window.
y(i,j) = y(i,j) / ((2*NN+1)*(2*NN+1))
end do
! One output record per row: the row number, then the averaged values.
write(ifile, *) i, (y(i,j), j=NN+1, M-NN)
end do
close(ifile)
!$omp end parallel
end
OMP_NUM_THREADS= 32
OMP_NUM_THREADS 32
thread num = 14
thread num = 30
thread num = 21
thread num = 13
thread num = 2
thread num = 7
thread num = 9
thread num = 17
thread num = 1
thread num = 22
thread num = 11
thread num = 5
thread num = 20
thread num = 12
thread num = 8
thread num = 31
thread num = 3
thread num = 4
thread num = 10
thread num = 16
thread num = 24
thread num = 27
thread num = 26
thread num = 15
thread num = 23
thread num = 18
thread num = 25
thread num = 0
thread num = 28
thread num = 19
thread num = 6
thread num = 29
forrtl: 致命的なエラー (174): SIGSEGV、segmentation fault occurred
Image PC Routine Line Source
tama.exe 0000000000418875 Unknown Unknown Unknown
tama.exe 00000000004343B4 Unknown Unknown Unknown
tama.exe 0000000000404061 Unknown Unknown Unknown
libiomp5.so 00002B6F6E14F4F3 Unknown Unknown Unknown
$ export OMP_STACKSIZE=64000000
[2015年 5月 2日 土曜日 19:43:13 JST]
[~/OpenMP]
[am@aofd165]
$ ulimit -s unlimited
[2015年 5月 2日 土曜日 19:43:22 JST]
[~/OpenMP]
[am@aofd165]
$ compile.run.sh tama
!
!http://tama.green.gifu-u.ac.jp/~tama/Memo/openmp.html
!
! Per-thread smoothing sample. Reads the thread count from the
! OMP_NUM_THREADS environment variable, assigns each thread a band of
! neach rows, fills x(i,j) = log|sin(0.2*i*j)| on that band, averages x
! over a (2*NN+1) x (2*NN+1) window into y, and writes the rows of y to a
! per-thread file whose name is built from "OUT-" and the thread number.
implicit none
! NOTE(review): MAX_THREAD is declared but never referenced below.
integer, parameter :: N=1000, M=1000, NN=10, MAX_THREAD=10
integer :: i, j, i1, j1
real, allocatable :: x(:,:), y(:,:)
! NOTE(review): the next line has no blank after the sentinel, so it is
! presumably treated as a plain comment rather than a conditional "use";
! omp_get_thread_num is declared external below instead.
!$use omp_lib
character(len=1024) :: omp_num_threads
integer, external :: omp_get_thread_num
integer :: i_omp_num_threads, i_thread
integer :: ifile
character(len=1024) :: filename
integer :: neach, istart, iend
! The thread count is parsed from the environment, not queried from the
! OpenMP runtime.
call getenv("OMP_NUM_THREADS", omp_num_threads)
read(omp_num_threads, *) i_omp_num_threads
write(0,*) 'OMP_NUM_THREADS ', i_omp_num_threads
!allocate( x(N,M) )
!allocate( y(N,M) )
! Integer division: when N is not a multiple of the thread count the
! last N - neach*i_omp_num_threads rows belong to no thread
! (e.g. 32 threads -> neach = 31, rows 993..1000 are never processed).
neach = N/i_omp_num_threads
!$omp parallel private(i_thread, istart, iend, filename, ifile, i, j, x, y )
i_thread = omp_get_thread_num()
write(*,*) 'thread num = ', i_thread
! istart..iend are GLOBAL row numbers of this thread's band.
istart = neach * i_thread + 1
iend = neach * (i_thread+1)
ifile = 10+i_thread
! NOTE(review): the I1 edit descriptor is one character wide, so thread
! numbers of 10 or more do not fit the field.
write(filename, '("OUT-", I1)') i_thread
! x and y are private here and get neach rows with lower bound 1 ...
allocate( x(neach,M) )
allocate( y(neach,M) )
! ... but are indexed with the global row number i. For every thread
! other than 0, istart is already larger than neach, so these stores are
! outside the allocated array.
! NOTE(review): this is the likely cause of the SIGSEGV recorded after
! this listing; raising the stack limits would not change an
! out-of-bounds index.
do i=istart, iend
do j=1, M
x(i,j) = log(abs( sin(0.2*i*j)))
end do
end do
open(unit=ifile, file=filename(1:len_trim(filename)))
! Clip the band so that the averaging window stays inside rows 1..N.
if( istart < NN+1 ) istart = NN+1
if( iend > N-NN ) iend = N-NN
do i=istart, iend
do j=NN+1, M-NN
y(i,j) = 0
! The window reads rows i-NN..i+NN of x, i.e. up to NN rows beyond the
! band this thread filled; the private x holds only neach rows, so those
! neighbouring rows are not available to this thread.
do i1=-NN, NN
do j1 = -NN, NN
y(i, j) = y(i,j) + x(i+i1, j+j1)
end do
end do
! Divide by the number of points in the window.
y(i,j) = y(i,j) / ((2*NN+1)*(2*NN+1))
end do
! One output record per row: the row number, then the averaged values.
write(ifile, *) i, (y(i,j), j=NN+1, M-NN)
end do
close(ifile)
!$omp end parallel
end
OMP_NUM_THREADS= 32
OMP_NUM_THREADS 32
thread num = 10
thread num = 21
thread num = 23
thread num = 24
thread num = 27
thread num = 26
thread num = 1
thread num = 2
thread num = 4
thread num = 20
thread num = 13
thread num = 19
thread num = 16
thread num = 17
thread num = 14
thread num = 3
thread num = 29
thread num = 0
thread num = 30
forrtl: 致命的なエラー (174): SIGSEGV、segmentation fault occurred
Image PC Routine Line Source
tama.exe 0000000000493D6F Unknown Unknown Unknown
tama.exe 000000000046C94F Unknown Unknown Unknown
tama.exe 0000000000463E77 Unknown Unknown Unknown
tama.exe 0000000000434714 Unknown Unknown Unknown
tama.exe 0000000000404061 Unknown Unknown Unknown
libiomp5.so 00002AD4857B44F3 Unknown Unknown Unknown
thread num = 18
thread num = 22
thread num = 28
thread num = 7
thread num = 31
thread num = 15
thread num = 6
thread num = 12
thread num = 8
thread num = 25
thread num = 5
thread num = 11
thread num = 9
real 0m0.041s
user 0m0.163s
sys 0m0.226s