# 怎样提高调用数学函数的程序的性能

## MASS 高性能库

MASS 指的是数学加速子系统（Mathematical Acceleration Subsystem）。它由数学函数组成，这些数学函数是为在各种 IBM 计算平台上优化性能所设定的。MASS 最初是由 IBM 公司在 1995 年启动的，并在随后的发展中继续得到改善，一直到现如今仍然在改进。

## 什么是自动化向量？

```subroutine sub (y, x, n)
real*8 y(*), x(*)
integer n, i
intrinsic exp
do i=1,n
y(i) = exp(x(i))  ! standard intrinsic
end do
end subroutine```

```include 'massv.include'
subroutine sub (y, x, n)
real*8 y(*), x(*)
integer n
call vexp (y, x, n)  ! MASS vector function
end subroutine```

## 自动向量化的汇编器选项

1. -qhot -qnostrict (for Fortran)
2. -qhot -qnostrict –qignerrno (for C/C++)
3. -qhot -O3
4. -O4
5. -O5

## 附录 1 – Fortran DFT 源程序

```  subroutine dft (x, a, phi , n)
real*8 x(n), a(n), phi(n)
integer n

! Compute discrete Fourier transform of real inputs
! x(i) and convert to polar form.

real*8, parameter :: pi=3.1415926535897932384d0
real*8 y_re(n), y_im(n), t, term_re, term_im
intrinsic exp, cos, sin, sqrt, atan

y_re(1:n) = 0.d0
y_im(1:n) = 0.d0
do k=1,n
! compute y(k), k-th DFT output
do i=1,n
! compute i-th term of y(k):
! x(k)*exp(-2*pi*I*(k-1)*(i-1)/n)
! compute real and imaginary parts of i-th term
! using exp(I*t)=exp(t)*(cos(t)+I*sin(t))

t = -2.d0*pi*(k-1)*(i-1)/n
term_re = x(i) * cos(t) * exp(t)
term_im = x(i) * sin(t) * exp(t)

y_re(k) = y_re(k) + term_re
y_im(k) = y_im(k) + term_im
end do
end do

! transform y to polar coordinates
do k=1,n
! compute amplitude of y(k)
a(k) = sqrt (y_re(k)**2 + y_im(k)**2)
! compute phase of y(k)
phi(k) = atan (y_im(k) / y_reim(k))
end do

end subroutine

! initialize input data
subroutine init (a, n)
real*8 a(n)
integer n
intrinsic sin,sqrt

do j=1,n
a(j)=sin(1.d0/sqrt(real(j,8)))
end do
end subroutine```

## 附录 2 – C DFT 源程序

```#include <math.h>
#define PI 3.1415926535897932384
void dft(double x[],double a[],double phi[],int *m)
{
double y_re[NMAX], y_im[NMAX], t, s, term_re, term_im;
int i,j,k,n=*m;
for(i=0;i<n;++i) {
y_re[i]=y_im[i]=0;
}
for(k=0;k<n;++k)
{
// compute y(k), k-th DFT output
for(i=0;i<n;++i)
{
// compute i-th term of y(k):
// x(k)*exp(-2*pi*I*(k-1)*(i-1)/n)
// compute real and imaginary parts of i-th term
// using exp(I*t)=exp(t)*(cos(t)+I*sin(t))

t=-2.*PI*k*i/(double)n;
term_re=x[i]*exp(t)*cos(t);
term_im=x[i]*exp(t)*sin(t);

y_re[k]+=term_re;
y_im[k]+=term_im;
}
}

// transform y to polar coordinates
for(k=0;k<n;++k)
{
// compute amplitude of y(k)
a[k]=sqrt(y_re[k]*y_re[k]+y_im[k]*y_im[k]);
// compute phase of y(k)
phi[k]=atan2(y_im[k],y_re[k]);
}
}

// initialize input data
void init(double a[],int *m)
{
int j,n=*m;
for(j=0;j<n;++j)
{
a[j]=sin(1./sqrt((double)j+1.0));
}
}

// Dummy function to use result, preventing compiler from
// optimizing away the computation.
void consume(double a[],double b[],double c[])
{
}```

## 附录 3 – 驱动器程序

```  program main
interface
subroutine dft(x,a,phi,n)
real*8 x(n),a(n),phi(n)
integer n
end subroutine
subroutine init(a,n)
real*8 a(n)
integer n
end subroutine
subroutine consume(a,b,c)
real*8 a(*),b(*),c(*)
end subroutine
end interface

! Parameters:
!   nmax is the problem size.
!   nrep is the number of repetitions of the
!        problem. This should be chosen so that
!        the elapsed time is long enough to give
!        sufficient timing resolution.
!   cyc is the clock frequency in Hz for the
!       processor that the program is to be run on.
!       (Can be found from AIX command pmcycles.)

integer, parameter :: nmax=1000
integer, parameter :: nrep=100
real*8, parameter :: cyc=4704000000.d0

real*8 x(nmax), a(nmax), phi(nmax)
real*8 tx, ty, accum, del(4)
intrinsic sin, sqrt
real*8 rtc

del(4)=0.d0
acc = 0.d0
do k=1,nrep
tx=rtc()
call init(x,nmax)
call dft(x,a,phi,nmax)
ty=rtc()
call consume(x,a,phi)
do j=1,nmax
acc = acc + a(j) + phi(j)
end do
del(4) = del(4) + (ty-tx)
end do
del(1) = del(4)/real(nmax,8)
del(2) = del(1)/real(nrep,8)
del(3) = cyc*del(2)
print *,'acc=',acc/real(nrep,8),' n=',nmax,
&    ' r=',nrep, ' a=',del(1),' b=',del(2),
&    ' c=',del(3),' w=',del(4)
end program```

## 声明

#### 评论

static.content.url=http://www.ibm.com/developerworks/js/artrating/
SITE_ID=10
Zone=Rational
ArticleID=500109
ArticleTitle=怎样提高调用数学函数的程序的性能
publish-date=07122010