float,double,long doubleで数学関数の実行速度の比を出してみる

C言語の数学関数のうちpow, sin, asin, abs, sqrt, exp, logについてfloat,double,long doubleの実行速度を比べてみた。出したのはdouble, long doubleの計算時間とfloatの計算時間の比。

生成する乱数の範囲が適当かどうかわからない。ご指摘大歓迎です。

/*
 * float, double, long doubleの数学関数の計算速度測定
 */
#include<errno.h>  // errno, ERANGE
#include<limits.h> // INT_MAX
#include<math.h>
#include<stdio.h>
#include<stdlib.h> // rand, srand, strtol
#include<string.h> // strcpy
#include<time.h>
#include<sys/time.h>

/*
 * Return the current wall-clock time in seconds, as reported by
 * gettimeofday() (whole seconds plus the microsecond part scaled by 1e-6).
 *
 * Callers take the difference of two readings to obtain an interval.
 * If gettimeofday() fails, a diagnostic is written to stderr and 0.0 is
 * returned, so the result never depends on an uninitialized timeval.
 */
double elapsed(void)
{
  struct timeval tp = {0, 0};

  /* The timezone argument is obsolete; NULL is the portable choice. */
  if (gettimeofday(&tp, NULL) != 0) {
    perror("error at elapsed");
    return 0.0;
  }
  return (double) tp.tv_sec + (double) tp.tv_usec * 1.0e-6;
}

/*
 * Return a pseudo-random float in the closed range [-1e5, 1e5].
 *
 * rand() / RAND_MAX lies in [0, 1], so 1e5 - 2e5 * u spans [-1e5, 1e5];
 * the range is symmetric around zero, not [1e-5, 1e5].
 *
 * NOTE(review): about half of the values are negative and almost all lie
 * outside [-1, 1], i.e. outside the domain of sqrt/log and asin, which the
 * benchmark wrappers in this file feed with this generator. Confirm that
 * this is the intended input distribution.
 */
float randf(void)
{
    const float unit = rand() / (float)RAND_MAX;

    return 1e5f - 2e5f * unit;
}
/*
 * Return a pseudo-random double in the closed range [-1e5, 1e5].
 *
 * One rand() draw is scaled to a fraction in [0, 1] and mapped linearly
 * so that 0 gives 1e5 and 1 gives -1e5.
 */
double randd()
{
    const double span = 2e5;
    const double fraction = rand() / (double)RAND_MAX;

    return 1e5 - span * fraction;
}
/*
 * Return a pseudo-random long double in the closed range [-1e5, 1e5].
 *
 * One rand() draw is scaled to a fraction in [0, 1] and mapped linearly
 * so that 0 gives 1e5 and 1 gives -1e5.
 */
long double randl()
{
    const long double span = 2e5L;
    const long double fraction = rand() / (long double)RAND_MAX;

    return 1e5L - span * fraction;
}

/* Number of calls made per measurement; main() overrides it from argv[1]. */
int n = 50000;

/*
 * Call f() n times back to back and return the wall-clock time the whole
 * loop took, in seconds (the unit of elapsed()), not milliseconds.
 *
 * The measured interval includes the loop and indirect-call overhead.
 */
double timeit(void (*f)(void))
{
    const double start = elapsed();

    for (int i = 0; i < n; i++) {
        f();
    }
    return elapsed() - start;
}
/*
 * float benchmark bodies: each one evaluates a single <math.h> function on
 * fresh randf() input.
 *
 * The result is stored into a volatile object so the call cannot be
 * discarded as dead code when optimizing.
 * NOTE(review): in the published -O3 timings, sin and abs cost the same as
 * the bare rand baseline, which suggests those calls were being removed
 * when their results were ignored.
 */
static volatile float sink_f;

void Powf (void) { sink_f = powf  (randf(), randf()); }
void Sinf (void) { sink_f = sinf  (randf()); }
void Asinf(void) { sink_f = asinf (randf()); }
void Absf (void) { sink_f = fabsf (randf()); }
void Sqrtf(void) { sink_f = sqrtf (randf()); }
void Expf (void) { sink_f = expf  (randf()); }
void Logf (void) { sink_f = logf  (randf()); }

/*
 * double benchmark bodies: each one evaluates a single <math.h> function
 * on fresh randd() input.
 *
 * The result is stored into a volatile object so the call cannot be
 * discarded as dead code when optimizing.
 * NOTE(review): in the published -O3 timings, sin and abs cost the same as
 * the bare rand baseline, which suggests those calls were being removed
 * when their results were ignored.
 */
static volatile double sink_d;

void Pow  (void) { sink_d = pow   (randd(), randd()); }
void Sin  (void) { sink_d = sin   (randd()); }
void Asin (void) { sink_d = asin  (randd()); }
void Abs  (void) { sink_d = fabs  (randd()); }
void Sqrt (void) { sink_d = sqrt  (randd()); }
void Exp  (void) { sink_d = exp   (randd()); }
void Log  (void) { sink_d = log   (randd()); }

/*
 * long double benchmark bodies: each one evaluates a single <math.h>
 * function on fresh randl() input.
 *
 * The result is stored into a volatile object so the call cannot be
 * discarded as dead code when optimizing.
 * NOTE(review): in the published -O3 timings, sin and abs cost the same as
 * the bare rand baseline, which suggests those calls were being removed
 * when their results were ignored.
 */
static volatile long double sink_l;

void Powl (void) { sink_l = powl  (randl(), randl()); }
void Sinl (void) { sink_l = sinl  (randl()); }
void Asinl(void) { sink_l = asinl (randl()); }
void Absl (void) { sink_l = fabsl (randl()); }
void Sqrtl(void) { sink_l = sqrtl (randl()); }
void Expl (void) { sink_l = expl  (randl()); }
void Logl (void) { sink_l = logl  (randl()); }


/*
 * Baseline bodies: generate one random input of each type and nothing
 * else, so the cost of input generation can be compared with the rows
 * that also call a math function.
 *
 * The value is stored into a volatile object so the int-to-floating
 * conversion and scaling cannot be discarded as unused when optimizing.
 */
static volatile float       rand_sink_f;
static volatile double      rand_sink_d;
static volatile long double rand_sink_l;

void Randf(void) { rand_sink_f = randf(); }
void Randd(void) { rand_sink_d = randd(); }
void Randl(void) { rand_sink_l = randl(); }

/*
 * Benchmark driver for pow, sin, asin, abs, sqrt, exp, log and the bare
 * random-number baseline.
 *
 * Usage: prog [iterations]
 *   iterations  optional positive decimal integer replacing the default
 *               value of the global n.
 *
 * For every function one line is printed:
 *   name  float-time  double-time  long-double-time  double/float  long-double/float
 * where the times are whatever timeit() returns for n calls.
 *
 * Returns 0 on success, EXIT_FAILURE if the iteration count is not a
 * positive integer that fits in an int.
 */
int main(int argc, char** argv)
{
#define FLT "%.5lf "
#define FMT "%s " FLT FLT FLT FLT FLT "\n"
    enum { NFUNCS = 8, NTYPES = 3 };

    srand((unsigned)time(NULL));

    if (argc >= 2) {
        char *end = NULL;
        long requested;

        /* strtol, unlike atoi, lets us reject garbage and overflow. */
        errno = 0;
        requested = strtol(argv[1], &end, 10);
        if (end == argv[1] || *end != '\0' || errno == ERANGE
            || requested <= 0 || requested > INT_MAX) {
            fprintf(stderr, "usage: %s [iterations > 0]\n", argv[0]);
            return EXIT_FAILURE;
        }
        n = (int)requested;
    }

    printf("n = %d\n", n);

    /* One row per function; columns are float, double, long double. */
    void (*fun[NFUNCS][NTYPES])(void) = {
        {Powf, Pow, Powl}, {Sinf, Sin, Sinl}, {Asinf, Asin, Asinl},
        {Absf, Abs, Absl}, {Sqrtf, Sqrt, Sqrtl}, {Expf, Exp, Expl},
        {Logf, Log, Logl}, {Randf, Randd, Randl}};
    static const char *const fname[NFUNCS] = {
        "pow", "sin", "asin", "abs", "sqrt", "exp", "log", "rand"};

    for (int i = 0; i < NFUNCS; i++) {
        const double f = timeit(fun[i][0]);
        const double d = timeit(fun[i][1]);
        const double l = timeit(fun[i][2]);

        printf(FMT, fname[i], f, d, l, d/f, l/f);
    }
    return 0;
}

実行結果(表)

Core2 QuadのマシンとCore2 DuoのマシンとAtom N270のマシン(dell inspiron mini9)で実行してみた。コンパイルオプションはいずれも"-lm -O3"にした。繰り返し回数は1000万回に設定した。

Core2 Quad

OS	Fedora 9 64bit
Mem	4G
gcc	4.3.0; Red Hat 4.3.0-8

関数	float	double	ldouble	d/f比	l/f比
pow	1.01140	1.32000	2.08669	1.30512	2.06316
sin	0.09872	0.10074	0.09872	1.02043	0.99994
asin	0.26519	0.39756	1.50401	1.49916	5.67148
abs	0.10074	0.09871	0.09872	0.97986	0.97987
sqrt	0.29654	0.35533	1.54225	1.19826	5.20082
exp	0.72587	0.40774	1.38476	0.56173	1.90772
log	0.43713	0.59930	0.73032	1.37101	1.67072
rand	0.09879	0.09875	0.10072	0.99952	1.01950

Core2 Duo

OS	Ubuntu 8.04 32bit
Mem	2G
gcc	4.2.4; 4.2.4-1ubuntu3

関数	float	double	ldouble	d/f比	l/f比
pow	6.46111	6.82862	7.63098	1.05688	1.18106
sin	0.26396	0.26451	0.26510	1.00208	1.00431
asin	2.87325	3.06951	2.47927	1.06831	0.86288
abs	0.26445	0.26461	0.26444	1.00059	0.99993
sqrt	3.00352	3.18386	3.26626	1.06004	1.08748
exp	2.29557	2.41368	2.33544	1.05145	1.01737
log	1.11410	1.28898	1.40549	1.15697	1.26154
rand	0.26454	0.26420	0.26420	0.99872	0.99872

Atom N270

OS	Windows XP SP3
Mem	1G
gcc	mingw4.2.1 (Ubuntu8.04のmingw32パッケージ)

関数	float	double	ldouble	d/f比	l/f比
pow	11.39063	11.21875	12.50000	0.98491	1.09739
sin	0.48438	0.50000	0.50000	1.03226	1.03226
asin	6.14063	6.14063	6.18750	1.00000	1.00763
abs	0.48438	0.50000	0.48438	1.03226	1.00000
sqrt	3.07813	5.46875	3.20313	1.77665	1.04061
exp	11.32813	11.40625	1.95313	1.00690	0.17241
log	2.90625	6.15625	2.59375	2.11828	0.89247
rand	0.51563	0.48438	0.51563	0.93939	1.00000

実行結果(グラフ)

long doubleが相対的に遅い。

平坦。

long doubleのexpがめちゃくちゃ速い。sqrt, logでdoubleが遅い。

どういう実装になってるんでしょ

absの実行速度が型で変わらないのは、どの型でも符号ビットをクリアする(0にする)だけだろうから納得。
sin, sqrt, logなどに関しては要求される精度が高くなるほど時間がかかるのかなと思っていたらそうではなかった。特にAtom N270でのlong doubleのexpは他の型のexpよりもずっと速い。
glibcでは数学関数ライブラリをどう実装してるんだろーか。気が向いたら調べる。