Why is std::sin() and std::cos() slower than sin()

Test code:

#include <cmath>
#include <cstdio>

const int N = 4096;
const float PI = 3.1415926535897932384626;

float cosine[N][N];
float sine[N][N];

int main() {
    printf("a\n");
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            cosine[i][j] = cos(i*j*2*PI/N);
            sine[i][j] = sin(-i*j*2*PI/N);
        }
    }
    printf("b\n");
}

Here is the time:

$ g++ main.cc -o main
$ time ./main
a
b

real    0m1.406s
user    0m1.370s
sys     0m0.030s

After adding using namespace std;, the time is:

$ g++ main.cc -o main
$ time ./main
a
b

real    0m8.743s
user    0m8.680s
sys     0m0.030s

Compiler:

$ g++ --version
g++ (Ubuntu/Linaro 4.5.2-8ubuntu4) 4.5.2

Assembly:

Dump of assembler code for function sin@plt:                                    
0x0000000000400500 <+0>:     jmpq   *0x200b12(%rip)        # 0x601018 <_GLOBAL_OFFSET_TABLE_+48>
0x0000000000400506 <+6>:     pushq  $0x3                                     
0x000000000040050b <+11>:    jmpq   0x4004c0                                 
End of assembler dump.

Dump of assembler code for function std::sin(float):                            
0x0000000000400702 <+0>:     push   %rbp                                     
0x0000000000400703 <+1>:     mov    %rsp,%rbp                                
0x0000000000400706 <+4>:     sub    $0x10,%rsp                               
0x000000000040070a <+8>:     movss  %xmm0,-0x4(%rbp)                         
0x000000000040070f <+13>:    movss  -0x4(%rbp),%xmm0                         
0x0000000000400714 <+18>:    callq  0x400500 <sinf@plt>                      
0x0000000000400719 <+23>:    leaveq                                          
0x000000000040071a <+24>:    retq                                            
End of assembler dump.

Dump of assembler code for function sinf@plt:                                   
0x0000000000400500 <+0>:     jmpq   *0x200b12(%rip)        # 0x601018 <_GLOBAL_OFFSET_TABLE_+48>
0x0000000000400506 <+6>:     pushq  $0x3                                     
0x000000000040050b <+11>:    jmpq   0x4004c0                                 
End of assembler dump.

标签： c++ performance cmath sin cos

4条回答

小情绪 Triste *

2楼-- · 2019-02-08 00:11

Use -S flag in compiler command line and check the difference between assembler output. Maybe using namespace std; gives a lot of unused stuff in executable file.

0人赞添加讨论(0) 举报

做个烂人

3楼-- · 2019-02-08 00:16

You're using a different overload:

Try

        double angle = i*j*2*PI/N;
        cosine[i][j] = cos(angle);
        sine[i][j] = sin(angle);

it should perform the same with or without using namespace std;

0人赞添加讨论(0) 举报

女痞

4楼-- · 2019-02-08 00:23

I guess the difference is that there are overloads for std::sin() for float and for double, while sin() only takes double. Inside std::sin() for floats, there may be a conversion to double, then a call to std::sin() for doubles, and then a conversion of the result back to float, making it slower.

0人赞添加讨论(0) 举报

萌系小妹纸

5楼-- · 2019-02-08 00:27

I did some measurements using clang with -O3 optimization, running on an Intel Core i7. I found that:

std::sin on float has the same cost as sinf
std::sin on double has the same cost as sin
The sin functions on double are 2.5x slower than on float (again, running on an Intel Core i7).

Here is the full code to reproduce it:

#include <chrono>
#include <cmath>
#include <iostream>

template<typename Clock>
struct Timer
{
    using rep = typename Clock::rep;
    using time_point = typename Clock::time_point;
    using resolution = typename Clock::duration;

    Timer(rep& duration) :
    duration(&duration) {
        startTime = Clock::now();
    }
    ~Timer() {
        using namespace std::chrono;
        *duration = duration_cast<resolution>(Clock::now() - startTime).count();
    }
private:

    time_point startTime;
    rep* duration;
};

template<typename T, typename F>
void testSin(F sin_func) {
  using namespace std;
  using namespace std::chrono;
  high_resolution_clock::rep duration = 0;
  T sum {};
  {
    Timer<high_resolution_clock> t(duration);
    for(int i=0; i<100000000; ++i) {
      sum += sin_func(static_cast<T>(i));
    }
  }
  cout << duration << endl;
  cout << "  " << sum << endl;
}

int main() {
  testSin<float> ([] (float  v) { return std::sin(v); });
  testSin<float> ([] (float  v) { return sinf(v); });
  testSin<double>([] (double v) { return std::sin(v); });
  testSin<double>([] (double v) { return sin(v); });
  return 0;
}

I'd be interested if people could report, in the comments on the results on their architectures, especially regarding float vs. double time.

0人赞添加讨论(0) 举报

Why is std::sin() and std::cos() slower than sin()

采纳回答

编辑标签

举报内容

检举类型

检举原因

检举说明(必填)

打开微信“扫一扫”，打开网页后点击屏幕右上角分享按钮

付费偷看金额在0.1-10元之间