Bonita Montero
unread,Sep 17, 2022, 10:36:43 AM9/17/22You do not have permission to delete messages in this group
Sign in to report message
Either email addresses are anonymous for this group or you need the view member email addresses permission to view the original message
to
I wanted to check if denormal numbers have slower performance on
modern CPUs. Intel introduced the DAZ / FTZ Bits with SSE1 because
denormals were even handled in microcode:
#include <iostream>
#include <bit>
#include <cstdint>
#include <chrono>
#include <utility>
#include <atomic>
using namespace std;
using namespace chrono;
uint64_t denScale( uint64_t rounds, bool den );
int main()
{
auto bench = []( bool den ) -> double
{
constexpr uint64_t ROUNDS = 25'000'000;
auto start = high_resolution_clock::now();
int64_t nScale = denScale( ROUNDS, den );
return (double)duration_cast<nanoseconds>(
high_resolution_clock::now() - start ).count() / nScale;
};
double
tDen = bench( true ),
tNorm = bench( false ),
rel = tDen / tNorm - 1;
cout << tDen << endl;
cout << tNorm << endl;
cout << trunc( 100 * 10 * rel + 0.5 ) / 10 << "%" << endl;
}
MASM code:
PUBLIC ?denScale@@YA_K_K_N@Z
CONST SEGMENT
DEN DQ 00008000000000000h
ONE DQ 03FF0000000000000h
P5 DQ 03fe0000000000000h
CONST ENDS
_TEXT SEGMENT
?denScale@@YA_K_K_N@Z PROC
xor rax, rax
test rcx, rcx
jz byeBye
mov r8, ONE
mov r9, DEN
test dl, dl
cmovnz r8, r9
movq xmm1, P5
mov rax, rcx
loopThis:
movq xmm0, r8
REPT 52
mulsd xmm0, xmm1
ENDM
sub rcx, 1
jae loopThis
mov rdx, 52
mul rdx
byeBye:
ret
?denScale@@YA_K_K_N@Z ENDP
_TEXT ENDS
END
For my PC normal numbers have a 25% higher throughput.
Feel free to post your results also.