Am 19.01.2022 um 19:44 schrieb Bonita Montero:
> That's slower than the variants with intrinsics.
Here's a little benchmark:
#include <iostream>
#include <stdexcept>
#include <string>
#include <string>
#include <vector>
#include <random>
#include <sstream>
#include <chrono>
#include <cstring>
#include <immintrin.h>
// Pull std:: and std::chrono:: names into scope; the code below uses them unqualified.
using namespace std;
using namespace chrono;
// The three parser variants being compared; they are defined after main().
unsigned long long parseUllIntrinsic( char const *str );
unsigned long long parseUllStd( char const *str );
unsigned long long parseUllBart( char const *str );
// Volatile sink that bench() writes each run's checksum to -- presumably so the
// compiler cannot discard the timed parsing loop as dead code.
unsigned long long volatile vSum;
// Benchmark driver: generates N random decimal strings once, then runs every
// parser over the same strings and prints the mean time per call in nanoseconds.
int main()
{
    using ParseFn = unsigned long long (*)( char const * );
    constexpr std::size_t N = 1000;
    constexpr std::size_t ROUNDS = 10'000;

    // Test data: N values drawn uniformly from the whole unsigned long long
    // range (default-seeded engine, so every run sees the same strings).
    std::mt19937_64 engine;
    std::uniform_int_distribution<unsigned long long> fullRange( 0, ~0ull );
    std::vector<std::string> rNums;
    rNums.reserve( N );
    while( rNums.size() != N )
        rNums.emplace_back( std::to_string( fullRange( engine ) ) );

    // Times ROUNDS passes of parseUllFn over all strings and returns the
    // average nanoseconds per call.
    auto bench = [&]( ParseFn parseUllFn ) -> double
    {
        unsigned long long checksum = 0;
        auto const begin = std::chrono::high_resolution_clock::now();
        for( std::size_t round = 0; round != ROUNDS; ++round )
            for( std::string const &text : rNums )
                checksum += parseUllFn( text.c_str() );
        // Publish the checksum before the clock is read a second time.
        ::vSum = checksum;
        auto const elapsed = std::chrono::high_resolution_clock::now() - begin;
        auto const ns = std::chrono::duration_cast<std::chrono::nanoseconds>( elapsed ).count();
        return static_cast<double>( ns ) / (static_cast<double>( ROUNDS ) * N);
    };

    struct Candidate
    {
        char const *label;
        ParseFn fn;
    };
    Candidate const candidates[] =
    {
        { "intrinsic: ", parseUllIntrinsic },
        { "std: ", parseUllStd },
        { "Bart: ", parseUllBart },
    };
    for( Candidate const &candidate : candidates )
        std::cout << candidate.label << bench( candidate.fn ) << std::endl;
}
// Parses a NUL-terminated decimal string into an unsigned long long, relying on
// compiler intrinsics to detect overflow of the multiply and the add.
// Returns 0 for an empty string. Characters are not checked for being digits.
// Throws std::overflow_error as soon as the accumulated value no longer fits.
// Kept out-of-line so the benchmark measures a real call.
#if defined(_MSC_VER)
__declspec(noinline)
#elif defined(__GNUC__)
__attribute__((noinline))
#endif
unsigned long long parseUllIntrinsic( char const *str )
{
    if( *str == '\0' )
        return 0;
    unsigned long long result = static_cast<unsigned char>( *str ) - '0';
    for( char const *cursor = str + 1; *cursor != '\0'; ++cursor )
    {
        unsigned long long const digit = static_cast<unsigned char>( *cursor ) - '0';
#if defined(__llvm__) || defined(__GNUC__)
        // Short-circuit keeps the original order: multiply first, then add.
        if( __builtin_umulll_overflow( result, 10, &result )
            || __builtin_uaddll_overflow( result, digit, &result ) )
            throw std::overflow_error( "parseUll() overflow" );
#elif defined(_MSC_VER)
        unsigned long long high;
        result = _mulx_u64( result, 10, &high );
        // _addcarry_u64 specified but missing (MSVC 2022), so the carry is
        // detected by checking whether the sum wrapped around.
        if( high != 0 || result + digit < result )
            throw std::overflow_error( "parseUll() overflow" );
        result += digit;
#else
#error no intinsic version
#endif
    }
    return result;
}
// Parses a NUL-terminated decimal string into an unsigned long long using only
// standard C++ arithmetic to detect overflow (no compiler intrinsics).
// Returns 0 for an empty string. Characters are not checked for being digits.
// Throws std::overflow_error as soon as the accumulated value no longer fits.
// Kept out-of-line so the benchmark measures a real call.
#if defined(_MSC_VER)
__declspec(noinline)
#elif defined(__GNUC__)
__attribute__((noinline))
#endif
unsigned long long parseUllStd( char const *str )
{
    if( *str == '\0' )
        return 0;
    unsigned long long result = static_cast<unsigned char>( *str ) - '0';
    for( char const *cursor = str + 1; *cursor != '\0'; ++cursor )
    {
        // Unsigned multiplication wraps silently; it overflowed exactly when
        // dividing the product by 10 does not give the original value back.
        if( result * 10 / 10 != result )
            throw std::overflow_error( "parseUll() overflow" );
        unsigned long long const scaled = result * 10;
        unsigned char const digit = *cursor - '0';
        // A wrapped unsigned sum is smaller than the operand it started from.
        unsigned long long const next = scaled + digit;
        if( next < scaled )
            throw std::overflow_error( "parseUll() overflow" );
        result = next;
    }
    return result;
}
// Parses a NUL-terminated decimal string into an unsigned long long. Overflow
// is detected once, up front, by comparing the text against the decimal
// spelling of the largest representable value, so the conversion loop itself
// needs no checks. The limit literal assumes a 64-bit unsigned long long.
// Returns 0 for an empty (or all-zero) string. Characters are not checked for
// being digits.
// Throws std::overflow_error if the number does not fit.
// Kept out-of-line so the benchmark measures a real call.
#if defined(_MSC_VER)
__declspec(noinline)
#elif defined(__GNUC__)
__attribute__((noinline))
#endif
unsigned long long parseUllBart( char const *str )
{
    static constexpr char MAX_TEXT[] = "18446744073709551615";
    constexpr std::size_t MAX_DIGITS = sizeof MAX_TEXT - 1;

    // Leading zeros do not change the value but would count towards the length
    // limit below and make in-range input look like an overflow, so drop them
    // before measuring.
    while( *str == '0' )
        ++str;

    // More digits than the maximum always overflows; with equally many digits
    // a plain lexicographic comparison orders the numbers correctly.
    std::size_t const len = std::strlen( str );
    if( len > MAX_DIGITS || ( len == MAX_DIGITS && std::strcmp( str, MAX_TEXT ) > 0 ) )
        throw std::overflow_error( "parseUll() overflow" );

    unsigned long long value = 0;
    for( ; *str; ++str )
        value = value * 10 + ( static_cast<unsigned char>( *str ) - '0' );
    return value;
}
Here are the MSVC results (average nanoseconds per parsed number):
intrinsic: 20.4304
std: 20.3481
Bart: 22.2739
The gcc -O2 results:
intrinsic: 20.4639
std: 20.7395
Bart: 23.3007
The clang++ -O2 results:
intrinsic: 21.1037
std: 21.9567
Bart: 22.7341