Bonita Montero
unread,Nov 7, 2022, 10:31:22 PM11/7/22You do not have permission to delete messages in this group
Either email addresses are anonymous for this group or you need the view member email addresses permission to view the original message
to
/// Decodes a UTF-8 byte sequence and passes every decoded code point to
/// `consumer` as a `char32_t`.
///
/// @tparam Zero   When false, the input is the range [begin, end). When true,
///                `end` is ignored and decoding stops at the first zero byte.
/// @tparam Throw  When true, errors are reported by throwing `utf8_exception`;
///                when false, the error code is returned instead.
/// @param begin     Iterator to the first byte of the input.
/// @param end       Iterator past the last byte (unused when `Zero` is true).
/// @param consumer  Callable invoked once per decoded code point, in order.
///                  Code points decoded before an error have already been
///                  delivered when the error is reported.
/// @return `err_t::NO_ERROR` when the whole input was decoded;
///         `err_t::LENGTH_ERROR` when a multi-byte sequence is cut off by the
///         end of the input (or by the zero terminator in `Zero` mode);
///         `err_t::ENCODING_ERROR` for an invalid lead byte, an invalid
///         continuation byte, an overlong encoding, a UTF-16 surrogate or a
///         value above U+10FFFF. Errors are only returned when `Throw` is false.
/// @throws utf8_exception  carrying the error code, when `Throw` is true.
template<bool Zero = false, bool Throw = true,
    std::random_access_iterator Iterator, typename Consumer>
    requires std::convertible_to<std::iter_value_t<Iterator>, char8_t> &&
        requires( Consumer consumer ) { { consumer( (char32_t)-1 ) }; }
utf8_exception::err_t parseUtf8( Iterator begin, Iterator end, Consumer consumer )
{
    using err_t = utf8_exception::err_t;
    using diff_t = std::iter_difference_t<Iterator>;

    constexpr char32_t UNICODE_MAX = 0x10FFFF;
    constexpr char32_t SURROGATE_FIRST = 0xD800;
    constexpr char32_t SURROGATE_LAST = 0xDFFF;

    Iterator it = begin;

    // Reads sizeof...(chars) continuation bytes from `it` into `chars` and
    // verifies that each one has the form 10xxxxxx.
    auto further = [&]<typename ... Chars>( Chars &... chars ) -> err_t
    {
        if constexpr( !Zero )
        {
            // Bounded input: make sure the whole sequence is available first.
            if( end - it < static_cast<diff_t>( sizeof ...(Chars) ) ) [[unlikely]]
                return err_t::LENGTH_ERROR;
            // The comma fold is sequenced left to right, so every byte ends up
            // in the matching variable; folding the reads over `&` would leave
            // the increments of `it` unsequenced.
            ((chars = *it++), ...);
            if( (((chars & 0xC0) == 0x80) && ...) ) [[likely]]
                return err_t::NO_ERROR;
            return err_t::ENCODING_ERROR;
        }
        else
        {
            // Zero-terminated input: `&&` stops reading at the first byte that
            // is not a continuation byte, so the terminator is never passed.
            if( ((((chars = *it++) & 0xC0) == 0x80) && ...) ) [[likely]]
                return err_t::NO_ERROR;
            // Classify the first offending byte: a zero byte means the
            // sequence was truncated, anything else is malformed. The `||`
            // fold stops at that byte, so bytes that were not read by the
            // fold above are not inspected.
            err_t err;
            (static_cast<bool>( err = !chars ? err_t::LENGTH_ERROR
                : (chars & 0xC0) != 0x80 ? err_t::ENCODING_ERROR
                : err_t::NO_ERROR ) || ...);
            return err;
        }
    };

    // Extracts the low `bits` payload bits of a UTF-8 byte.
    auto juice = []( char8_t c, unsigned bits ) -> char32_t
    {
        return static_cast<char32_t>( c & (0xFF >> (8 - bits)) );
    };

    // Reports an error according to the `Throw` policy.
    auto ret = []( err_t err ) -> err_t
    {
        if constexpr( !Throw )
            return err;
        else
            throw utf8_exception( err );
    };

    char8_t c0, c1, c2, c3;
    // In bounded mode the loop ends when the cursor `it` reaches `end`;
    // in Zero mode it ends at the terminating zero byte (see `break` below).
    while( Zero || it != end )
    {
        char32_t c32;
        c0 = *it++;
        if( c0 < 0x80 ) [[likely]]
        {
            // Single-byte (ASCII) code point.
            if constexpr( Zero )
                if( !c0 ) [[unlikely]]
                    break;
            c32 = c0;
        }
        else if( (c0 & 0xE0) == 0xC0 )
        {
            // Two-byte sequence: 110xxxxx 10xxxxxx.
            if( err_t err = further( c1 ); static_cast<bool>( err ) ) [[unlikely]]
                return ret( err );
            c32 = juice( c0, 5 ) << 6 | juice( c1, 6 );
            // Values below U+0080 must use the one-byte form (overlong).
            if( c32 < 0x80 ) [[unlikely]]
                return ret( err_t::ENCODING_ERROR );
        }
        else if( (c0 & 0xF0) == 0xE0 )
        {
            // Three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx.
            if( err_t err = further( c1, c2 ); static_cast<bool>( err ) ) [[unlikely]]
                return ret( err );
            c32 = juice( c0, 4 ) << 12 | juice( c1, 6 ) << 6 | juice( c2, 6 );
            // Reject overlong forms and UTF-16 surrogates, which are not
            // valid Unicode scalar values.
            if( c32 < 0x800 || (c32 >= SURROGATE_FIRST && c32 <= SURROGATE_LAST) ) [[unlikely]]
                return ret( err_t::ENCODING_ERROR );
        }
        else
        {
            // Four-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
            // Anything else (stray continuation byte, 0xF8..0xFF) is invalid.
            if( (c0 & 0xF8) != 0xF0 ) [[unlikely]]
                return ret( err_t::ENCODING_ERROR );
            if( err_t err = further( c1, c2, c3 ); static_cast<bool>( err ) ) [[unlikely]]
                return ret( err );
            c32 = juice( c0, 3 ) << 18 | juice( c1, 6 ) << 12
                | juice( c2, 6 ) << 6 | juice( c3, 6 );
            // Reject overlong forms and values beyond the Unicode range.
            if( c32 < 0x10000 || c32 > UNICODE_MAX ) [[unlikely]]
                return ret( err_t::ENCODING_ERROR );
        }
        consumer( c32 );
    }
    return err_t::NO_ERROR;
}