Base64Encode/Decode and Unicode

Frank Natoli

unread,

May 14, 2006, 10:26:41 AM5/14/06

to

Most WIN32 functions with string arguments have at least three
declarations, without suffix, with "A" suffix (for ASCII) and with "W"
suffix (for wide/Unicode). Base64Encode/Decode appear to not have such
variants. Am I correct in then assuming that the output string argument
to Base64Encode and input string argument to Base64Decode is eight bit
ASCII, period? And thus if my program is compiled for Unicode, I must
use wcstombs or mbstowcs to adapt internal program strings to these
functions? Thanks.

Carl Daniel [VC++ MVP]

unread,

May 14, 2006, 10:39:47 AM5/14/06

to

Frank Natoli wrote:
> Most WIN32 functions with string arguments have at least three
> declarations, without suffix, with "A" suffix (for ASCII) and with "W"
> suffix (for wide/Unicode).

Really just two - the one with no suffix is a preprocessor macro that
expands to one of the other two based on whether UNICODE (or is it
_UNICODE?) is defined.

> Base64Encode/Decode appear to not have such
> variants. Am I correct in then assuming that the output string
> argument to Base64Encode and input string argument to Base64Decode is
> eight bit ASCII, period? And thus if my program is compiled for
> Unicode, I must use wcstombs or mbstowcs to adapt internal program
> strings to these functions? Thanks.

I'm assuming you're talking about the functions in ATL server- there's no
Win32 API function named Base64Encode/Decode. In the case of the ATL server
functions, then yes, the base64 string is always a narrow string and if
you're using wide strings then you'll need to widen/narrow as appropriate
when calling the base64 functions.

-cd

andy...@gmail.com

unread,

May 15, 2006, 2:35:34 AM5/15/06

to

BOOL Base64Encode(
const BYTE *pbSrcData,
int nSrcLen,
LPSTR szDest,
int *pnDestLen,
DWORD dwFlags=ATL_BASE64_FLAG_NONE) throw()

What is nSrcLen measured in? Unicode or ASCII makes no difference here, I
think: the input is raw BYTEs, not ASCII chars, so nSrcLen is a byte count.
The encoded result is ASCII, which you can always convert to Unicode afterwards.

// From Active Template Library.
// atlenc.h MS VC.NET 2003
// Copyright (C) Microsoft Corporation
// All rights reserved.
//
// This source code is only intended as a supplement to the
// Active Template Library Reference and related
// electronic documentation provided with the library.
// See these sources for detailed information regarding the
// Active Template Library product.
/*
#include <crtdbg.h>
#ifndef ATLASSERT
#define ATLASSERT(expr) _ASSERTE(expr)
#endif*/

//=======================================================================
// Base64Encode/Base64Decode
// compliant with RFC 2045
//=======================================================================
//
// Flags for the dwFlags parameter of Base64Encode and
// Base64EncodeGetRequiredLength. NOPAD and NOCRLF are tested with '&',
// so they can be combined with '|'.
// No flag set: output is '='-padded and broken into lines with CRLF.
#define ATL_BASE64_FLAG_NONE 0
// Do not append trailing '=' padding characters.
#define ATL_BASE64_FLAG_NOPAD 1
// Do not insert CRLF line breaks into the output.
#define ATL_BASE64_FLAG_NOCRLF 2
//----------------------------------------------------------------------
// Returns a destination buffer size, in chars, large enough for
// Base64Encode to encode nSrcLen bytes with the given flags.
//
//   nSrcLen  number of source bytes; a negative value is sized like an
//            empty input
//   dwFlags  ATL_BASE64_FLAG_NONE, or a combination of
//            ATL_BASE64_FLAG_NOPAD and ATL_BASE64_FLAG_NOCRLF
//
// The result is an upper bound, not the exact encoded length. If the
// bound does not fit in an int it saturates at the largest int value
// instead of overflowing.
int Base64EncodeGetRequiredLength(int nSrcLen, DWORD
dwFlags=ATL_BASE64_FLAG_NONE) throw()
{
    // Largest int value, spelled without <climits> so that this block
    // does not need an extra include.
    const long long nIntMax = static_cast<long long>(~0u >> 1);

    // All arithmetic is done in 64 bits: nSrcLen*4 overflows a 32-bit
    // int once nSrcLen exceeds about a quarter of the int range.
    const long long nLen = (nSrcLen < 0) ? 0 : nSrcLen;

    long long nRet = nLen * 4 / 3;

    if ((dwFlags & ATL_BASE64_FLAG_NOPAD) == 0)
        nRet += nLen % 3;

    // Line breaks are counted before the length is rounded up below;
    // the "+ 1" accounts for the break written after the last line.
    const long long nCRLFs = nRet / 76 + 1;
    const long long nOnLastLine = nRet % 76;

    // Round the last line up to a whole 4-character group.
    if (nOnLastLine % 4)
        nRet += 4 - (nOnLastLine % 4);

    if ((dwFlags & ATL_BASE64_FLAG_NOCRLF) == 0)
        nRet += nCRLFs * 2;   // each break is two chars: '\r' and '\n'

    if (nRet > nIntMax)
        nRet = nIntMax;

    return static_cast<int>(nRet);
}
//----------------------------------------------------------------------
// Returns a destination buffer size, in bytes, for decoding nSrcLen
// base64 characters. The source length itself is handed back unchanged
// and serves as the bound.
int Base64DecodeGetRequiredLength(int nSrcLen) throw()
{
    const int nUpperBound = nSrcLen;
    return nUpperBound;
}

// Encodes nSrcLen bytes starting at pbSrcData as base64 text in szDest.
//
//   pbSrcData  bytes to encode; must not be NULL
//   nSrcLen    number of bytes to encode; must not be negative
//   szDest     receives the encoded characters; must not be NULL.
//              No terminating '\0' is written.
//   pnDestLen  in:  capacity of szDest in chars, which must be at least
//                   Base64EncodeGetRequiredLength(nSrcLen, dwFlags)
//              out: number of chars written (left untouched on failure)
//   dwFlags    ATL_BASE64_FLAG_NONE, or a combination of
//              ATL_BASE64_FLAG_NOPAD (no trailing '=') and
//              ATL_BASE64_FLAG_NOCRLF (no line breaks)
//
// Unless NOCRLF is set, a CRLF is inserted after every 76 output chars.
// Returns TRUE on success, FALSE if an argument is invalid or the
// destination buffer is too small.
BOOL Base64Encode(
    const BYTE *pbSrcData,
    int nSrcLen,
    LPSTR szDest,
    int *pnDestLen,
    DWORD dwFlags=ATL_BASE64_FLAG_NONE) throw()
{
    static const char s_chBase64EncodingTable[64] = {
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L',
        'M', 'N', 'O', 'P', 'Q',
        'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c',
        'd', 'e', 'f', 'g', 'h',
        'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
        'u', 'v', 'w', 'x', 'y',
        'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
    };

    if (!pbSrcData || !szDest || !pnDestLen)
    {
        return FALSE;
    }

    // A negative length must be rejected up front: it makes nSrcLen % 3
    // negative, which drives the tail code below into writing stray '='
    // characters that the size check never accounted for.
    if (nSrcLen < 0)
    {
        return FALSE;
    }

    if (*pnDestLen < Base64EncodeGetRequiredLength(nSrcLen, dwFlags))
    {
        // ATLASSERT(FALSE);
        return FALSE;
    }

    int nWritten( 0 );
    int nLen1( (nSrcLen/3)*4 );   // chars produced by the complete 3-byte groups
    int nLen2( nLen1/76 );        // number of full 76-char lines
    int nLen3( 19 );              // 4-char groups per line; a full line holds 19

    for (int i=0; i<=nLen2; i++)
    {
        // The final pass handles the (possibly empty) partial line.
        if (i==nLen2)
            nLen3 = (nLen1%76)/4;

        for (int j=0; j<nLen3; j++)
        {
            // Pack three source bytes into bits 31..8 of dwCurr
            // (assumes DWORD is 32 bits wide), then peel off the top
            // six bits four times.
            DWORD dwCurr(0);
            for (int n=0; n<3; n++)
            {
                dwCurr |= *pbSrcData++;
                dwCurr <<= 8;
            }
            for (int k=0; k<4; k++)
            {
                BYTE b = (BYTE)(dwCurr>>26);
                *szDest++ = s_chBase64EncodingTable[b];
                dwCurr <<= 6;
            }
        }
        nWritten+= nLen3*4;

        if ((dwFlags & ATL_BASE64_FLAG_NOCRLF)==0)
        {
            *szDest++ = '\r';
            *szDest++ = '\n';
            nWritten+= 2;
        }
    }

    // The loop always ends by emitting a line break; take it back so
    // that the leftover bytes continue on the same line.
    if (nWritten && (dwFlags & ATL_BASE64_FLAG_NOCRLF)==0)
    {
        szDest-= 2;
        nWritten -= 2;
    }

    // One or two leftover bytes produce two or three output chars.
    nLen2 = nSrcLen%3 ? nSrcLen%3 + 1 : 0;
    if (nLen2)
    {
        DWORD dwCurr(0);
        for (int n=0; n<3; n++)
        {
            // Missing bytes are left as zero bits.
            if (n<(nSrcLen%3))
                dwCurr |= *pbSrcData++;
            dwCurr <<= 8;
        }
        for (int k=0; k<nLen2; k++)
        {
            BYTE b = (BYTE)(dwCurr>>26);
            *szDest++ = s_chBase64EncodingTable[b];
            dwCurr <<= 6;
        }
        nWritten+= nLen2;
        if ((dwFlags & ATL_BASE64_FLAG_NOPAD)==0)
        {
            // Pad the final group out to four chars with '='.
            nLen3 = 4-nLen2;
            for (int j=0; j<nLen3; j++)
            {
                *szDest++ = '=';
            }
            nWritten+= nLen3;
        }
    }

    *pnDestLen = nWritten;
    return TRUE;
}
//----------------------------------------------------------------------
// Maps one base64 alphabet character to its 6-bit value (0..63).
// Any other character, including '=', CR and LF, yields -1 so that the
// caller can skip it.
int DecodeBase64Char(unsigned int ch) throw()
{
    int nCode = -1;

    if ('A' <= ch && ch <= 'Z')
    {
        nCode = ch - 'A';            // values 0..25
    }
    else if ('a' <= ch && ch <= 'z')
    {
        nCode = ch - 'a' + 26;       // values 26..51
    }
    else if ('0' <= ch && ch <= '9')
    {
        nCode = ch - '0' + 52;       // values 52..61
    }
    else
    {
        switch (ch)
        {
        case '+':
            nCode = 62;
            break;
        case '/':
            nCode = 63;
            break;
        default:
            break;                   // not in the alphabet: stays -1
        }
    }

    return nCode;
}
//----------------------------------------------------------------------
// Decodes nSrcLen characters of base64 text at szSrc into pbDest.
//
//   szSrc      base64 text; must not be NULL
//   nSrcLen    number of characters to read from szSrc
//   pbDest     receives the decoded bytes; may be NULL, in which case
//              nothing is stored and the call reports FALSE
//   pnDestLen  in:  capacity of pbDest in bytes
//              out: number of bytes the decoded data occupies, set even
//                   when the buffer was too small or pbDest was NULL
//
// Every run of four alphabet characters becomes three bytes. CR, LF,
// '=' and any other character outside the encoding table are skipped.
// Returns TRUE when all decoded bytes were stored, FALSE otherwise.
BOOL Base64Decode(LPCSTR szSrc, int nSrcLen, BYTE *pbDest,
    int *pnDestLen) throw()
{
    if (szSrc == NULL || pnDestLen == NULL)
    {
        // ATLASSERT(FALSE);
        return FALSE;
    }

    LPCSTR szStop = szSrc + nSrcLen;
    int nDecoded = 0;

    // With no destination buffer the call only measures the output.
    BOOL bTooSmall = (pbDest == NULL) ? TRUE : FALSE;

    while (szSrc < szStop)
    {
        // Gather up to four 6-bit values, skipping characters that
        // are not part of the alphabet.
        DWORD dwGroup = 0;
        int nSextets = 0;
        while (nSextets < 4 && szSrc < szStop)
        {
            int nCode = DecodeBase64Char(*szSrc);
            szSrc++;
            if (nCode == -1)
                continue;

            dwGroup <<= 6;
            dwGroup |= nCode;
            nSextets++;
        }

        const int nGroupBits = nSextets * 6;
        const int nGroupBytes = nGroupBits / 8;

        // Once the output no longer fits, stop storing but keep
        // counting so the caller learns the size needed.
        if (!bTooSmall && nDecoded + nGroupBytes > (*pnDestLen))
            bTooSmall = TRUE;

        // Left-align the collected bits in a 24-bit window, then emit
        // the whole bytes from the most significant end.
        dwGroup <<= 24 - nGroupBits;
        for (int b = 0; b < nGroupBytes; b++)
        {
            if (!bTooSmall)
            {
                *pbDest = (BYTE) ((dwGroup & 0x00ff0000) >> 16);
                pbDest++;
            }
            dwGroup <<= 8;
            nDecoded++;
        }
    }

    *pnDestLen = nDecoded;

    return bTooSmall ? FALSE : TRUE;
}
//------------------------------------------------------------

Ulrich Eckhardt

unread,

May 15, 2006, 3:03:36 AM5/15/06

to

Frank Natoli wrote:
> Most WIN32 functions with string arguments have at least three
> declarations, without suffix, with "A" suffix (for ASCII) and with "W"
> suffix (for wide/Unicode).

As mentioned, there are just two functions and a macro redirecting to either
of them.

> Base64Encode/Decode appear to not have such variants.

Take a look at the declaration of the function. If there are two variants,
you always have TCHAR or a derived type (LP[C]TSTR, CString...) in the
arguments. The reason is that depending on the _UNICODE macro, TCHAR is
switched between char and wchar_t. If there is no TCHAR, this doesn't
apply.

Uli