Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

WideCharToMultiByte and utf-8

532 views
Skip to first unread message

J1mlad

unread,
Sep 5, 2006, 8:38:02 AM9/5/06
to
Hi

I'm trying to use WideCharToMultiByte to convert some wide char arrays to
utf8.

I have a C program that works fine and I'm trying to replicate this in VB as
I need to do this in some VBA code on data in some access tables.

I've written 2 noddy programs, one in C, the other in VB - the C prog works
but the VB one fails on the call to WideCharToMultiByte with err being set to
ERROR_INVALID_PARAMETER.

all i'm initially trying to do is convert the copyright sign from its
unicode value 0x00a9 to its utf8 value 0xc2 0xa9. this works in the C code
fine.

I'm sure its something daft i'm doing wrong probably with the parameters -
could anyone please help - i'm new to VB so please bear with me...

C code below:

#include "stdafx.h"
#include <windows.h>

/* Minimal test program: converts the single character U+00A9 (copyright sign)
   from UTF-16 to UTF-8 with WideCharToMultiByte. */
int main(int argc, char* argv[])
{
wchar_t lang_test[2]=L""; /* zero-initialised, so element 1 remains the terminating NUL */
int res;
int failcode=0;
char temp_conv_str[4]=""; /* output buffer for the UTF-8 bytes */

lang_test[0] = 0x00a9; /* the copyright sign */

/* cchWideChar is -1, so the input is treated as NUL-terminated; both
   lpDefaultChar and lpUsedDefaultChar are passed as 0 (NULL). */
res = WideCharToMultiByte(CP_UTF8, 0, lang_test, -1, temp_conv_str, 4, 0,
0);

if (res == 0)
{
failcode = GetLastError(); /* only stored; nothing in this snippet reads it afterwards */
}

return 1;
}

VB code below - ignore any strings being pass from text objects, i'm just
hard stuffing the unicode value into the 16bit buffer using UTF16Buffer(0) =
&HA9


Private Declare Function VarPtrArray Lib "msvbvm60.dll" Alias "VarPtr" _
(Var() As Any) As Long

Private Declare Function lstrlenW Lib "kernel32" (lpString As Any) As Long

Private Declare Function WideCharToMultiByte _
Lib "kernel32" (ByVal CodePage As Long, _
ByVal dwFlags As Long, _
ByRef lpWideCharStr As Long, _
ByVal cchWideChar As Long, _
ByRef lpMultiByteStr As Long, _
ByVal cchMultiByte As Long, _
ByVal lpDefaultChar As String, _
ByVal lpUsedDefaultChar As Long) _
As Long


Private Const CP_ACP = 0 'CODE PAGE FOR CP_ACP
Private Const CP_UTF8 = 65001 'CODE PAGE FOR UTF8
Private Const WC_DEFAULTCHAR = 64

' Test routine: tries to convert U+00A9 to UTF-8 with WideCharToMultiByte.
' strInput is not read anywhere in the body; the input value is hard-coded below.
Private Function String_to_UTF8(strInput As String) As String

Dim res As Long
Dim UTF8Buffer() As Byte
Dim UTF16Buffer() As Integer
Dim Length As Long
Dim sRV As String
' NOTE(review): only add4 is typed As Long here; addr1, addr2 and addr3 default to Variant.
Dim addr1, addr2, addr3, add4 As Long

Length = 4

ReDim UTF16Buffer(0 To Length)
ReDim UTF8Buffer(0 To Length)


' NOTE(review): VarPtr on an array variable presumably yields the address of the
' array descriptor pointer rather than the address of element 0 - confirm.
addr1 = VarPtrArray(UTF16Buffer())
addr2 = VarPtrArray(UTF8Buffer())

UTF16Buffer(0) = &HA9

' NOTE(review): lpDefaultChar is declared ByVal ... As String, so the literal 0 is
' probably coerced to the string "0" instead of being passed as NULL. The API
' requires lpDefaultChar to be NULL for CP_UTF8 - confirm whether this is the
' source of ERROR_INVALID_PARAMETER.
res = WideCharToMultiByte(CP_UTF8, 0, addr1, -1, addr2, Length, 0, 0)

' sRV is never assigned, so this converts an empty string; neither res nor
' UTF8Buffer contributes to the return value.
String_to_UTF8 = StrConv(sRV, vbUnicode)

End Function

' Click handler: passes Text1 to String_to_UTF8 and assigns the result to Text2.
' NOTE(review): Text1 and Text2 are presumably TextBox controls used through
' their default property - confirm against the form.
Private Sub Command1_Click()
Text2 = String_to_UTF8(Text1)
End Sub

Any help would be muchly appreciated :o)

Jim

Dave O.

unread,
Sep 5, 2006, 10:11:33 AM9/5/06
to
Hi,

You have got confused haven't you?

In the line Dim addr1, addr2, addr3, add4 As Long
only add4 is declared as Long, all the others are Variants. Anyway addr3 and
add4 are never used (in this snippet)

In the line: String_to_UTF8 = StrConv(sRV, vbUnicode)
the variable sRV is empty, you declare it but never assign a value to it.

Are CP_UTF8 & CP_UTF8Buffer meant to be the same?

You need to have "Option Explicit" at the start of your form code. On the VB
tools menu on the editor tab select "Require Variable Declaration" this will
automatically put Option Explicit on all NEW forms, you'll need to add it by
hand to any existing ones. Why the default for this is off is one of those
idiotic little mysteries.

Best Regards
Dave O.

"J1mlad" <J1m...@discussions.microsoft.com> wrote in message
news:F3A9B376-10AE-4F93...@microsoft.com...

Mike D Sutton

unread,
Sep 5, 2006, 10:27:31 AM9/5/06
to
> I'm trying to use WideCharToMultiByte to convert some wide char arrays to
> utf8.
>
> I have a C program that works fine and I'm trying to replicate this in VB as
> I need to do this in some VBA code on data in some access tables.
>
> I've written 2 noddy programs, one in C, the other in VB - the C prog works
> but the VB one fails on the call to WideCharToMultiByte with err being set to
> ERROR_INVALID_PARAMETER.
>
> all i'm initially trying to do is convert the copyright sign from its
> unicode value 0x00a9 to its utf8 value 0xc2 0xa9. this works in the C code
> fine.
>
> I'm sure its something daft i'm doing wrong probably with the parameters -
> could anyone please help - i'm new to VB so please bear with me...
<code snipped>

Here's a couple of functions to perform UTF-8 conversion in VB:

'***
Private Declare Function WideCharToMultiByte Lib "Kernel32.dll" ( _
ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpWideCharStr As Long, _
ByVal cchWideChar As Long, ByVal lpMultiByteStr As String, ByVal cbMultiByte As Long, _
ByVal lpDefaultChar As String, ByRef lpUsedDefaultChar As Long) As Long
Private Declare Function MultiByteToWideChar Lib "Kernel32.dll" ( _
ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpMultiByteStr As String, _
ByVal cbMultiByte As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long

Private Const CP_UTF8 As Long = 65001 ' UTF-8 translation

' Converts a VB (UTF-16) string to UTF-8 and returns the result in a String.
'
' inString - the text to convert.
' Returns  - the converted data, or an empty string when the API reports that
'            no bytes are required (which includes an empty inString).
'
' The call is made twice: first with a zero-length output buffer to learn the
' required size, then again with a buffer of exactly that size.
' NOTE(review): the output parameter is declared ByVal ... As String, so VB
' presumably marshals it through the ANSI code page on the way back - confirm
' that this is acceptable for the target locale.
Public Function ToUTF8(ByRef inString As String) As String
    Dim cbNeeded As Long
    Dim utf8Text As String

    ' Pass 1: no output buffer, just ask how many bytes are required.
    cbNeeded = WideCharToMultiByte(CP_UTF8, 0&, StrPtr(inString), _
        Len(inString), vbNullString, 0&, vbNullString, ByVal 0&)
    If cbNeeded <= 0 Then Exit Function

    ' Pass 2: convert into a buffer of exactly the reported size.
    utf8Text = Space$(cbNeeded)
    WideCharToMultiByte CP_UTF8, 0&, StrPtr(inString), _
        Len(inString), utf8Text, cbNeeded, vbNullString, ByVal 0&

    ToUTF8 = utf8Text
End Function

' Converts UTF-8 data held in a VB String back to a normal (UTF-16) VB string.
'
' inString - UTF-8 data, one byte per character, as produced by ToUTF8.
' Returns  - the decoded text, or an empty string when inString is empty or
'            the API reports a failure.
'
' Both API calls use the explicit length Len(inString). The sizing call used to
' pass -1 (scan to the first NUL) while the conversion call passed
' Len(inString); for data containing an embedded NUL the buffer was therefore
' sized too small and the conversion failed, returning an empty string.
Public Function FromUTF8(ByRef inString As String) As String
    Dim srcLen As Long
    Dim cchNeeded As Long
    Dim wideText As String

    srcLen = Len(inString)
    If srcLen = 0 Then Exit Function

    ' Pass 1: no output buffer, just ask how many UTF-16 characters are required.
    cchNeeded = MultiByteToWideChar(CP_UTF8, 0&, inString, srcLen, 0&, 0&)
    If cchNeeded > 0 Then
        ' Pass 2: convert into a buffer of exactly the reported size.
        wideText = Space$(cchNeeded)
        cchNeeded = MultiByteToWideChar(CP_UTF8, 0&, inString, _
            srcLen, StrPtr(wideText), cchNeeded)
        ' Keep only what was actually written (nothing if the call failed).
        FromUTF8 = Left$(wideText, cchNeeded)
    End If
End Function
'***

You would then use them like this:

'***
Dim BaseString As String, UTF8String As String, UniString As String

' ChrW$ takes a Unicode code point, so this is U+00A9 (copyright sign) on every
' system; Chr$ would interpret &HA9 in the current ANSI code page instead.
BaseString = ChrW$(&HA9)
UTF8String = ToUTF8(BaseString)     ' UTF-16 -> UTF-8
UniString = FromUTF8(UTF8String)    ' UTF-8 -> UTF-16 (round trip)
'***

Hope this helps,

Mike


- Microsoft Visual Basic MVP -
E-Mail: ED...@mvps.org
WWW: Http://EDais.mvps.org/


Dave O.

unread,
Sep 5, 2006, 10:43:17 AM9/5/06
to
oops, ignore the line:

Are CP_UTF8 & CP_UTF8Buffer meant to be the same?

You declared CP_UTF8 at the start.

Regards
Dave O.

"Dave O." <nob...@nowhere.com> wrote in message
news:OBnc$UP0GH...@TK2MSFTNGP05.phx.gbl...

J1mlad

unread,
Sep 5, 2006, 4:31:02 PM9/5/06
to
Thanks Mike

Looks like spot on what i'm after thanks - i'll try to plumb it into my vba
tomorrow.

Thanks again for the speedy response

Jim

Tony Proctor

unread,
Sep 6, 2006, 10:49:38 AM9/6/06
to
I would strongly recommend you don't put UTF-8 character codes in String
variables, Mike. Besides being technically wrong, it can lead to several
different problems.

Here's an alternative set of functions:
http://groups.google.ie/group/microsoft.public.vb.general.discussion/msg/3b57455268b5f817?hl=en&

Tony Proctor

"Mike D Sutton" <ED...@mvps.org> wrote in message
news:#9mdCeP0...@TK2MSFTNGP03.phx.gbl...

Mike D Sutton

unread,
Sep 6, 2006, 11:25:23 AM9/6/06
to
>I would strongly recommend you don't put UTF-8 character codes in String
> variables Mike. Beside being techically wrong, it can lead to several
> different problems.

Ah yes, good point. Here's a conversion of my original routines to use a Byte() rather than a String for the UTF-8
buffers:

'***
' Win32 conversion APIs declared with the multi-byte buffer "As Any", so that a
' Byte array element (or ByVal 0& for NULL) can be passed instead of a String.
' The wide-character side is passed as a raw pointer (e.g. from StrPtr).
' Note: a line ending in "_" must be followed directly by its continuation
' line; blank lines in between stop the statement from compiling.
Private Declare Function WideCharToMultiByteBuf Lib "Kernel32.dll" Alias "WideCharToMultiByte" ( _
    ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpWideCharStr As Long, _
    ByVal cchWideChar As Long, ByRef lpMultiByteStr As Any, ByVal cbMultiByte As Long, _
    ByVal lpDefaultChar As String, ByRef lpUsedDefaultChar As Long) As Long

Private Declare Function MultiByteToWideCharBuf Lib "Kernel32.dll" Alias "MultiByteToWideChar" ( _
    ByVal CodePage As Long, ByVal dwFlags As Long, ByRef lpMultiByteStr As Any, _
    ByVal cbMultiByte As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long

Private Const CP_UTF8 As Long = 65001 ' UTF-8 translation

' Converts a VB (UTF-16) string to UTF-8 bytes.
'
' inString - the text to convert.
' outBytes - receives a zero-based Byte array holding the UTF-8 data; it is
'            left untouched when nothing is converted.
' Returns  - the number of bytes reported by the sizing call, or 0 when the API
'            reports that no bytes are required (which includes an empty
'            inString).
Public Function ToUTF8Buf(ByRef inString As String, ByRef outBytes() As Byte) As Long
    Dim cchSource As Long
    Dim cbNeeded As Long
    Dim encoded() As Byte

    cchSource = Len(inString)

    ' Pass 1: NULL output buffer, just ask how many bytes are required.
    cbNeeded = WideCharToMultiByteBuf(CP_UTF8, 0&, StrPtr(inString), _
        cchSource, ByVal 0&, 0&, vbNullString, ByVal 0&)
    If cbNeeded <= 0 Then Exit Function

    ' Pass 2: convert into a buffer of exactly the reported size.
    ReDim encoded(0 To cbNeeded - 1)
    WideCharToMultiByteBuf CP_UTF8, 0&, StrPtr(inString), _
        cchSource, encoded(0), cbNeeded, vbNullString, ByVal 0&

    outBytes = encoded
    ToUTF8Buf = cbNeeded
End Function

' Converts UTF-8 bytes to a VB (UTF-16) string.
'
' inBytes  - Byte array holding the UTF-8 data; it does not need to be
'            NUL-terminated and may have any lower bound.
' inLength - optional number of bytes to convert, counted from the first
'            element. When omitted, <= 0, or larger than the array, the whole
'            array is converted.
' Returns  - the decoded text, or an empty string when the array is
'            unallocated or empty, or the API reports a failure.
'
' Both API calls use the same explicit byte count. The sizing call used to pass
' -1, which makes the API scan for a terminating NUL; arrays produced by
' ToUTF8Buf have no terminator, so that scan ran past the end of the array.
Public Function FromUTF8Buf(ByRef inBytes() As Byte, _
    Optional ByVal inLength As Long = 0) As String
    Dim firstIdx As Long
    Dim available As Long
    Dim UseLen As Long
    Dim BufLen As Long
    Dim wideText As String

    ' LBound/UBound raise an error on an unallocated dynamic array; in that
    ' case available simply stays 0.
    On Error Resume Next
    firstIdx = LBound(inBytes)
    available = (UBound(inBytes) - firstIdx) + 1
    On Error GoTo 0

    ' Never read more bytes than the array actually holds.
    If (inLength > 0) And (inLength < available) Then
        UseLen = inLength
    Else
        UseLen = available
    End If

    If (UseLen > 0) Then
        ' Pass 1: no output buffer, just ask how many UTF-16 characters are required.
        BufLen = MultiByteToWideCharBuf(CP_UTF8, 0&, inBytes(firstIdx), UseLen, 0&, 0&)
        If (BufLen > 0) Then
            ' Pass 2: convert into a buffer of exactly the reported size.
            wideText = Space$(BufLen)
            BufLen = MultiByteToWideCharBuf(CP_UTF8, 0&, inBytes(firstIdx), _
                UseLen, StrPtr(wideText), BufLen)
            ' Keep only what was actually written (nothing if the call failed).
            FromUTF8Buf = Left$(wideText, BufLen)
        End If
    End If
End Function
'***

They are used in much the same way as the originals:

'***
Dim BaseString As String, UTF8String() As Byte, UniString As String
Dim UTF8Len As Long

' ChrW$ takes a Unicode code point, so this is U+00A9 (copyright sign) on every
' system; Chr$ would interpret &HA9 in the current ANSI code page instead.
BaseString = ChrW$(&HA9)
UTF8Len = ToUTF8Buf(BaseString, UTF8String())   ' UTF-16 -> UTF-8 bytes
UniString = FromUTF8Buf(UTF8String())           ' UTF-8 bytes -> UTF-16 (round trip)
'***

Note: UTF8Len could be passed to FromUTF8Buf() to slightly speed the routine up; however, if no length is passed, it will
convert the entire buffer it's passed.

0 new messages