On 01/21/2014 01:18 AM, mokomoji wrote:
https://ko.wikipedia.org/wiki/UTF-8
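You need to find bytes greater than 127 (BYTE >= 128). Any such byte is
part of a multi-byte character: bytes 0xC2 through 0xF4 are BYTE 1 of a
multi-byte sequence, and bytes 0x80 through 0xBF are the following bytes
(BYTE 2, 3 or 4). BYTE 1 tells you how many bytes total for the
multi-byte character.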
The script at the bottom of this message uses DEBUG, but maybe it will
help. Here is the part which uses BYTE 1 to determine what to do:
REM Excerpt of the decoding loop from UTF8to16.cmd, which is listed in full below.
REM FC /b compares FileIn with the filler file TmpFile.fc byte by byte.
REM Token a is the hexadecimal offset and token b is the byte read from FileIn.
REM Working variables: U is the code point being assembled, n is the number of
REM bytes the current character needs, b is how many of them have been read.
For /F "skip=1 tokens=1,2 delims=: " %%a in (
'FC /b %FileIn% %TmpFile%.fc') Do (
Set /A "byte=0x%%b"
If !byte! LSS 0x80 ( REM ASCII, Byte 1 of 1
Set /A "U=byte, b=1, n=1"
) Else If !byte! LSS 0xC0 ( REM Byte 2, 3 or 4
REM Shift U left by six bits and merge in the low six bits of this byte.
Set /A "U<<=6, byte&=0x3F, U|=byte, b+=1"
) Else If !byte! LSS 0xC2 ( REM Overlong
Echo.%Me%: Aborting. Overlong encoding of ASCII character at %%ah. >&2
Exit /B 1
) Else If !byte! LSS 0xE0 ( REM Byte 1 of 2
Set /A "U=byte&0x1F, n=2, b=1"
) Else If !byte! LSS 0xF0 ( REM Byte 1 of 3
Set /A "U=byte&0x0F, n=3, b=1"
) Else If !byte! LSS 0xF5 ( REM Byte 1 of 4
Set /A "U=byte&0x07, n=4, b=1"
) Else ( REM Restricted or undefined.
Echo.%Me%: Aborting. Restricted or undefined character at %%ah. >&2
Exit /B 2
)
REM All bytes of the character have been read once b equals n.
If !b! EQU !n! (
If !U! GTR 0xFFFF (
REM Above 0xFFFF the value is split into two 16-bit units.
REM UL gets the low ten bits with 0xDC00 and U gets the next ten bits with 0xD800.
Set /A "U-=0x10000, UL=U&0x3FF, UL|=0xDC00"
Set /A "U>>=10, U&=0x3FF, U|= 0xD800"
REM Each 16-bit unit is appended low byte first, then high byte.
Set /A "L=U&0x00FF, U>>=8, n=0"
TYPE ASCII??.!L! >>%FileOut% 2>NUL:
TYPE ASCII??.!U! >>%FileOut% 2>NUL:
Set /A "L=UL&0x00FF, UL>>=8"
TYPE ASCII??.!L! >>%FileOut% 2>NUL:
TYPE ASCII??.!UL! >>%FileOut% 2>NUL:
) Else (
REM A single 16-bit unit, appended low byte first, then high byte.
Set /A "L=U&0x00FF, U>>=8, n=0"
TYPE ASCII??.!L! >>%FileOut% 2>NUL:
TYPE ASCII??.!U! >>%FileOut% 2>NUL:
)
)
)
Frank
:: BEGIN FILE ::::::::::::::::::::::::::::::::::::::::::::::::::::
:: UTF8to16.cmd
:: Write a UTF-16 file from a UTF-8 file.
:: Frank P. Westlake, 2009-07-27
@Echo OFF
:: Delayed expansion is required by the decoding loop and by :WriteBinaryFiles.
SetLocal ENABLEEXTENSIONS ENABLEDELAYEDEXPANSION

:: Usage screen. filein is the existing UTF-8 input, fileout is the file created.
If /I "%1" EQU "/?" (
  Echo.Writes UTF-16LE from a UTF-8 file.
  Echo.
  Echo. %0 filein fileout
  Echo.
  Echo. filein Name of the existing UTF-8 file.
  Echo. fileout Name of the new UTF-16 file.
  Echo.
  Echo.Example:
  Echo. %0 UTF8.txt UTF16.txt
  Goto :EOF
)

:: Me is the script name without path or extension, used to prefix messages.
Set "Me=%~n0"
:: Clear any inherited values so the argument loop starts with no filenames.
Set "FileIn="
Set "FileOut="

:: Alterable environment:
:: MyDir is the directory that holds the one-byte helper files.
:: TmpFile is the base name for temporary files.
Set "MyDir=%temp%\ASCII"
Set "TmpFile=%TEMP%\%Me%"
:: End alterable environment

:: Internal entry point: the script starts itself with :WriteBinaryFiles as the
:: first argument in order to build the one-byte helper files.
If "%1" EQU ":WriteBinaryFiles" (Shift & Goto :WriteBinaryFiles)
:args
:: Consume one command line argument per pass.
:: The /NOCI switch clears CI. The first other argument becomes FileIn and the
:: second becomes FileOut. A third one is rejected.
If /I "%1" == "/NOCI" (Set "CI=" & Goto :argsNext)
If DEFINED FileOut (
  Echo.%Me%: Too many filenames. >&2
  Goto :EOF
)
If DEFINED FileIn (Set "FileOut=%1") Else (Set "FileIn=%1")
:argsNext
Shift
If NOT "%1" == "" Goto :args
:: Ask for whichever filename was not given on the command line.
:: Each prompt must stay on one physical line because the quoted string
:: cannot span a line break.
If "%FileIn%" EQU "" (
  Set /P "FileIn=%Me%: Please enter the name of the existing UTF-8 file: " >&2
)
If "%FileOut%" EQU "" (
  Set /P "FileOut=%Me%: Please enter the name of the new UTF-16 file: " >&2
)
If "%FileIn%" EQU "" (Echo.%Me%: Aborting. Need input filename. >&2 & Goto :EOF)
If "%FileOut%" EQU "" (Echo.%Me%: Aborting. Need output filename. >&2 & Goto :EOF)

:: Expand both names to full paths because the working directory changes below.
:: fs receives the size of the input file in bytes.
For %%f in (%FileIn%) Do (Set "FileIn=%%~ff" & Set "fs=%%~zf")
For %%f in (%FileOut%) Do (Set "FileOut=%%~ff")
Set "TmpFile=%TEMP%\%~n0.tmp"
Set "HX=0123456789ABCDEF"

:: Build the one-byte helper files by running this script again in a minimized
:: window and waiting for it to finish. HX is set first because the
:: :WriteBinaryFiles routine reads it.
Start "" /wait /MIN %Me% :WriteBinaryFiles %MyDir%
ChDir /d %MyDir%

:: Look for FSUTIL.EXE in the current directory and then along PATH.
Set "FSUtil=1"
If NOT EXIST FSUTIL.EXE (
  For %%f in (FSUTIL.EXE) Do (
    If NOT EXIST %%~$PATH:f (
      Set "FSUtil="
    )
  )
)

:: Create a filler file of the same size as the input for FC to compare against.
:: Without FSUTIL the filler is built by appending the 00 byte file fs times.
:: NOTE(review): FC lists only bytes that differ, so NUL bytes in the input are
:: presumably never seen by the loop below. Confirm against the FC documentation.
If DEFINED FSUTIL (
  FSUtil FILE CREATENEW %TmpFile%.fc %fs% >NUL:
) Else (
  TYPE NUL: >%TmpFile%.fc
  For /L %%i in (1 1 %fs%) Do TYPE ASCII00.0 >>%TmpFile%.fc
)

:: Decoder state: U is the code point being assembled, n is the number of bytes
:: the current character needs, b is how many of them have been read.
Set /a b=-1, U=0, n=0
Type NUL: >%FileOut%

:: Token a is the hexadecimal offset and token b is the byte read from FileIn.
:: Lines whose first token is FC are status messages from FC, not data, and are
:: skipped so that they are never evaluated as a hexadecimal byte.
For /F "skip=1 tokens=1,2 delims=: " %%a in (
  'FC /b %FileIn% %TmpFile%.fc') Do If /I "%%a" NEQ "FC" (
  Set /A "byte=0x%%b"
  If !byte! LSS 0x80 (
    REM ASCII, byte 1 of 1.
    Set /A "U=byte, b=1, n=1"
  ) Else If !byte! LSS 0xC0 (
    REM Byte 2, 3 or 4. Append its low six bits to U.
    Set /A "U<<=6, byte&=0x3F, U|=byte, b+=1"
  ) Else If !byte! LSS 0xC2 (
    REM 0xC0 and 0xC1 are rejected as overlong. Remove the filler file before leaving.
    Echo.%Me%: Aborting. Overlong encoding of ASCII character at %%ah. >&2
    Erase %TmpFile%.fc
    Exit /B 1
  ) Else If !byte! LSS 0xE0 (
    REM Byte 1 of 2.
    Set /A "U=byte&0x1F, n=2, b=1"
  ) Else If !byte! LSS 0xF0 (
    REM Byte 1 of 3.
    Set /A "U=byte&0x0F, n=3, b=1"
  ) Else If !byte! LSS 0xF5 (
    REM Byte 1 of 4.
    Set /A "U=byte&0x07, n=4, b=1"
  ) Else (
    REM Restricted or undefined. Remove the filler file before leaving.
    Echo.%Me%: Aborting. Restricted or undefined character at %%ah. >&2
    Erase %TmpFile%.fc
    Exit /B 2
  )
  REM The character is complete once b equals n.
  If !b! EQU !n! (
    If !U! GTR 0xFFFF (
      REM Above 0xFFFF the value is split into a surrogate pair.
      REM U becomes the 0xD800 unit and UL becomes the 0xDC00 unit.
      Set /A "U-=0x10000, UL=U&0x3FF, UL|=0xDC00"
      Set /A "U>>=10, U&=0x3FF, U|= 0xD800"
      REM Each unit is appended low byte first by typing the matching one-byte file.
      Set /A "L=U&0x00FF, U>>=8, n=0"
      TYPE ASCII??.!L! >>%FileOut% 2>NUL:
      TYPE ASCII??.!U! >>%FileOut% 2>NUL:
      Set /A "L=UL&0x00FF, UL>>=8"
      TYPE ASCII??.!L! >>%FileOut% 2>NUL:
      TYPE ASCII??.!UL! >>%FileOut% 2>NUL:
    ) Else (
      REM A single 16-bit unit, appended low byte first.
      Set /A "L=U&0x00FF, U>>=8, n=0"
      TYPE ASCII??.!L! >>%FileOut% 2>NUL:
      TYPE ASCII??.!U! >>%FileOut% 2>NUL:
    )
  )
)

:: Remove the filler file.
For %%x in (fc) Do Erase %TmpFile%.%%x
Goto :EOF
:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
:WriteBinaryFiles path
:: Create, inside MyDir, a one-byte file for every byte value from 0 to 255.
:: Each file is named ascii, then two hexadecimal digits, then a dot, then the
:: decimal value. The converter selects a byte by typing ASCII??. followed by
:: the decimal value.
:: Existing files are kept, so only missing ones are created.
:: Requires HX, the string of hexadecimal digits, and delayed expansion, both
:: set up by the caller before this routine is reached.
:: NOTE(review): the path argument is not read here. MyDir is used directly.
SetLocal
MkDir %MyDir% >NUL: 2>&1
ChDir /d %MyDir%
FOR /L %%i in (0 1 0xFF) Do If NOT EXIST ASCII*.%%i (
REM h1 and h2 are the high and low four bits of the byte value.
Set /A "h1=(%%i&0xF0)>>4, h2=(%%i&0x0F)"
REM CALL adds a second pass of percent expansion so that the substring offsets
REM into HX can come from h1 and h2. h ends up as two hexadecimal digits.
Call Set "h=%%HX:~!h1!,1%%%%HX:~!h2!,1%%"
REM The block below is a script for DEBUG: name the file, enter the byte at
REM offset 0000, set CX to 1, write, quit. CALL is used again so that h is
REM expanded on the sending side of the pipe.
(
CALL Echo N ascii%%h%%.%%i
CALL Echo E 0000 %%h%%
Echo R CX
Echo 1
Echo W 0
Echo Q
) | DEBUG >NUL:
)
:: Exit ends the command interpreter that Start opened for this routine, so the
:: Goto on the following line is not reached.
Exit
Goto :EOF
:: END FILE ::::::::::::::::::::::::::::::::::::::::::::::::::::::