[PATCH 4/4] Warn if the Windows console font doesn't support Unicode

701 views
Skip to first unread message

Karsten Blees

unread,
Jul 30, 2010, 8:04:03 PM7/30/10
to msy...@googlegroups.com
Unicode console output won't display correctly with default settings
because the default console font ("Terminal") only supports the system's
OEM charset. Unfortunately, this is a user specific setting, so it cannot
be easily fixed by e.g. some registry tricks in the setup program.

This change prints a warning on exit if console output contained non-ASCII
characters and the console font is supposedly not a TrueType font (TrueType
fonts usually have decent Unicode support).

Signed-off-by: Karsten Blees <bl...@dcon.de>
---
compat/winansi.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 56 insertions(+), 0 deletions(-)

diff --git a/compat/winansi.c b/compat/winansi.c
index c4be401..a5ca2d9 100644
--- a/compat/winansi.c
+++ b/compat/winansi.c
@@ -4,6 +4,8 @@

#include "../git-compat-util.h"
#include <malloc.h>
+#include <wingdi.h>
+#include <winreg.h>

/*
Functions to be wrapped:
@@ -26,6 +28,54 @@ static WORD plain_attr;
static WORD attr;
static int negative;
static FILE *last_stream = NULL;
+static int non_ascii_used = 0;
+
+typedef struct _CONSOLE_FONT_INFOEX {
+ ULONG cbSize;
+ DWORD nFont;
+ COORD dwFontSize;
+ UINT FontFamily;
+ UINT FontWeight;
+ WCHAR FaceName[LF_FACESIZE];
+} CONSOLE_FONT_INFOEX, *PCONSOLE_FONT_INFOEX;
+
+typedef BOOL (WINAPI *PGETCURRENTCONSOLEFONTEX)(HANDLE, BOOL,
+ PCONSOLE_FONT_INFOEX);
+
+static void warn_if_raster_font(void)
+{
+ DWORD fontFamily = 0;
+ PGETCURRENTCONSOLEFONTEX pGetCurrentConsoleFontEx;
+
+ /* don't bother if output was ascii only */
+ if (!non_ascii_used)
+ return;
+
+ /* GetCurrentConsoleFontEx is available since Vista */
+ pGetCurrentConsoleFontEx = GetProcAddress(GetModuleHandle("kernel32.dll"),
+ "GetCurrentConsoleFontEx");
+ if (pGetCurrentConsoleFontEx) {
+ CONSOLE_FONT_INFOEX cfi;
+ cfi.cbSize = sizeof(cfi);
+ if (pGetCurrentConsoleFontEx(console, 0, &cfi))
+ fontFamily = cfi.FontFamily;
+ } else {
+ /* pre-Vista: check default console font in registry */
+ HKEY hkey;
+ if (ERROR_SUCCESS == RegOpenKeyExA(HKEY_CURRENT_USER, "Console", 0,
+ KEY_READ, &hkey)) {
+ DWORD size = sizeof(fontFamily);
+ RegQueryValueExA(hkey, "FontFamily", NULL, NULL,
+ (LPVOID) &fontFamily, &size);
+ RegCloseKey(hkey);
+ }
+ }
+
+ if (!(fontFamily & TMPF_TRUETYPE))
+ warning("Your console font probably doesn\'t support "
+ "Unicode. If you experience strange characters in the output, "
+ "consider switching to a TrueType font such as Lucida Console!");
+}

static int is_console(FILE *stream)
{
@@ -54,6 +104,8 @@ static int is_console(FILE *stream)
attr = plain_attr = sbi.wAttributes;
negative = 0;
initialized = 1;
+ /* check console font on exit */
+ atexit(warn_if_raster_font);
}

console = hcon;
@@ -69,6 +121,10 @@ static int write_console(const char *str, size_t len)

WriteConsoleW(console, wbuf, wlen, NULL, NULL);

+ /* remember if non-ascii characters are printed */
+ if (wlen != len)
+ non_ascii_used = 1;
+
/* return original (utf-8 encoded) length */
return len;
}
--
1.7.0.2.msysgit.0.4.g34afc.dirty

Karsten Blees

unread,
Jul 30, 2010, 8:04:01 PM7/30/10
to msy...@googlegroups.com
WriteConsoleW seems to be the only way to reliably print unicode to the
console (without weird code page conversions).

Also redirects vfprintf to the winansi.c version.

Signed-off-by: Karsten Blees <bl...@dcon.de>
---

compat/mingw.h | 2 ++
compat/winansi.c | 26 ++++++++++++++++++++------
2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/compat/mingw.h b/compat/mingw.h
index 4650d8a..9dd7f16 100644
--- a/compat/mingw.h
+++ b/compat/mingw.h
@@ -250,9 +250,11 @@ sig_handler_t mingw_signal(int sig, sig_handler_t handler);
int winansi_fputs(const char *str, FILE *stream);
int winansi_printf(const char *format, ...) __attribute__((format (printf, 1, 2)));
int winansi_fprintf(FILE *stream, const char *format, ...) __attribute__((format (printf, 2, 3)));
+int winansi_vfprintf(FILE *stream, const char *format, va_list list);
#define fputs winansi_fputs
#define printf(...) winansi_printf(__VA_ARGS__)
#define fprintf(...) winansi_fprintf(__VA_ARGS__)
+#define vfprintf winansi_vfprintf

/*
* git specific compatibility
diff --git a/compat/winansi.c b/compat/winansi.c
index dedce21..abe0fea 100644
--- a/compat/winansi.c
+++ b/compat/winansi.c
@@ -3,6 +3,7 @@
*/

#include "../git-compat-util.h"
+#include <malloc.h>

/*
Functions to be wrapped:

@@ -10,6 +11,7 @@
#undef printf
#undef fprintf
#undef fputs
+#undef vfprintf
/* TODO: write */

/*
@@ -46,6 +48,18 @@ static void init(void)
initialized = 1;
}

+static int write_console(const char *str, size_t len)
+{
+ /* convert utf-8 to utf-16, write directly to console */
+ int wlen = MultiByteToWideChar(CP_UTF8, 0, str, len, NULL, 0);
+ wchar_t *wbuf = (wchar_t *) alloca(wlen * sizeof(wchar_t));
+ MultiByteToWideChar(CP_UTF8, 0, str, len, wbuf, wlen);
+
+ WriteConsoleW(console, wbuf, wlen, NULL, NULL);
+
+ /* return original (utf-8 encoded) length */
+ return len;
+}

#define FOREGROUND_ALL (FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE)
#define BACKGROUND_ALL (BACKGROUND_RED | BACKGROUND_GREEN | BACKGROUND_BLUE)
@@ -245,13 +259,15 @@ static int ansi_emulate(const char *str, FILE *stream)
int rv = 0;
const char *pos = str;

+ fflush(stream);
+
while (*pos) {
pos = strstr(str, "\033[");
if (pos) {
size_t len = pos - str;

if (len) {
- size_t out_len = fwrite(str, 1, len, stream);
+ size_t out_len = write_console(str, len);
rv += out_len;
if (out_len < len)
return rv;
@@ -260,14 +276,12 @@ static int ansi_emulate(const char *str, FILE *stream)
str = pos + 2;
rv += 2;

- fflush(stream);
-
pos = set_attr(str);
rv += pos - str;
str = pos;
} else {
- rv += strlen(str);
- fputs(str, stream);
+ size_t len = strlen(str);
+ rv += write_console(str, len);
return rv;
}
}
@@ -294,7 +308,7 @@ int winansi_fputs(const char *str, FILE *stream)
return EOF;
}

-static int winansi_vfprintf(FILE *stream, const char *format, va_list list)
+int winansi_vfprintf(FILE *stream, const char *format, va_list list)
{
int len, rv;
char small_buf[256];
--
1.7.0.2.msysgit.0.4.g34afc.dirty

Karsten Blees

unread,
Jul 30, 2010, 8:04:00 PM7/30/10
to msy...@googlegroups.com
Git requires the TERM environment variable to be set for all color*
settings. Simulate the TERM variable if it is not set (default on Windows).

Signed-off-by: Karsten Blees <bl...@dcon.de>
---

compat/mingw.c | 15 ++++++++++-----
1 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/compat/mingw.c b/compat/mingw.c
index 0df1a3d..0a4a346 100644
--- a/compat/mingw.c
+++ b/compat/mingw.c
@@ -613,11 +613,16 @@ char *mingw_getcwd(char *pointer, int len)
char *mingw_getenv(const char *name)
{
char *result = getenv(name);
- if (!result && !strcmp(name, "TMPDIR")) {
- /* on Windows it is TMP and TEMP */
- result = getenv("TMP");
- if (!result)
- result = getenv("TEMP");
+ if (!result) {
+ if (!strcmp(name, "TMPDIR")) {
+ /* on Windows it is TMP and TEMP */
+ result = getenv("TMP");
+ if (!result)
+ result = getenv("TEMP");
+ } else if (!strcmp(name, "TERM")) {
+ /* simulate TERM to enable auto-color (see color.c) */
+ result = "winansi";
+ }
}
return result;
}
--
1.7.0.2.msysgit.0.4.g34afc.dirty

Erik Faye-Lund

unread,
Dec 20, 2010, 7:13:06 AM12/20/10
to Karsten Blees, Johannes Schindelin, msy...@googlegroups.com
On Sat, Jul 31, 2010 at 2:04 AM, Karsten Blees <bl...@dcon.de> wrote:
> Unicode console output won't display correctly with default settings
> because the default console font ("Terminal") only supports the system's
> OEM charset. Unfortunately, this is a user specific setting, so it cannot
> be easily fixed by e.g. some registry tricks in the setup program.
>
> This change prints a warning on exit if console output contained non-ascii
> characters and the console font is supposedly not a TrueType font (which
> usually have decent Unicode support).
>
> Signed-off-by: Karsten Blees <bl...@dcon.de>
> ---

<snip>


+typedef struct _CONSOLE_FONT_INFOEX {
+ ULONG cbSize;
+ DWORD nFont;
+ COORD dwFontSize;
+ UINT FontFamily;
+ UINT FontWeight;
+ WCHAR FaceName[LF_FACESIZE];
+} CONSOLE_FONT_INFOEX, *PCONSOLE_FONT_INFOEX;

LF_FACESIZE isn't defined in the MSVC-version of wingdi.h if NOGDI is
defined, which we currently define (both for MSVC and MinGW builds).
Dscho seems to have had a good reason for doing this in c2822f9
(preventing a name clash).

> +       if (!(fontFamily & TMPF_TRUETYPE))
> +               warning("Your console font probably doesn\'t support "
> +                       "Unicode. If you experience strange characters in the output, "
> +                       "consider switching to a TrueType font such as Lucida Console!");
> +}

Where do you have TMPF_TRUETYPE from? It's not documented in MSDN for
the CONSOLE_FONT_INFOEX structure[1], but it IS documented for the
TEXTMETRIC structure[2]. Relying on undocumented functionality is a
bit risky, IMO...

[1]: http://msdn.microsoft.com/en-us/library/ms682069(v=VS.85).aspx
[2]: http://msdn.microsoft.com/en-us/library/dd145132(v=vs.85).aspx

By the way, this is also guarded by NOGDI in wingdi.h.

Undefining NOGDI and CONSOLE_FONT_INFOEX in winansi.c helps to get
this to compile on MSVC:

---8<---
--- a/compat/winansi.c
+++ b/compat/winansi.c
@@ -1,7 +1,7 @@
/*
* Copyright 2008 Peter Harris <g...@peter.is-a-geek.org>
*/
-
+#undef NOGDI
#include "../git-compat-util.h"
#include <malloc.h>
#include <wingdi.h>
@@ -30,15 +30,6 @@ static int negative;


static FILE *last_stream = NULL;

static int non_ascii_used = 0;

-typedef struct _CONSOLE_FONT_INFOEX {
- ULONG cbSize;
- DWORD nFont;
- COORD dwFontSize;
- UINT FontFamily;
- UINT FontWeight;
- WCHAR FaceName[LF_FACESIZE];
-} CONSOLE_FONT_INFOEX, *PCONSOLE_FONT_INFOEX;
-
typedef BOOL (WINAPI *PGETCURRENTCONSOLEFONTEX)(HANDLE, BOOL,
PCONSOLE_FONT_INFOEX);
---8<---

Erik Faye-Lund

unread,
Dec 20, 2010, 7:18:40 AM12/20/10
to Karsten Blees, Johannes Schindelin, msy...@googlegroups.com

... but breaks on MinGW. Meeeh.

karste...@dcon.de

unread,
Dec 21, 2010, 7:15:33 AM12/21/10
to kusm...@gmail.com, Karsten Blees, Johannes Schindelin, msy...@googlegroups.com

Erik Faye-Lund <kusm...@gmail.com> wrote on 20.12.2010 13:18:40:

> > <snip>
> > +typedef struct _CONSOLE_FONT_INFOEX {
> > +       ULONG cbSize;
> > +       DWORD nFont;
> > +       COORD dwFontSize;
> > +       UINT FontFamily;
> > +       UINT FontWeight;
> > +       WCHAR FaceName[LF_FACESIZE];
> > +} CONSOLE_FONT_INFOEX, *PCONSOLE_FONT_INFOEX;
> >
> > LF_FACESIZE isn't defined in the MSVC-version of wingdi.h if NOGDI is
> > defined, which we currently define (both for MSVC and MinGW builds).
> > Dscho seems to have had a good reason for doing this in c2822f9
> > (preventing a name clash).
> >

As this was introduced in Vista, I think the typedef should be guarded by #if(_WIN32_WINNT >= 0x600) in wincon.h, but isn't...bah.
I've added #undef NOGDI and #ifdef __MINGW32__ around the typedef in the newest thread safe version (see my unicode patches). I don't have a git/MSVC build environment set up, so I can't test this.

> >> +       if (!(fontFamily & TMPF_TRUETYPE))
> >> +               warning("Your console font probably doesn\'t support "
> >> +                       "Unicode. If you experience strange
> characters in the output, "
> >> +                       "consider switching to a TrueType font
> such as Lucida Console!");
> >> +}
> >
> > Where do you have TMPF_TRUETYPE from? It's not documented in MSDN for
> > the CONSOLE_FONT_INFOEX structure[1], but it IS documented for the
> > TEXTMETRIC structure[2]. Relying on undocumented functionality is a
> > bit risky, IMO...
> >
> > [1]:
http://msdn.microsoft.com/en-us/library/ms682069(v=VS.85).aspx
> > [2]:
http://msdn.microsoft.com/en-us/library/dd145132(v=vs.85).aspx
> >


Trial and error and a bit of a guess. My Win7 machine returns 0 for raster and 0x36 for true type (which is exactly what is stored in the FontFamily registry value, also on XP). So the MSDN documentation is obviously incorrect (says: "This parameter can be one of the following values", not "Bits 4-7 can be..."). Looking for other uses of the font family constants, I found LOGFONT.lfPitchAndFamily (cannot produce 0x36 either) and TEXTMETRIC.tmPitchAndFamily (fits nicely).

If we want to stick with fully documented functionality, I think much of msysgit code is doomed :-)

Btw. the version you are referring to will always print the font warning on Vista/7. That's because git closes stdout (and thus the console handle) before the atexit routine is called. I've posted a fix for this in the original mailing list thread, the current thread safe console version also doesn't have this problem.

Bye,
Karsten

Erik Faye-Lund

unread,
Dec 21, 2010, 1:08:55 PM12/21/10
to karste...@dcon.de, Karsten Blees, Johannes Schindelin, msy...@googlegroups.com
On Tue, Dec 21, 2010 at 1:15 PM, <karste...@dcon.de> wrote:
>
> Erik Faye-Lund <kusm...@gmail.com> wrote on 20.12.2010 13:18:40:
>
>> > <snip>
>> > +typedef struct _CONSOLE_FONT_INFOEX {
>> > +       ULONG cbSize;
>> > +       DWORD nFont;
>> > +       COORD dwFontSize;
>> > +       UINT FontFamily;
>> > +       UINT FontWeight;
>> > +       WCHAR FaceName[LF_FACESIZE];
>> > +} CONSOLE_FONT_INFOEX, *PCONSOLE_FONT_INFOEX;
>> >
>> > LF_FACESIZE isn't defined in the MSVC-version of wingdi.h if NOGDI is
>> > defined, which we currently define (both for MSVC and MinGW builds).
>> > Dscho seems to have had a good reason for doing this in c2822f9
>> > (preventing a name clash).
>> >
>
> As this was introduced in Vista, I think the typedef should be guarded by
> #if(_WIN32_WINNT >= 0x600) in wincon.h, but isn't...bah.
> I've added #undef NOGDI and #ifdef __MINGW32__ around the typedef in the
> newest thread safe version (see my unicode patches). I don't have a git/MSVC
> build environment set up, so I can't test this.
>

Yeah, that's pretty much what I ended up doing locally here as well.
I'll try to compile your branch a bit later and see what happens.
Unless I forget :P

>> >> +       if (!(fontFamily & TMPF_TRUETYPE))
>> >> +               warning("Your console font probably doesn\'t support "
>> >> +                       "Unicode. If you experience strange
>> characters in the output, "
>> >> +                       "consider switching to a TrueType font
>> such as Lucida Console!");
>> >> +}
>> >
>> > Where do you have TMPF_TRUETYPE from? It's not documented in MSDN for
>> > the CONSOLE_FONT_INFOEX structure[1], but it IS documented for the
>> > TEXTMETRIC structure[2]. Relying on undocumented functionality is a
>> > bit risky, IMO...
>> >
>> > [1]: http://msdn.microsoft.com/en-us/library/ms682069(v=VS.85).aspx
>> > [2]: http://msdn.microsoft.com/en-us/library/dd145132(v=vs.85).aspx
>> >
>
> Trial and error and a bit of a guess. My Win7 machine returns 0 for raster
> and 0x36 for true type (which is exactly what is stored in the FontFamily
> registry value, also on XP). So the MSDN documentation is obviously
> incorrect (says: "This parameter can be one of the following values", not
> "Bits 4-7 can be..."). Looking for other uses of the font family constants,
> I found LOGFONT.lfPitchAndFamily (cannot produce 0x36 either) and
> TEXTMETRIC.tmPitchAndFamily (fits nicely).
>
> If we want to stick with fully documented functionality, I think much of
> msysgit code is doomed :-)
>

Uh, why? Git for Windows doesn't really use that many exotic APIs...

Generally, using undocumented functionality is asking for trouble with
future Windows versions (or sometimes even service packs).

But let's be real. This is only a warning, so even IF it starts
breaking with future Windows versions, at least it wouldn't render Git
unusable. Because of that, I'm not going to object to this use ;)

> Btw. the version you are referring to will always print the font warning on
> Vista/7. That's because git closes stdout (and thus the console handle)
> before the atexit routine is called. I've posted a fix for this in the
> original mailing list thread, the current thread safe console version also
> doesn't have this problem.

Nice, thanks.

Reply all
Reply to author
Forward
0 new messages