Reviewers: Nico
CL:
https://codereview.chromium.org/1424933018/
Message:
PTAL
Description:
Allow higher unicode characters in XMB files.
The XMB tool has a regex of invalid XML characters, which erroneously
contained all unicode characters in the supplementary planes (U+10000 to
U+10FFFF). The tool would silently replace these characters with spaces
when generating XMB files, which caused problems recently when an emoji
character was added to a grd file. The translation console supports
these characters, so GRIT should too.
The XMB tool now supports these characters, and now raises an exception
if an invalid character is used (instead of silently replacing it with a
space).
BUG=554070
Base URL:
http://grit-i18n.googlecode.com/svn/trunk
Affected files (+16, -5 lines):
M grit/tool/xmb.py
M grit/tool/xmb_unittest.py
Index: grit/tool/xmb.py
diff --git a/grit/tool/xmb.py b/grit/tool/xmb.py
index
aaefeecad4b54b554402fb21049d89f22ea30072..0e7950ccde1e237eec79b5e115ff9d7e95ce26f1
100644
--- a/grit/tool/xmb.py
+++ b/grit/tool/xmb.py
@@ -28,8 +28,10 @@ _XML_QUOTE_ESCAPES = {
u"'": u''',
u'"': u'"',
}
+# See
http://www.w3.org/TR/xml/#charsets
_XML_BAD_CHAR_REGEX = lazy_re.compile(u'[^\u0009\u000A\u000D'
- u'\u0020-\uD7FF\uE000-\uFFFD]')
+ u'\u0020-\uD7FF\uE000-\uFFFD'
+ u'\U00010000-\U0010FFFF]')
def _XmlEscape(s):
@@ -40,7 +42,11 @@ def _XmlEscape(s):
if not type(s) == unicode:
s = unicode(s)
result = saxutils.escape(s, _XML_QUOTE_ESCAPES)
- return _XML_BAD_CHAR_REGEX.sub(u'', result).encode('utf-8')
+ illegal_chars = _XML_BAD_CHAR_REGEX.search(result)
+ if illegal_chars:
+ raise Exception('String contains characters disallowed in XML: %s' %
+ repr(result))
+ return result.encode('utf-8')
def _WriteAttribute(file, name, value):
Index: grit/tool/xmb_unittest.py
diff --git a/grit/tool/xmb_unittest.py b/grit/tool/xmb_unittest.py
index
10f81d7cf3edaca1cb7660e04376310fb365201b..df8e84b6200c021c7f2be53849a967b124edefc1
100644
--- a/grit/tool/xmb_unittest.py
+++ b/grit/tool/xmb_unittest.py
@@ -37,18 +37,23 @@ class XmbUnittest(unittest.TestCase):
<message name="IDS_BONGOBINGO">
Yibbee
</message>
+ <message name="IDS_UNICODE">
+ Ol\xe1, \u4eca\u65e5\u306f! \U0001F60A
+ </message>
</messages>
<structures>
<structure type="dialog" name="IDD_SPACYBOX" encoding="utf-16"
file="grit/testdata/klonk.rc" />
</structures>
</release>
- </grit>'''), '.')
+ </grit>'''.encode('utf-8')), '.')
self.xmb_file = StringIO.StringIO()
def testNormalOutput(self):
xmb.OutputXmb().Process(self.res_tree, self.xmb_file)
- output = self.xmb_file.getvalue()
- self.failUnless(output.count('Joi') and output.count('Yibbee'))
+ output = self.xmb_file.getvalue().decode('utf-8')
+ self.failUnless(output.count('Joi'))
+ self.failUnless(output.count('Yibbee'))
+ self.failUnless(output.count(u'Ol\xe1, \u4eca\u65e5\u306f!
\U0001F60A'))
def testLimitList(self):
limit_file = StringIO.StringIO(