Reviewers: Nico
CL: https://codereview.chromium.org/1443483003/
Message:
PTAL
Description:
Allow higher Unicode characters (above U+FFFF) in XMB files.
The XMB tool has a regex of invalid XML characters, which erroneously
matched all Unicode characters in the supplementary planes (U+10000 to
U+10FFFF). The tool would silently replace these characters with spaces
when generating XMB files, which caused problems recently when an emoji
character was added to a grd file. The translation console supports
these characters, so GRIT should too.
The XMB tool now supports these characters, and now raises an exception
if an invalid character is used (instead of silently replacing it with a
space).
BUG=498288
Base URL: svn://svn.chromium.org/chrome/trunk/src
Affected files (+16, -5 lines):
M tools/grit/grit/tool/xmb.py
M tools/grit/grit/tool/xmb_unittest.py
Index: tools/grit/grit/tool/xmb.py
diff --git a/tools/grit/grit/tool/xmb.py b/tools/grit/grit/tool/xmb.py
old mode 100755
new mode 100644
index aaefeecad4b54b554402fb21049d89f22ea30072..0e7950ccde1e237eec79b5e115ff9d7e95ce26f1
--- a/tools/grit/grit/tool/xmb.py
+++ b/tools/grit/grit/tool/xmb.py
@@ -28,8 +28,10 @@ _XML_QUOTE_ESCAPES = {
u"'": u'&apos;',
u'"': u'&quot;',
}
+# See http://www.w3.org/TR/xml/#charsets
_XML_BAD_CHAR_REGEX = lazy_re.compile(u'[^\u0009\u000A\u000D'
- u'\u0020-\uD7FF\uE000-\uFFFD]')
+ u'\u0020-\uD7FF\uE000-\uFFFD'
+ u'\U00010000-\U0010FFFF]')
def _XmlEscape(s):
@@ -40,7 +42,11 @@ def _XmlEscape(s):
if not type(s) == unicode:
s = unicode(s)
result = saxutils.escape(s, _XML_QUOTE_ESCAPES)
- return _XML_BAD_CHAR_REGEX.sub(u'', result).encode('utf-8')
+ illegal_chars = _XML_BAD_CHAR_REGEX.search(result)
+ if illegal_chars:
+ raise Exception('String contains characters disallowed in XML: %s' %
+ repr(result))
+ return result.encode('utf-8')
def _WriteAttribute(file, name, value):
Index: tools/grit/grit/tool/xmb_unittest.py
diff --git a/tools/grit/grit/tool/xmb_unittest.py b/tools/grit/grit/tool/xmb_unittest.py
old mode 100755
new mode 100644
index 10f81d7cf3edaca1cb7660e04376310fb365201b..df8e84b6200c021c7f2be53849a967b124edefc1
--- a/tools/grit/grit/tool/xmb_unittest.py
+++ b/tools/grit/grit/tool/xmb_unittest.py
@@ -37,18 +37,23 @@ class XmbUnittest(unittest.TestCase):
<message name="IDS_BONGOBINGO">
Yibbee
</message>
+ <message name="IDS_UNICODE">
+ Ol\xe1, \u4eca\u65e5\u306f! \U0001F60A
+ </message>
</messages>
<structures>
<structure type="dialog" name="IDD_SPACYBOX" encoding="utf-16"
file="grit/testdata/klonk.rc" />
</structures>
</release>
- </grit>'''), '.')
+ </grit>'''.encode('utf-8')), '.')
self.xmb_file = StringIO.StringIO()
def testNormalOutput(self):
xmb.OutputXmb().Process(self.res_tree, self.xmb_file)
- output = self.xmb_file.getvalue()
- self.failUnless(output.count('Joi') and output.count('Yibbee'))
+ output = self.xmb_file.getvalue().decode('utf-8')
+ self.failUnless(output.count('Joi'))
+ self.failUnless(output.count('Yibbee'))
+ self.failUnless(output.count(u'Ol\xe1, \u4eca\u65e5\u306f! \U0001F60A'))
def testLimitList(self):
limit_file = StringIO.StringIO(