kangax wrote:
> Garrett Smith wrote:
> [...]
>> Is this fixed in recent JScript in IE8?
> I'll check this later, will let you know.
If that question was whether \s in a regular expression matches '\u00A0'
in IE 8 then it appears that the answer is no.
>> Question: Why not:-
>> function trimString(s){
>> return s.replace(/^[\s|\xA0]+|[\s|\xA0]+$/g, '');
>> }
> That would work, sure.
<snip>
By "work" I assume you mean that it corrects the fact that some
javascript engines do not match '\u00A0' (the non-breaking space
character) when \s is used in a regular expression. But is this an
attempt to create methods that conform to the ECMA (3rd Ed.)
specification, and thus are consistent across platforms, or is it a
desire to arbitrarily include the non-breaking space in the set of
matched characters for a trim function?
The former seems the more reasonable goal, but in that case this trim
function still falls short: where '\u00A0' is not matched, the odds are
extremely good that '\u2003' (EM space), to name but one, is not matched
either. So the above function will still produce inconsistent results
between javascript engines.
The ECMA spec wants \s to match javascript's white space characters and
its line terminator characters, but the definition of white space includes
all of Unicode's Zs group, and JScript, for example, does not match the
majority of those.
Try this out:-
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<title></title>
</head>
<body>
<pre>
<script type="text/javascript">
/*
 * Characters this page expects \s to match as white space: tab, vertical
 * tab, form feed and the members of the Unicode Zs group.
 * Each entry holds the decimal code point (cp), a hexadecimal label
 * (codePoint), the character written as a Unicode escape (character),
 * its name, and its Unicode group.
 */
var WhiteSpace = [
  {cp: '9', codePoint: '0x0009', character: '\u0009', name: '<control>[ASCII Tab]', group: 'Cc'},
  {cp: '11', codePoint: '0x000B', character: '\u000B', name: '<control>[ASCII Vertical Tab]', group: 'Cc'},
  {cp: '12', codePoint: '0x000C', character: '\u000C', name: '<control>[ASCII Form Feed]', group: 'Cc'},
  {cp: '32', codePoint: '0x0020', character: '\u0020', name: 'SPACE', group: 'Zs'},
  {cp: '160', codePoint: '0x00A0', character: '\u00A0', name: 'NO-BREAK SPACE', group: 'Zs'},
  {cp: '5760', codePoint: '0x1680', character: '\u1680', name: 'OGHAM SPACE MARK', group: 'Zs'},
  {cp: '6158', codePoint: '0x180E', character: '\u180E', name: 'MONGOLIAN VOWEL SEPARATOR', group: 'Zs'},
  {cp: '8192', codePoint: '0x2000', character: '\u2000', name: 'EN QUAD', group: 'Zs'},
  {cp: '8193', codePoint: '0x2001', character: '\u2001', name: 'EM QUAD', group: 'Zs'},
  {cp: '8194', codePoint: '0x2002', character: '\u2002', name: 'EN SPACE', group: 'Zs'},
  {cp: '8195', codePoint: '0x2003', character: '\u2003', name: 'EM SPACE', group: 'Zs'},
  {cp: '8196', codePoint: '0x2004', character: '\u2004', name: 'THREE-PER-EM SPACE', group: 'Zs'},
  {cp: '8197', codePoint: '0x2005', character: '\u2005', name: 'FOUR-PER-EM SPACE', group: 'Zs'},
  {cp: '8198', codePoint: '0x2006', character: '\u2006', name: 'SIX-PER-EM SPACE', group: 'Zs'},
  {cp: '8199', codePoint: '0x2007', character: '\u2007', name: 'FIGURE SPACE', group: 'Zs'},
  {cp: '8200', codePoint: '0x2008', character: '\u2008', name: 'PUNCTUATION SPACE', group: 'Zs'},
  {cp: '8201', codePoint: '0x2009', character: '\u2009', name: 'THIN SPACE', group: 'Zs'},
  {cp: '8202', codePoint: '0x200A', character: '\u200A', name: 'HAIR SPACE', group: 'Zs'},
  {cp: '8239', codePoint: '0x202F', character: '\u202F', name: 'NARROW NO-BREAK SPACE', group: 'Zs'},
  {cp: '8287', codePoint: '0x205F', character: '\u205F', name: 'MEDIUM MATHEMATICAL SPACE', group: 'Zs'},
  {cp: '12288', codePoint: '0x3000', character: '\u3000', name: 'IDEOGRAPHIC SPACE', group: 'Zs'}
];
/*
 * Line terminator characters, which this page also expects \s to match.
 * Entries use the same fields as the other character tables: cp (decimal
 * code point), codePoint (hexadecimal label), character (Unicode escape),
 * name and group.
 */
var LineTerminator = [
  {cp: '8232', codePoint: '0x2028', character: '\u2028', name: 'LINE SEPARATOR', group: 'Zl'},
  {cp: '8233', codePoint: '0x2029', character: '\u2029', name: 'PARAGRAPH SEPARATOR', group: 'Zp'},
  {cp: '10', codePoint: '0x000A', character: '\u000A', name: '<control>[ASCII Line Feed]', group: 'Cc'},
  {cp: '13', codePoint: '0x000D', character: '\u000D', name: '<control>[ASCII Carriage Return]', group: 'Cc'}
];
/*
 * Zero-width format characters that this page reports as "must not be
 * matched by \s (in ES 3)". Entries use the same fields as the other
 * character tables: cp (decimal code point), codePoint (hexadecimal
 * label), character (Unicode escape), name and group.
 */
var NonWhiteSpace = [
  {cp: '8203', codePoint: '0x200B', character: '\u200B', name: 'ZERO WIDTH SPACE (will be whitespace in ES 3.1)', group: 'Cf'},
  {cp: '8204', codePoint: '0x200C', character: '\u200C', name: 'ZERO WIDTH NON-JOINER', group: 'Cf'},
  {cp: '8205', codePoint: '0x200D', character: '\u200D', name: 'ZERO WIDTH JOINER', group: 'Cf'},
  {cp: '65279', codePoint: '0xFEFF', character: '\uFEFF', name: 'ZERO WIDTH NO-BREAK SPACE (will be whitespace in ES 3.1)', group: 'Cf'}
];
/* The character tables whose members are reported under
   "Should be matched by \s". */
var testAr = [
  WhiteSpace,
  LineTerminator
];
/* The pattern under test: a global match on the \s class escape. */
var testRX = /\s/g;
/*
 * Reports whether the global pattern testRX matches one character.
 *
 * chObj - a character table entry with these string properties:
 *           cp        - the code point in decimal,
 *           codePoint - the code point as a hexadecimal label,
 *           character - the same character as a Unicode escape literal,
 *           name, group - descriptive labels echoed in the report.
 *
 * Returns one report line terminated by '\n'. Normally it starts with
 * 'true' when removing testRX matches changed the character and 'false'
 * when it did not, followed by the stripped result in double quotes and
 * the entry's labels. If String.fromCharCode yields an empty string, or a
 * string different from chObj.character, a line starting with 'Anomaly'
 * is returned instead.
 */
function testWS(chObj){
/* Named 'ch' rather than 'char': char is a future reserved word in
   ECMA-262 3rd Ed., so it is not a safe identifier there. */
var ch = String.fromCharCode(+chObj.cp);
var stripped, matches;
if(!ch){
return 'Anomaly: Non-empty string is false.\n';
}
/* First verify that the String.fromCharCode result matches the
   Unicode escape sequence based string literal for the character.
*/
if(ch !== chObj.character){
return 'Anomaly in String.fromCharCode('+chObj.cp+')\n';
}
/* If the pattern matched, the single character has been removed and the
   result no longer equals the original character. */
stripped = ch.replace(testRX, '');
matches = (stripped !== chObj.character);
return (
matches+
'\t"'+stripped+'"\tcp = '+chObj.codePoint+'\tname = '+
chObj.name+' group = '+chObj.group+'\n'
);
}
/*
 * Escapes the characters that are significant to an HTML parser.
 * document.write feeds its argument to the parser as markup, so a name
 * such as "<control>[ASCII Tab]" would otherwise be read as an unknown
 * element and disappear from the displayed report.
 *
 * text - the string to escape.
 * Returns text with &, < and > replaced by character references.
 */
function escapeHTML(text){
return text
.replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;');
}
var c, cp, list, ctr;
/* Characters from the testAr tables: a 'true' (matched) report is the
   desired outcome. replace() with a string pattern only changes the first
   occurrence, which is the verdict at the start of the report line. */
document.write('Should be matched by \\s\n');
for(c = 0;c < testAr.length;++c){
list = testAr[c];
for(cp = 0;cp < list.length;++cp){
ctr = list[cp];
document.write(escapeHTML(testWS(ctr)
.replace('true', 'GOOD')
.replace('false', 'FAIL')));
}
}
/* Characters from NonWhiteSpace: here the labels are swapped, because a
   'true' (matched) report is the undesired outcome. */
document.write('\n\nMust not be matched by \\s (in ES 3)\n');
for(cp = 0;cp < NonWhiteSpace.length;++cp){
ctr = NonWhiteSpace[cp];
document.write(escapeHTML(testWS(ctr)
.replace('true', 'FAIL')
.replace('false', 'GOOD')));
}
</script>
</pre>
</body>
</html>
Richard.