fix(ngSanitize): encode surrogate pair properly

The encodeEndities function encode non-alphanumeric characters to entities with charCodeAt.
charCodeAt does not return one value when their unicode codeponts is higher than 65,356.
It returns surrogate pair, and this is why the Emoji which has higher codepoints is garbled.
We need to handle them properly.

Closes #5088
Closes #6911
This commit is contained in:
Yutaka Yamaguchi
2014-03-30 06:45:36 +09:00
committed by Caitlin Potter
parent 8d18038301
commit 627b0354ec
2 changed files with 11 additions and 0 deletions

View File

@@ -161,6 +161,7 @@ var START_TAG_REGEXP =
COMMENT_REGEXP = /<!--(.*?)-->/g,
DOCTYPE_REGEXP = /<!DOCTYPE([^>]*?)>/i,
CDATA_REGEXP = /<!\[CDATA\[(.*?)]]>/g,
SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
// Match everything outside of normal chars and " (quote character)
NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g;
@@ -399,6 +400,11 @@ function decodeEntities(value) {
function encodeEntities(value) {
return value.
replace(/&/g, '&amp;').
replace(SURROGATE_PAIR_REGEXP, function (value) {
var hi = value.charCodeAt(0);
var low = value.charCodeAt(1);
return '&#' + (((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000) + ';';
}).
replace(NON_ALPHANUMERIC_REGEXP, function(value){
return '&#' + value.charCodeAt(0) + ';';
}).

View File

@@ -241,6 +241,11 @@ describe('HTML', function() {
expect(html).toEqual('<div>');
});
it('should handle surrogate pair', function() {
writer.chars(String.fromCharCode(55357, 56374));
expect(html).toEqual('&#128054;');
});
describe('explicitly disallow', function() {
it('should not allow attributes', function() {
writer.start('div', {id:'a', name:'a', style:'a'});