mirror of
https://github.com/zhigang1992/angular.js.git
synced 2026-01-12 22:45:52 +08:00
fix(ngSanitize): encode surrogate pair properly
The encodeEntities function encodes non-alphanumeric characters to entities using charCodeAt. For characters whose Unicode code point is higher than 65,535, charCodeAt does not return the code point; it returns one code unit of a surrogate pair, which is why emoji, which have such code points, were garbled. We need to handle surrogate pairs properly. Closes #5088 Closes #6911
This commit is contained in:
committed by
Caitlin Potter
parent
8d18038301
commit
627b0354ec
@@ -161,6 +161,7 @@ var START_TAG_REGEXP =
|
||||
COMMENT_REGEXP = /<!--(.*?)-->/g,
|
||||
DOCTYPE_REGEXP = /<!DOCTYPE([^>]*?)>/i,
|
||||
CDATA_REGEXP = /<!\[CDATA\[(.*?)]]>/g,
|
||||
SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
|
||||
// Match everything outside of normal chars and " (quote character)
|
||||
NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g;
|
||||
|
||||
@@ -399,6 +400,11 @@ function decodeEntities(value) {
|
||||
function encodeEntities(value) {
|
||||
return value.
|
||||
replace(/&/g, '&amp;').
|
||||
replace(SURROGATE_PAIR_REGEXP, function (value) {
|
||||
var hi = value.charCodeAt(0);
|
||||
var low = value.charCodeAt(1);
|
||||
return '&#' + (((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000) + ';';
|
||||
}).
|
||||
replace(NON_ALPHANUMERIC_REGEXP, function(value){
|
||||
return '&#' + value.charCodeAt(0) + ';';
|
||||
}).
|
||||
|
||||
@@ -241,6 +241,11 @@ describe('HTML', function() {
|
||||
expect(html).toEqual('<div>');
|
||||
});
|
||||
|
||||
it('should handle surrogate pair', function() {
|
||||
writer.chars(String.fromCharCode(55357, 56374));
|
||||
expect(html).toEqual('&#128054;');
|
||||
});
|
||||
|
||||
describe('explicitly disallow', function() {
|
||||
it('should not allow attributes', function() {
|
||||
writer.start('div', {id:'a', name:'a', style:'a'});
|
||||
|
||||
Reference in New Issue
Block a user