mirror of
https://github.com/zhigang1992/angular.js.git
synced 2026-01-12 22:45:52 +08:00
Fixed sanitization
* explicitly require full URLs (ftp|https?://...) * list the URI attributes * remove a lot of unneeded attributes
This commit is contained in:
117
src/sanitizer.js
117
src/sanitizer.js
@@ -21,7 +21,9 @@ var START_TAG_REGEXP = /^<\s*([\w:]+)((?:\s+\w+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']
|
||||
BEGIN_TAG_REGEXP = /^</,
|
||||
BEGING_END_TAGE_REGEXP = /^<\s*\//,
|
||||
COMMENT_REGEXP = /<!--(.*?)-->/g,
|
||||
CDATA_REGEXP = /<!\[CDATA\[(.*?)]]>/g;
|
||||
CDATA_REGEXP = /<!\[CDATA\[(.*?)]]>/g,
|
||||
URI_REGEXP = /^((ftp|https?):\/\/|mailto:|#)/,
|
||||
NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g; // Match everything outside of normal chars and " (quote character)
|
||||
|
||||
// Empty Elements - HTML 4.01
|
||||
var emptyElements = makeMap("area,base,basefont,br,col,hr,img,input,isindex,link,param");
|
||||
@@ -33,24 +35,24 @@ var blockElements = makeMap("address,blockquote,button,center,dd,del,dir,div,dl,
|
||||
// Inline Elements - HTML 4.01
|
||||
var inlineElements = makeMap("a,abbr,acronym,b,basefont,bdo,big,br,button,cite,code,del,dfn,em,font,i,img,"+
|
||||
"input,ins,kbd,label,map,q,s,samp,select,small,span,strike,strong,sub,sup,textarea,tt,u,var");
|
||||
|
||||
// Elements that you can, intentionally, leave open
|
||||
// (and which close themselves)
|
||||
var closeSelfElements = makeMap("colgroup,dd,dt,li,options,p,td,tfoot,th,thead,tr");
|
||||
|
||||
// Attributes that have their values filled in disabled="disabled"
|
||||
var fillAttrs = makeMap("checked,compact,declare,defer,disabled,ismap,multiple,nohref,noresize,noshade,nowrap,readonly,selected");
|
||||
|
||||
// Special Elements (can contain anything)
|
||||
var specialElements = makeMap("script,style");
|
||||
|
||||
var validElements = extend({}, emptyElements, blockElements, inlineElements, closeSelfElements);
|
||||
var validAttrs = extend({}, fillAttrs, makeMap(
|
||||
'abbr,align,alink,alt,archive,axis,background,bgcolor,border,cellpadding,cellspacing,cite,class,classid,clear,code,codebase,'+
|
||||
'codetype,color,cols,colspan,content,coords,data,dir,face,for,headers,height,href,hreflang,hspace,id,label,lang,language,'+
|
||||
'link,longdesc,marginheight,marginwidth,maxlength,media,method,name,nowrap,profile,prompt,rel,rev,rows,rowspan,rules,scheme,'+
|
||||
'scope,scrolling,shape,size,span,src,standby,start,summary,tabindex,target,text,title,type,usemap,valign,value,valuetype,'+
|
||||
'vlink,vspace,width'));
|
||||
|
||||
//see: http://www.w3.org/TR/html4/index/attributes.html
|
||||
//Attributes that have their values filled in disabled="disabled"
|
||||
var fillAttrs = makeMap("checked,compact,declare,defer,disabled,ismap,multiple,nohref,noresize,noshade,nowrap,readonly,selected");
|
||||
//Attributes that have href and hence need to be sanitized
|
||||
var uriAttrs = makeMap("background,href,longdesc,src,usemap");
|
||||
var validAttrs = extend({}, fillAttrs, uriAttrs, makeMap(
|
||||
'abbr,align,alt,axis,bgcolor,border,cellpadding,cellspacing,class,clear,'+
|
||||
'color,cols,colspan,coords,dir,face,for,headers,height,hreflang,hspace,id,'+
|
||||
'label,lang,language,maxlength,method,name,prompt,rel,rev,rows,rowspan,rules,'+
|
||||
'scope,scrolling,shape,size,span,start,summary,tabindex,target,title,type,'+
|
||||
'valign,value,vspace,width'));
|
||||
|
||||
/**
|
||||
* @example
|
||||
@@ -64,7 +66,7 @@ var validAttrs = extend({}, fillAttrs, makeMap(
|
||||
* @param {string} html string
|
||||
* @param {object} handler
|
||||
*/
|
||||
var htmlParser = function( html, handler ) {
|
||||
function htmlParser( html, handler ) {
|
||||
var index, chars, match, stack = [], last = html;
|
||||
stack.last = function(){ return stack[ stack.length - 1 ]; };
|
||||
|
||||
@@ -112,8 +114,7 @@ var htmlParser = function( html, handler ) {
|
||||
var text = index < 0 ? html : html.substring( 0, index );
|
||||
html = index < 0 ? "" : html.substring( index );
|
||||
|
||||
if ( handler.chars )
|
||||
handler.chars( text );
|
||||
handler.chars( decodeEntities(text) );
|
||||
}
|
||||
|
||||
} else {
|
||||
@@ -122,8 +123,7 @@ var htmlParser = function( html, handler ) {
|
||||
replace(COMMENT_REGEXP, "$1").
|
||||
replace(CDATA_REGEXP, "$1");
|
||||
|
||||
if ( handler.chars )
|
||||
handler.chars( text );
|
||||
handler.chars( decodeEntities(text) );
|
||||
|
||||
return "";
|
||||
});
|
||||
@@ -157,21 +157,18 @@ var htmlParser = function( html, handler ) {
|
||||
if ( !unary )
|
||||
stack.push( tagName );
|
||||
|
||||
if ( handler.start ) {
|
||||
var attrs = {};
|
||||
var attrs = {};
|
||||
|
||||
rest.replace(ATTR_REGEXP, function(match, name) {
|
||||
var value = arguments[2] ? arguments[2] :
|
||||
arguments[3] ? arguments[3] :
|
||||
arguments[4] ? arguments[4] :
|
||||
fillAttrs[name] ? name : "";
|
||||
rest.replace(ATTR_REGEXP, function(match, name) {
|
||||
var value = arguments[2] ? arguments[2] :
|
||||
arguments[3] ? arguments[3] :
|
||||
arguments[4] ? arguments[4] :
|
||||
fillAttrs[name] ? name : "";
|
||||
|
||||
attrs[name] = value; //value.replace(/(^|[^\\])"/g, '$1\\\"') //"
|
||||
});
|
||||
attrs[name] = decodeEntities(value); //value.replace(/(^|[^\\])"/g, '$1\\\"') //"
|
||||
});
|
||||
|
||||
if ( handler.start )
|
||||
handler.start( tagName, attrs, unary );
|
||||
}
|
||||
handler.start( tagName, attrs, unary );
|
||||
}
|
||||
|
||||
function parseEndTag( tag, tagName ) {
|
||||
@@ -186,8 +183,7 @@ var htmlParser = function( html, handler ) {
|
||||
if ( pos >= 0 ) {
|
||||
// Close all the open elements, up the stack
|
||||
for ( i = stack.length - 1; i >= pos; i-- )
|
||||
if ( handler.end )
|
||||
handler.end( stack[ i ] );
|
||||
handler.end( stack[ i ] );
|
||||
|
||||
// Remove the open elements from the stack
|
||||
stack.length = pos;
|
||||
@@ -206,28 +202,32 @@ function makeMap(str){
|
||||
return obj;
|
||||
}
|
||||
|
||||
/*
|
||||
* For attack vectors see: http://ha.ckers.org/xss.html
|
||||
*/
|
||||
var JAVASCRIPT_URL = /^javascript:/i,
|
||||
NBSP_REGEXP = / /gim,
|
||||
HEX_ENTITY_REGEXP = /&#x([\da-f]*);?/igm,
|
||||
DEC_ENTITY_REGEXP = /&#(\d+);?/igm,
|
||||
CHAR_REGEXP = /[\w:]/gm,
|
||||
HEX_DECODE = function(match, code){return fromCharCode(parseInt(code,16));},
|
||||
DEC_DECODE = function(match, code){return fromCharCode(code);};
|
||||
/**
|
||||
* @param {string} url
|
||||
* @returns true if url decodes to something which starts with 'javascript:' hence unsafe
|
||||
* decodes all entities into regular string
|
||||
* @param value
|
||||
* @returns
|
||||
*/
|
||||
function isJavaScriptUrl(url) {
|
||||
var chars = [];
|
||||
url.replace(NBSP_REGEXP, '').
|
||||
replace(HEX_ENTITY_REGEXP, HEX_DECODE).
|
||||
replace(DEC_ENTITY_REGEXP, DEC_DECODE).
|
||||
// Remove all non \w: characters, unfurtunetly value.replace(/[\w:]/,'') can be defeated using \u0000
|
||||
replace(CHAR_REGEXP, function(ch){chars.push(ch);});
|
||||
return JAVASCRIPT_URL.test(lowercase(chars.join('')));
|
||||
var hiddenPre=document.createElement("pre");
|
||||
function decodeEntities(value) {
|
||||
hiddenPre.innerHTML=value.replace(/</g,"<");
|
||||
return hiddenPre.innerText || hiddenPre.textContent;
|
||||
}
|
||||
|
||||
/**
|
||||
* Escapes all potentially dangerous characters, so that the
|
||||
* resulting string can be safely inserted into attribute or
|
||||
* element text.
|
||||
* @param value
|
||||
* @returns escaped text
|
||||
*/
|
||||
function encodeEntities(value) {
|
||||
return value.
|
||||
replace(/&/g, '&').
|
||||
replace(NON_ALPHANUMERIC_REGEXP, function(value){
|
||||
return '&#' + value.charCodeAt(0) + ';';
|
||||
}).
|
||||
replace(/</g, '<').
|
||||
replace(/>/g, '>');
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -253,14 +253,12 @@ function htmlSanitizeWriter(buf){
|
||||
out('<');
|
||||
out(tag);
|
||||
foreach(attrs, function(value, key){
|
||||
if (validAttrs[lowercase(key)] && !isJavaScriptUrl(value)) {
|
||||
var lkey=lowercase(key);
|
||||
if (validAttrs[lkey] && (uriAttrs[lkey]!==true || value.match(URI_REGEXP))) {
|
||||
out(' ');
|
||||
out(key);
|
||||
out('="');
|
||||
out(value.
|
||||
replace(/</g, '<').
|
||||
replace(/>/g, '>').
|
||||
replace(/\"/g,'"'));
|
||||
out(encodeEntities(value));
|
||||
out('"');
|
||||
}
|
||||
});
|
||||
@@ -280,10 +278,7 @@ function htmlSanitizeWriter(buf){
|
||||
},
|
||||
chars: function(chars){
|
||||
if (!ignore) {
|
||||
out(chars.
|
||||
replace(/&(\w+[&;\W])?/g, function(match, entity){return entity?match:'&';}).
|
||||
replace(/</g, '<').
|
||||
replace(/>/g, '>'));
|
||||
out(encodeEntities(chars));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -6,7 +6,7 @@ describe('HTML', function(){
|
||||
|
||||
it('should echo html', function(){
|
||||
expectHTML('hello<b class="1\'23" align=\'""\'>world</b>.').
|
||||
toEqual('hello<b class="1\'23" align="""">world</b>.');
|
||||
toEqual('hello<b class="1\'23" align="""">world</b>.');
|
||||
});
|
||||
|
||||
it('should remove script', function(){
|
||||
@@ -49,9 +49,15 @@ describe('HTML', function(){
|
||||
expectHTML('a<my:hr/><my:div>b</my:div>c').toEqual('abc');
|
||||
});
|
||||
|
||||
it('should handle entities', function(){
|
||||
var everything = '<div id="!@#$%^&*()_+-={}[]:";\'<>?,./`~ ħ">' +
|
||||
'!@#$%^&*()_+-={}[]:";\'<>?,./`~ ħ</div>';
|
||||
expectHTML(everything).toEqual(everything);
|
||||
});
|
||||
|
||||
it('should handle improper html', function(){
|
||||
expectHTML('< div id="</div>" alt=abc href=\'"\' >text< /div>').
|
||||
toEqual('<div id="</div>" alt="abc" href=""">text</div>');
|
||||
expectHTML('< div id="</div>" alt=abc dir=\'"\' >text< /div>').
|
||||
toEqual('<div id="</div>" alt="abc" dir=""">text</div>');
|
||||
});
|
||||
|
||||
it('should handle improper html2', function(){
|
||||
@@ -81,19 +87,14 @@ describe('HTML', function(){
|
||||
expect(html).toEqual('a<div>&</div>c');
|
||||
});
|
||||
|
||||
it('should not double escape entities', function(){
|
||||
writer.chars(' ><');
|
||||
expect(html).toEqual(' ><');
|
||||
});
|
||||
|
||||
it('should escape IE script', function(){
|
||||
writer.chars('&{}');
|
||||
expect(html).toEqual('&{}');
|
||||
writer.chars('&<>{}');
|
||||
expect(html).toEqual('&<>{}');
|
||||
});
|
||||
|
||||
it('should escape attributes', function(){
|
||||
writer.start('div', {id:'\"\'<>'});
|
||||
expect(html).toEqual('<div id=""\'<>">');
|
||||
writer.start('div', {id:'!@#$%^&*()_+-={}[]:";\'<>?,./`~ \n\0\r\u0127'});
|
||||
expect(html).toEqual('<div id="!@#$%^&*()_+-={}[]:";\'<>?,./`~ � ħ">');
|
||||
});
|
||||
|
||||
it('should ignore missformed elements', function(){
|
||||
@@ -105,12 +106,32 @@ describe('HTML', function(){
|
||||
writer.start('div', {unknown:""});
|
||||
expect(html).toEqual('<div>');
|
||||
});
|
||||
|
||||
describe('isUri', function(){
|
||||
|
||||
function isUri(value) {
|
||||
return value.match(URI_REGEXP);
|
||||
}
|
||||
|
||||
it('should be URI', function(){
|
||||
expect(isUri('http://abc')).toBeTruthy();
|
||||
expect(isUri('https://abc')).toBeTruthy();
|
||||
expect(isUri('ftp://abc')).toBeTruthy();
|
||||
expect(isUri('mailto:me@example.com')).toBeTruthy();
|
||||
expect(isUri('#anchor')).toBeTruthy();
|
||||
});
|
||||
|
||||
it('should not be UIR', function(){
|
||||
expect(isUri('')).toBeFalsy();
|
||||
expect(isUri('javascript:alert')).toBeFalsy();
|
||||
});
|
||||
});
|
||||
|
||||
describe('javascript URL attribute', function(){
|
||||
beforeEach(function(){
|
||||
this.addMatchers({
|
||||
toBeValidUrl: function(){
|
||||
return !isJavaScriptUrl(this.actual);
|
||||
return URI_REGEXP.exec(this.actual);
|
||||
}
|
||||
});
|
||||
});
|
||||
@@ -118,7 +139,7 @@ describe('HTML', function(){
|
||||
it('should ignore javascript:', function(){
|
||||
expect('JavaScript:abc').not.toBeValidUrl();
|
||||
expect(' \n Java\n Script:abc').not.toBeValidUrl();
|
||||
expect('JavaScript/my.js').toBeValidUrl();
|
||||
expect('http://JavaScript/my.js').toBeValidUrl();
|
||||
});
|
||||
|
||||
it('should ignore dec encoded javascript:', function(){
|
||||
|
||||
Reference in New Issue
Block a user