mirror of
https://github.com/zhigang1992/angular.js.git
synced 2026-01-12 22:45:52 +08:00
Refactor lexer to use regular expressions
This commit is contained in:
137
src/parser.js
137
src/parser.js
@@ -32,7 +32,7 @@ function lex(text, parseStringsForObjects){
|
||||
index = 0,
|
||||
json = [],
|
||||
ch,
|
||||
lastCh = ':'; // can start regexp
|
||||
lastCh = ':';
|
||||
|
||||
while (index < text.length) {
|
||||
ch = text.charAt(index);
|
||||
@@ -71,6 +71,9 @@ function lex(text, parseStringsForObjects){
|
||||
lastCh = ch;
|
||||
}
|
||||
return tokens;
|
||||
|
||||
|
||||
//////////////////////////////////////////////
|
||||
|
||||
function is(chars) {
|
||||
return chars.indexOf(ch) != -1;
|
||||
@@ -95,10 +98,6 @@ function lex(text, parseStringsForObjects){
|
||||
'A' <= ch && ch <= 'Z' ||
|
||||
'_' == ch || ch == '$';
|
||||
}
|
||||
function isExpOperator(ch) {
|
||||
return ch == '-' || ch == '+' || isNumber(ch);
|
||||
}
|
||||
|
||||
function throwError(error, start, end) {
|
||||
end = end || index;
|
||||
throw Error("Lexer Error: " + error + " at column" +
|
||||
@@ -107,103 +106,61 @@ function lex(text, parseStringsForObjects){
|
||||
" " + end) +
|
||||
" in expression [" + text + "].");
|
||||
}
|
||||
|
||||
/**
 * Consumes a single token from the current lexer position.
 *
 * Matches `regexp` against the part of `text` that starts at `index`. On
 * success it advances `index` past the matched text, creates a token
 * `{index, text}`, lets `processToken` decorate it and appends it to `tokens`.
 *
 * @param {RegExp} regexp pattern describing the token. The callers in this
 *     file all pass patterns anchored with `^`, so the match starts at `index`.
 * @param {function(Object, string, number)} processToken invoked as
 *     `processToken(token, matchedText, startIndex)` to fill in the remaining
 *     token properties (`fn`, `json`, ...).
 * @param {string} errorMsg message handed to `throwError` when no text could
 *     be consumed.
 */
function consume(regexp, processToken, errorMsg) {
  var start = index;
  var match = text.substring(start).match(regexp);
  // A pattern made only of optional groups (e.g. the number pattern) "matches"
  // the empty string. Accepting that would push a token without advancing
  // `index`, so the lexer could never make progress and `errorMsg` would never
  // be reported. Treat an empty match exactly like no match at all.
  if (!match || !match[0]) throwError(errorMsg);
  var token = {index: start, text: match[0]};
  // Advance before calling processToken so that any error raised from the
  // callback sees `index` pointing past the consumed text.
  index = start + match[0].length;
  processToken(token, token.text, start);
  tokens.push(token);
}
|
||||
|
||||
function readNumber() {
|
||||
var number = "";
|
||||
var start = index;
|
||||
while (index < text.length) {
|
||||
var ch = lowercase(text.charAt(index));
|
||||
if (ch == '.' || isNumber(ch)) {
|
||||
number += ch;
|
||||
} else {
|
||||
var peekCh = peek();
|
||||
if (ch == 'e' && isExpOperator(peekCh)) {
|
||||
number += ch;
|
||||
} else if (isExpOperator(ch) &&
|
||||
peekCh && isNumber(peekCh) &&
|
||||
number.charAt(number.length - 1) == 'e') {
|
||||
number += ch;
|
||||
} else if (isExpOperator(ch) &&
|
||||
(!peekCh || !isNumber(peekCh)) &&
|
||||
number.charAt(number.length - 1) == 'e') {
|
||||
throwError('Invalid exponent');
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
index++;
|
||||
}
|
||||
number = 1 * number;
|
||||
tokens.push({index:start, text:number, json:true,
|
||||
fn:function(){return number;}});
|
||||
consume(/^(\d+)?(\.\d+)?([eE][+-]?\d+)?/, function(token, number){
|
||||
token.text = number = 1 * number;
|
||||
token.json = true;
|
||||
token.fn = valueFn(number);
|
||||
}, "Not a valid number");
|
||||
}
|
||||
|
||||
function readIdent() {
|
||||
var ident = "";
|
||||
var start = index;
|
||||
var fn;
|
||||
while (index < text.length) {
|
||||
var ch = text.charAt(index);
|
||||
if (ch == '.' || isIdent(ch) || isNumber(ch)) {
|
||||
ident += ch;
|
||||
} else {
|
||||
break;
|
||||
consume(/^[\w_\$][\w_\$\d]*(\.[\w_\$][\w_\$\d]*)*/, function(token, ident){
|
||||
fn = OPERATORS[ident];
|
||||
if (!fn) {
|
||||
fn = getterFn(ident);
|
||||
fn.isAssignable = ident;
|
||||
}
|
||||
index++;
|
||||
}
|
||||
fn = OPERATORS[ident];
|
||||
tokens.push({
|
||||
index:start,
|
||||
text:ident,
|
||||
json: fn,
|
||||
fn:fn||extend(getterFn(ident), {
|
||||
token.fn = OPERATORS[ident]||extend(getterFn(ident), {
|
||||
assign:function(self, value){
|
||||
return setter(self, ident, value);
|
||||
}
|
||||
})
|
||||
});
|
||||
token.json = OPERATORS[ident];
|
||||
});
|
||||
}
|
||||
|
||||
function readString(quote) {
|
||||
var start = index;
|
||||
index++;
|
||||
var string = "";
|
||||
var rawString = quote;
|
||||
var escape = false;
|
||||
while (index < text.length) {
|
||||
var ch = text.charAt(index);
|
||||
rawString += ch;
|
||||
if (escape) {
|
||||
if (ch == 'u') {
|
||||
var hex = text.substring(index + 1, index + 5);
|
||||
if (!hex.match(/[\da-f]{4}/i))
|
||||
throwError( "Invalid unicode escape [\\u" + hex + "]");
|
||||
index += 4;
|
||||
string += String.fromCharCode(parseInt(hex, 16));
|
||||
} else {
|
||||
var rep = ESCAPE[ch];
|
||||
if (rep) {
|
||||
string += rep;
|
||||
} else {
|
||||
string += ch;
|
||||
}
|
||||
}
|
||||
escape = false;
|
||||
} else if (ch == '\\') {
|
||||
escape = true;
|
||||
} else if (ch == quote) {
|
||||
index++;
|
||||
tokens.push({index:start, text:rawString, string:string, json:true,
|
||||
fn:function(){
|
||||
return (string.length == dateParseLength) ?
|
||||
angular['String']['toDate'](string) : string;
|
||||
}});
|
||||
return;
|
||||
} else {
|
||||
string += ch;
|
||||
}
|
||||
index++;
|
||||
}
|
||||
throwError("Unterminated quote", start);
|
||||
consume(/^(('(\\'|[^'])*')|("(\\"|[^"])*"))/, function(token, rawString, start){
|
||||
var hasError;
|
||||
var string = token.string = rawString.substr(1, rawString.length - 2).
|
||||
replace(/(\\u(.?.?.?.?))|(\\(.))/g,
|
||||
function(match, wholeUnicode, unicode, wholeEscape, escape){
|
||||
if (unicode && !unicode.match(/[\da-fA-F]{4}/))
|
||||
hasError = hasError || bind(null, throwError, "Invalid unicode escape [\\u" + unicode + "]", start);
|
||||
return unicode ?
|
||||
String.fromCharCode(parseInt(unicode, 16)) :
|
||||
ESCAPE[escape] || escape;
|
||||
});
|
||||
(hasError||noop)();
|
||||
token.json = true;
|
||||
token.fn = function(){
|
||||
return (string.length == dateParseLength) ?
|
||||
angular['String']['toDate'](string) :
|
||||
string;
|
||||
};
|
||||
}, "Unterminated string");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -82,9 +82,15 @@ describe('parser', function() {
|
||||
expect(tokens.length).toEqual(1);
|
||||
expect(tokens[0].string).toEqual('\u00a0');
|
||||
});
|
||||
|
||||
it('should error when non terminated string', function(){
|
||||
expect(function(){
|
||||
lex('ignore "text');
|
||||
}).toThrow(new Error('Lexer Error: Unterminated string at column 7 in expression [ignore "text].'));
|
||||
});
|
||||
|
||||
it('should ignore whitespace', function() {
|
||||
var tokens = lex("a \t \n \r b");
|
||||
var tokens = lex("a \t \n \r \u00A0 b");
|
||||
expect(tokens[0].text).toEqual('a');
|
||||
expect(tokens[1].text).toEqual('b');
|
||||
});
|
||||
@@ -130,16 +136,6 @@ describe('parser', function() {
|
||||
expect(tokens[0].text).toEqual(0.5E+10);
|
||||
});
|
||||
|
||||
it('should throws exception for invalid exponent', function() {
|
||||
expect(function() {
|
||||
lex("0.5E-");
|
||||
}).toThrow(new Error('Lexer Error: Invalid exponent at column 4 in expression [0.5E-].'));
|
||||
|
||||
expect(function() {
|
||||
lex("0.5E-A");
|
||||
}).toThrow(new Error('Lexer Error: Invalid exponent at column 4 in expression [0.5E-A].'));
|
||||
});
|
||||
|
||||
it('should tokenize number starting with a dot', function() {
|
||||
var tokens = lex(".5");
|
||||
expect(tokens[0].text).toEqual(0.5);
|
||||
@@ -147,8 +143,8 @@ describe('parser', function() {
|
||||
|
||||
it('should throw error on invalid unicode', function() {
|
||||
expect(function() {
|
||||
lex("'\\u1''bla'");
|
||||
}).toThrow(new Error("Lexer Error: Invalid unicode escape [\\u1''b] at column 2 in expression ['\\u1''bla']."));
|
||||
lex("'\\u1xbla'");
|
||||
}).toThrow(new Error("Lexer Error: Invalid unicode escape [\\u1xbl] at columns 0-9 ['\\u1xbla'] in expression ['\\u1xbla']."));
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user