Put TokenParser extension in its own file

Improve file layout by putting the TokenParser extension in its own
file.
This commit is contained in:
David Dufresne
2016-10-08 14:22:35 -04:00
parent 7a20d1a888
commit e998813c86
4 changed files with 994 additions and 980 deletions

View File

@@ -0,0 +1,984 @@
//
// DefaultTokenParser.swift
// SwiftParsec
//
// Created by David Dufresne on 2016-10-08.
// Copyright © 2016 David Dufresne. All rights reserved.
//
import func Foundation.pow
//==============================================================================
// Default implementation of the methods of the `TokenParser` parser type.
extension TokenParser {
// Type aliases used internally to simplify the code. They all share `String`
// and `UserState` as their first two generic arguments and differ only in the
// last one, the type of the value produced by the parser.
typealias StrParser = GenericParser<String, UserState, String>
typealias CharacterParser = GenericParser<String, UserState, Character>
typealias IntParser = GenericParser<String, UserState, Int>
typealias DoubleParser = GenericParser<String, UserState, Double>
// Produces either an `Int` or a `Double`.
typealias IntDoubleParser =
GenericParser<String, UserState, Either<Int, Double>>
// Produces the empty tuple, i.e. no meaningful value.
typealias VoidParser = GenericParser<String, UserState, ()>
//
// Identifiers & Reserved words
//
/// This lexeme parser parses a legal identifier. Returns the identifier
/// string. This parser will fail on identifiers that are reserved words.
/// Legal identifier (start) characters and reserved words are defined in
/// the `LanguageDefinition` that is passed to the initializer of this token
/// parser. An `identifier` is treated as a single token using
/// `GenericParser.attempt`.
///
/// When the language definition is not case sensitive, the reserved names
/// and the parsed identifier are both lowercased before being compared. The
/// identifier is always returned exactly as it was written in the input.
public var identifier: GenericParser<String, UserState, String> {
    let langDef = languageDefinition
    let isCaseSensitive = langDef.isCaseSensitive

    // The set of reserved names only depends on the language definition, so
    // it is normalized once here instead of once per parsed identifier.
    let reservedNames: Set<String>
    if isCaseSensitive {
        reservedNames = langDef.reservedNames
    } else {
        reservedNames = langDef.reservedNames.map { $0.lowercased() }
    }

    // A start character followed by any number of identifier letters.
    let ident: StrParser = langDef.identifierStart >>- { char in
        langDef.identifierLetter(char).many >>- { chars in
            let cs = chars.prepending(char)
            return GenericParser(result: String(cs))
        }
    } <?> LocalizedString("identifier")

    // Reject the identifier when it is one of the reserved names.
    let identCheck: StrParser = ident >>- { name in
        let candidate = isCaseSensitive ? name : name.lowercased()
        guard !reservedNames.contains(candidate) else {
            let reservedWordMsg = LocalizedString("reserved word ")
            return GenericParser.unexpected(reservedWordMsg + name)
        }
        return GenericParser(result: name)
    }

    return lexeme(identCheck.attempt)
}
/// Lexeme parser for the reserved word `name`. It parses `name` (through
/// `caseString`, so the language definition's case sensitivity applies) and
/// also checks that `name` is not a prefix of a valid identifier. A
/// _reserved_ word is treated as a single token using
/// `GenericParser.attempt`.
///
/// `name` must not be empty: its last character is force-unwrapped.
///
/// - parameter name: The reserved name to parse.
/// - returns: A parser returning nothing.
public func reservedName(
    _ name: String
) -> GenericParser<String, UserState, ()> {
    let finalCharacter = name.last!
    let notFollowedByLetter =
        languageDefinition.identifierLetter(finalCharacter).noOccurence
    let endOfNameMsg = LocalizedString("end of ") + name
    let reservedWord = caseString(name) *> notFollowedByLetter <?>
        endOfNameMsg
    return lexeme(reservedWord.attempt)
}
//
// Operators & reserved operators
//
/// Lexeme parser for a legal operator; returns the name of the operator.
/// The parser fails on any operator listed in the reserved operators of the
/// `LanguageDefinition` passed to the initializer of this token parser,
/// which also defines the legal operator (start) characters. An 'operator'
/// is treated as a single token using `GenericParser.attempt`.
public var legalOperator: GenericParser<String, UserState, String> {
    let definition = languageDefinition

    // A start character followed by any number of operator letters.
    let rawOperator: StrParser = definition.operatorStart >>- { first in
        definition.operatorLetter.many >>- { rest in
            let characters = rest.prepending(first)
            return GenericParser(result: String(characters))
        }
    } <?> LocalizedString("operator")

    // Reject the operator when it is reserved.
    let nonReserved: StrParser = rawOperator >>- { name in
        if definition.reservedOperators.contains(name) {
            let reservedOperatorMsg = LocalizedString("reserved operator ")
            return GenericParser.unexpected(reservedOperatorMsg + name)
        }
        return GenericParser(result: name)
    }

    return lexeme(nonReserved.attempt)
}
/// Lexeme parser for the reserved operator `name`. It parses the string
/// `name` and also checks that `name` is not a prefix of a valid operator.
/// A 'reservedOperator' is treated as a single token using
/// `GenericParser.attempt`.
///
/// - parameter name: The operator name.
/// - returns: A parser returning nothing.
public func reservedOperator(
    _ name: String
) -> GenericParser<String, UserState, ()> {
    let notFollowedByLetter = languageDefinition.operatorLetter.noOccurence
    let endOfNameMsg = LocalizedString("end of ") + name
    let reservedOp = VoidParser.string(name) *> notFollowedByLetter <?>
        endOfNameMsg
    return lexeme(reservedOp.attempt)
}
//
// Characters & Strings
//
/// Lexeme parser for a single literal character enclosed in single quotes;
/// returns the literal character value. Escape sequences are handled by the
/// language definition's `characterEscape` parser when one is provided, and
/// otherwise by a backslash followed by `escapeCode`.
public var characterLiteral: GenericParser<String, UserState, Character> {
    let quote = CharacterParser.character("'")
    let closingQuote = quote <?> LocalizedString("end of character")

    // Any character except the quote, the backslash and the substitute
    // character stands for itself.
    let plainCharacter = CharacterParser.satisfy { candidate in
        !(candidate == "'" || candidate == "\\" ||
            candidate == substituteCharacter)
    }

    let builtInEscape = GenericParser.character("\\") *>
        GenericTokenParser<UserState>.escapeCode
    let escapedCharacter =
        languageDefinition.characterEscape ?? builtInEscape

    let character = plainCharacter <|> escapedCharacter <?>
        LocalizedString("literal character")

    return lexeme(character.between(quote, closingQuote)) <?>
        LocalizedString("character")
}
/// This lexeme parser parses a literal string and returns the literal
/// string value. This parser deals correctly with escape sequences and
/// gaps.
///
/// The string is enclosed in double quotes. When the language definition
/// supplies its own `characterEscape` parser it is used for every escape;
/// otherwise a backslash may introduce a gap (white space closed by another
/// backslash), the empty escape "&", or an `escapeCode`.
public var stringLiteral: GenericParser<String, UserState, String> {
// Any character except the double quote, the backslash and the substitute
// character stands for itself.
let stringLetter = CharacterParser.satisfy { char in
char != "\"" && char != "\\" && char != substituteCharacter
}
// A gap contributes no character to the string, hence the `nil` result.
let escapeGap: GenericParser<String, UserState, Character?> =
GenericParser.space.many1 *> GenericParser.character("\\") *>
GenericParser(result: nil) <?>
LocalizedString("end of string gap")
// The empty escape "&" contributes no character either.
let escapeEmpty: GenericParser<String, UserState, Character?> =
GenericParser.character("&") *> GenericParser(result: nil)
// `map { $0 }` lifts `Character` results to `Character?` so that they can
// be combined with the gap and empty escapes.
let characterEscape = GenericParser.character("\\") *>
(escapeGap <|> escapeEmpty <|>
GenericTokenParser.escapeCode.map { $0 })
// A custom escape parser from the language definition replaces the default
// one entirely, including the gap and empty escapes.
let stringEscape =
languageDefinition.characterEscape?.map { $0 } ?? characterEscape
let stringChar = stringLetter.map { $0 } <|> stringEscape
let doubleQuote = CharacterParser.character("\"")
let endOfStringMsg = LocalizedString("end of string")
let string = stringChar.many.between(
doubleQuote, doubleQuote <?> endOfStringMsg
)
// Concatenate the parsed characters, skipping the `nil` entries produced by
// gaps and empty escapes.
let literalString = string.map({ str in
str.reduce("") { (acc, char) in
guard let c = char else { return acc }
return acc.appending(c)
}
}) <?> LocalizedString("literal string")
return lexeme(literalString)
}
//
// Numbers
//
/// Lexeme parser for a natural number (a positive whole number); returns
/// the value of the number. The number can be specified in 'decimal',
/// 'hexadecimal' or 'octal'.
public var natural: GenericParser<String, UserState, Int> {
    let naturalLexeme: IntParser = lexeme(GenericTokenParser.naturalNumber)
    return naturalLexeme <?> LocalizedString("natural")
}
/// Lexeme parser for an integer (a whole number). It is like `natural`
/// except that the number can be prefixed with a sign (i.e. "-" or "+").
/// Returns the value of the number. The number can be specified in
/// 'decimal', 'hexadecimal' or 'octal'.
public var integer: GenericParser<String, UserState, Int> {
    // The sign is itself a lexeme, so white space may follow it.
    let signedNatural: IntParser =
        lexeme(GenericTokenParser.sign()) >>- { applySign in
            GenericTokenParser.naturalNumber >>- { magnitude in
                GenericParser(result: applySign(magnitude))
            }
        }
    return lexeme(signedNatural) <?> LocalizedString("integer")
}
/// Lexeme parser for an integer (a whole number). It is like `integer`
/// except that it can parse bigger numbers, because the value is
/// accumulated and returned as a `Double`.
public var integerAsFloat: GenericParser<String, UserState, Double> {
    let decimalValue = GenericTokenParser.doubleWithBase(
        10,
        parser: CharacterParser.decimalDigit
    )
    let hexadecimalValue = CharacterParser.oneOf(hexadecimalPrefixes) *>
        GenericTokenParser.doubleWithBase(
            16,
            parser: GenericParser.hexadecimalDigit
        )
    let octalValue = CharacterParser.oneOf(octalPrefixes) *>
        GenericTokenParser.doubleWithBase(
            8,
            parser: GenericParser.octalDigit
        )

    // A leading "0" may introduce a prefixed hexadecimal or octal number,
    // more decimal digits, or stand alone as the value zero.
    let zeroPrefixed = (GenericParser.character("0") *>
        (hexadecimalValue <|> octalValue <|> decimalValue <|>
            GenericParser(result: 0))) <?> ""
    let magnitude = zeroPrefixed <|> decimalValue

    let signedValue = lexeme(GenericTokenParser.sign()) >>- { applySign in
        magnitude >>- { value in GenericParser(result: applySign(value)) }
    }
    return lexeme(signedValue) <?> LocalizedString("integer")
}
/// Lexeme parser for a floating point value, optionally prefixed with a
/// sign; returns the value of the number.
public var float: GenericParser<String, UserState, Double> {
    let integerPart = GenericTokenParser<UserState>.doubleIntegerPart
    let exponentPart = GenericTokenParser<UserState>.fractionalExponent

    // The integer part is handed to the fraction/exponent parser, which
    // produces the final magnitude.
    let magnitude = integerPart >>- exponentPart

    let signedFloat = lexeme(GenericTokenParser.sign()) >>- { applySign in
        magnitude >>- { value in GenericParser(result: applySign(value)) }
    }
    return lexeme(signedFloat) <?> LocalizedString("float")
}
/// Lexeme parser for either a `float` or an `integer`; returns the value of
/// the number. The `float` alternative is tried first inside an `attempt`,
/// which deals with the overlap in the grammar rules for integers and
/// floats.
public var number: GenericParser<String, UserState, Either<Int, Double>> {
    let floating: IntDoubleParser =
        float.map({ Either.right($0) }).attempt
    let whole: IntDoubleParser = integer.map({ Either.left($0) })
    return lexeme(floating <|> whole) <?> LocalizedString("number")
}
/// Parser for a positive whole number written in the decimal system.
/// Returns the value of the number.
public static var decimal: GenericParser<String, UserState, Int> {
    let digit = CharacterParser.decimalDigit
    return numberWithBase(10, parser: digit)
}
/// Parser for a positive whole number written in the hexadecimal system.
/// The number should be prefixed with "x" or "X". Returns the value of the
/// number.
public static var hexadecimal: GenericParser<String, UserState, Int> {
    let prefix = CharacterParser.oneOf(hexadecimalPrefixes)
    let digits = numberWithBase(
        16,
        parser: CharacterParser.hexadecimalDigit
    )
    return prefix *> digits
}
/// Parser for a positive whole number written in the octal system. The
/// number should be prefixed with "o" or "O". Returns the value of the
/// number.
public static var octal: GenericParser<String, UserState, Int> {
    let prefix = CharacterParser.oneOf(octalPrefixes)
    let digits = numberWithBase(8, parser: CharacterParser.octalDigit)
    return prefix *> digits
}
//
// White space & symbols
//
/// Lexeme parser `symbol(name)` parses the string `name` and skips trailing
/// white space.
///
/// - parameter name: The name of the symbol to parse.
/// - returns: `name`.
public func symbol(
    _ name: String
) -> GenericParser<String, UserState, String> {
    let literal = StrParser.string(name)
    return lexeme(literal)
}
/// `lexeme(parser)` first applies `parser` and then the `whiteSpace`
/// parser, returning the value of `parser`. Every lexical token (lexeme) is
/// defined using `lexeme`; this way every parse starts at a point without
/// white space. Parsers that use `lexeme` are called _lexeme_ parsers in
/// this document.
///
/// The only point where the `whiteSpace` parser should be called explicitly
/// is the start of the main parser, in order to skip any leading white
/// space.
///
///     let mainParser = sum <^> whiteSpace *> lexeme(digit) <* eof
///
/// - parameter parser: The parser to transform into a 'lexeme'.
/// - returns: The value of `parser`.
public func lexeme<Result>(
    _ parser: GenericParser<String, UserState, Result>
) -> GenericParser<String, UserState, Result> {
    let trailingWhiteSpace = whiteSpace
    return parser <* trailingWhiteSpace
}
/// Parser for any white space. White space consists of _zero_ or more
/// occurrences of a 'space', a line comment or a block (multiline) comment.
/// Block comments may be nested. How comments are started and ended is
/// defined in the `LanguageDefinition` that is passed to the initializer of
/// this token parser; a comment kind whose start delimiter is empty is left
/// out of the parser.
public var whiteSpace: GenericParser<String, UserState, ()> {
    let simpleSpace = CharacterParser.satisfy({ $0.isSpace }).skipMany1
    let hasNoLineComment = languageDefinition.commentLine.isEmpty
    let hasNoBlockComment = languageDefinition.commentStart.isEmpty

    switch (hasNoLineComment, hasNoBlockComment) {
    case (true, true):
        return (simpleSpace <?> "").skipMany
    case (true, false):
        return (simpleSpace <|> multiLineComment <?> "").skipMany
    case (false, true):
        return (simpleSpace <|> oneLineComment <?> "").skipMany
    case (false, false):
        return (
            simpleSpace <|> oneLineComment <|> multiLineComment <?> ""
        ).skipMany
    }
}
//
// Bracketing
//
/// Lexeme parser `parentheses(parser)` parses `parser` enclosed in
/// parentheses "(" and ")", returning the value of `parser`.
///
/// - parameter parser: The parser applied between the parentheses.
/// - returns: The value of `parser`.
public func parentheses<Result>(
    _ parser: GenericParser<String, UserState, Result>
) -> GenericParser<String, UserState, Result> {
    let opening = symbol("(")
    let closing = symbol(")")
    return parser.between(opening, closing)
}
/// Lexeme parser `braces(parser)` parses `parser` enclosed in braces "{"
/// and "}", returning the value of `parser`.
///
/// - parameter parser: The parser applied between the braces.
/// - returns: The value of `parser`.
public func braces<Result>(
    _ parser: GenericParser<String, UserState, Result>
) -> GenericParser<String, UserState, Result> {
    let opening = symbol("{")
    let closing = symbol("}")
    return parser.between(opening, closing)
}
/// Lexeme parser `angles(parser)` parses `parser` enclosed in angle
/// brackets "<" and ">", returning the value of `parser`.
///
/// - parameter parser: The parser applied between the angles.
/// - returns: The value of `parser`.
public func angles<Result>(
    _ parser: GenericParser<String, UserState, Result>
) -> GenericParser<String, UserState, Result> {
    let opening = symbol("<")
    let closing = symbol(">")
    return parser.between(opening, closing)
}
/// Lexeme parser `brackets(parser)` parses `parser` enclosed in brackets
/// "[" and "]", returning the value of `parser`.
///
/// - parameter parser: The parser applied between the brackets.
/// - returns: The value of `parser`.
public func brackets<Result>(
    _ parser: GenericParser<String, UserState, Result>
) -> GenericParser<String, UserState, Result> {
    let opening = symbol("[")
    let closing = symbol("]")
    return parser.between(opening, closing)
}
/// Lexeme parser `semicolon` parses the character ";" and skips any
/// trailing white space. Returns the string ";".
public var semicolon: GenericParser<String, UserState, String> {
    let token = ";"
    return symbol(token)
}
/// Lexeme parser `comma` parses the character "," and skips any trailing
/// white space. Returns the string ",".
public var comma: GenericParser<String, UserState, String> {
    let token = ","
    return symbol(token)
}
/// Lexeme parser `colon` parses the character ":" and skips any trailing
/// white space. Returns the string ":".
public var colon: GenericParser<String, UserState, String> {
    let token = ":"
    return symbol(token)
}
/// Lexeme parser `dot` parses the character "." and skips any trailing
/// white space. Returns the string ".".
public var dot: GenericParser<String, UserState, String> {
    let token = "."
    return symbol(token)
}
/// Lexeme parser `semicolonSeparated(parser)` parses _zero_ or more
/// occurrences of `parser` separated by `semicolon`. Returns an array of
/// the values returned by `parser`.
///
/// - parameter parser: The parser applied between semicolons.
/// - returns: An array of values returned by `parser`.
public func semicolonSeparated<Result>(
    _ parser: GenericParser<String, UserState, Result>
) -> GenericParser<String, UserState, [Result]> {
    let separator = semicolon
    return parser.separatedBy(separator)
}
/// Lexeme parser `semicolonSeparated1(parser)` parses _one_ or more
/// occurrences of `parser` separated by `semicolon`. Returns an array of
/// the values returned by `parser`.
///
/// - parameter parser: The parser applied between semicolons.
/// - returns: An array of values returned by `parser`.
public func semicolonSeparated1<Result>(
    _ parser: GenericParser<String, UserState, Result>
) -> GenericParser<String, UserState, [Result]> {
    let separator = semicolon
    return parser.separatedBy1(separator)
}
/// Lexeme parser `commaSeparated(parser)` parses _zero_ or more occurrences
/// of `parser` separated by `comma`. Returns an array of the values
/// returned by `parser`.
///
/// - parameter parser: The parser applied between commas.
/// - returns: An array of values returned by `parser`.
public func commaSeparated<Result>(
    _ parser: GenericParser<String, UserState, Result>
) -> GenericParser<String, UserState, [Result]> {
    let separator = comma
    return parser.separatedBy(separator)
}
/// Lexeme parser `commaSeparated1(parser)` parses _one_ or more occurrences
/// of `parser` separated by `comma`. Returns an array of the values
/// returned by `parser`.
///
/// - parameter parser: The parser applied between commas.
/// - returns: An array of values returned by `parser`.
public func commaSeparated1<Result>(
    _ parser: GenericParser<String, UserState, Result>
) -> GenericParser<String, UserState, [Result]> {
    let separator = comma
    return parser.separatedBy1(separator)
}
//
// Private methods. They should be in a separate private extension, but that
// causes problems with the internal typealiases.
//
/// Parser for a line comment: the language definition's `commentLine`
/// marker followed by every character up to, but not including, the next
/// "\n". Returns nothing.
private var oneLineComment: VoidParser {
    let marker = StrParser.string(languageDefinition.commentLine)
    let restOfLine = CharacterParser.satisfy({ $0 != "\n"}).skipMany
    return marker.attempt *> restOfLine *> GenericParser(result: ())
}
/// Parser for a block comment: the language definition's `commentStart`
/// delimiter followed by `inComment`. The parser is built inside the
/// closure passed to `GenericParser`.
private var multiLineComment: VoidParser {
    return GenericParser {
        let opening =
            StrParser.string(self.languageDefinition.commentStart)
        return opening.attempt *> self.inComment
    }
}
/// Parser for the remainder of a block comment. It selects the nested or
/// the non-nested variant according to the language definition's
/// `allowNestedComments` flag.
private var inComment: VoidParser {
    if languageDefinition.allowNestedComments {
        return inNestedComment
    }
    return inNonNestedComment
}
/// Parser for the remainder of a block comment, used when the language
/// definition allows nested comments.
///
/// The alternatives are written in this order: the closing delimiter, which
/// ends the comment; a complete nested block comment; a run of characters
/// that appear in neither delimiter; a single character that appears in one
/// of the delimiters. Each of the last three is followed by
/// `inNestedComment` again. Failures are labelled "end of comment".
private var inNestedComment: VoidParser {
// The parser is built inside the closure passed to `GenericParser`,
// presumably so that the reference to `inNestedComment` below is not
// evaluated eagerly.
return GenericParser {
let langDef = self.languageDefinition
// Distinct characters used by the opening and closing delimiters.
let startEnd = (
langDef.commentStart + langDef.commentEnd
).removingDuplicates()
let commentEnd = StrParser.string(langDef.commentEnd)
return commentEnd.attempt *> GenericParser(result: ()) <|>
self.multiLineComment *> self.inNestedComment <|>
GenericParser.noneOf(startEnd).skipMany1 *>
self.inNestedComment <|> GenericParser.oneOf(startEnd) *>
self.inNestedComment <?>
LocalizedString("end of comment")
}
}
/// Parser for the remainder of a block comment, used when the language
/// definition does not allow nested comments.
///
/// The alternatives are written in this order: the closing delimiter, which
/// ends the comment; a run of characters that appear in neither delimiter;
/// a single character that appears in one of the delimiters. Each of the
/// last two is followed by `inNonNestedComment` again. Failures are
/// labelled "end of comment".
private var inNonNestedComment: VoidParser {
// The parser is built inside the closure passed to `GenericParser`,
// presumably so that the reference to `inNonNestedComment` below is not
// evaluated eagerly.
return GenericParser {
let langDef = self.languageDefinition
// Distinct characters used by the opening and closing delimiters.
let startEnd = (
langDef.commentStart + langDef.commentEnd
).removingDuplicates()
let commentEnd = StrParser.string(langDef.commentEnd)
return commentEnd.attempt *> GenericParser(result: ()) <|>
GenericParser.noneOf(startEnd).skipMany1 *>
self.inNonNestedComment <|> GenericParser.oneOf(startEnd) *>
self.inNonNestedComment <?>
LocalizedString("end of comment")
}
}
/// Parser for what follows the backslash of an escape sequence. It tries,
/// in order, a single-character escape, a numeric escape, an ASCII control
/// code name and a caret control code.
private static var escapeCode: CharacterParser {
    let escapeCodeMsg = LocalizedString("escape code")
    return charEscape <|> charNumber <|> charAscii <|> charControl <?>
        escapeCodeMsg
}
/// Parser for the single-character escapes listed in `escapeMap`. It
/// returns the character associated with the escape that was matched.
private static var charEscape: CharacterParser {
    var alternatives: [CharacterParser] = []
    for entry in escapeMap {
        let alternative: CharacterParser =
            CharacterParser.character(entry.esc) *>
            GenericParser(result: entry.code)
        alternatives.append(alternative)
    }
    return GenericParser.choice(alternatives)
}
/// Parser for a numeric escape: a decimal number, "o" followed by an octal
/// number, or "x" followed by a hexadecimal number. The number is converted
/// to a character with `characterFromInt`.
private static var charNumber: CharacterParser {
    let octDigit = CharacterParser.octalDigit
    let hexDigit = CharacterParser.hexadecimalDigit
    let codePoint = decimal <|>
        GenericParser.character("o") *>
        numberWithBase(8, parser: octDigit) <|>
        GenericParser.character("x") *>
        numberWithBase(16, parser: hexDigit)
    return codePoint >>- { value in characterFromInt(value) }
}
/// Parser for the ASCII control code names listed in `asciiCodesMap`
/// ("NUL", "SOH", "SO", ...). It returns the character associated with the
/// name that was matched.
private static var charAscii: CharacterParser {
    let parsers = asciiCodesMap.map { control in
        // Several names start with the same letters ("SOH", "STX", "SO",
        // "SI", "SUB", "SYN", ...). Each alternative is wrapped in `attempt`
        // so that a partial match is undone and the following names can
        // still be tried, as is done for every other `string` parser
        // followed by alternatives in this file.
        StrParser.string(control.esc).attempt *>
            GenericParser(result: control.code)
    }
    return GenericParser.choice(parsers)
}
/// Parser for a caret control code: "^" followed by an uppercase letter or
/// one of "@", "[", "\", "]", "^", "_". The resulting character is the
/// control character whose code is the ASCII code of the character after
/// the caret minus 64: "^A" is U+0001, "^Z" is U+001A, "^@" is U+0000,
/// "^[" is U+001B, "^\" is U+001C, "^]" is U+001D, "^^" is U+001E and "^_"
/// is U+001F.
private static var charControl: CharacterParser {
    let upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    let ctrlCodes: CharacterParser =
        GenericParser.oneOf(upper).flatMap { char in
            let charA: Character = "A"
            let value = char.unicodeScalar.value -
                charA.unicodeScalar.value + 1
            // `value` is in 1...26 here, which is always a valid scalar.
            let unicode = UnicodeScalar.fromUInt32(value)!
            return GenericParser(result: Character(unicode))
        }
    return GenericParser.character("^") *> (ctrlCodes <|>
        GenericParser.character("@") *> GenericParser(result: "\0") <|>
        GenericParser.character("[") *>
        GenericParser(result: "\u{001B}") <|>
        // "]" is 0x5D, so it maps to 0x1D (GS).
        GenericParser.character("]") *>
        GenericParser(result: "\u{001D}") <|>
        // "\" is 0x5C, so it maps to 0x1C (FS).
        GenericParser.character("\\") *>
        GenericParser(result: "\u{001C}") <|>
        GenericParser.character("^") *>
        GenericParser(result: "\u{001E}") <|>
        GenericParser.character("_") *> GenericParser(result: "\u{001F}"))
}
/// Returns a parser producing the character whose Unicode scalar value is
/// `v`, or a failing parser when `UnicodeScalar.fromInt` cannot build a
/// scalar from `v`.
///
/// - parameter v: The scalar value of the character.
/// - returns: A parser returning the character or failing with a message.
static func characterFromInt(_ v: Int) -> CharacterParser {
    if let scalar = UnicodeScalar.fromInt(v) {
        return GenericParser(result: Character(scalar))
    }
    let outsideMsg = LocalizedString(
        "value outside of Unicode codespace"
    )
    return GenericParser.fail(outsideMsg)
}
/// Returns a parser for one or more digits accepted by `parser`, converted
/// to an `Int` in the given base by `integerWithDigits`.
///
/// - parameters:
///   - base: The base of the number.
///   - parser: The parser for a single digit.
/// - returns: A parser returning the value of the number.
private static func numberWithBase(
    _ base: Int,
    parser: CharacterParser
) -> IntParser {
    let digitRun = parser.many1
    return digitRun >>- { characters in
        integerWithDigits(String(characters), base: base)
    }
}
/// Returns a parser producing the value of `digits` read in the given
/// base, or a parser failing with an "Int overflow" message when `Int`
/// cannot be initialized from `digits`.
///
/// - parameters:
///   - digits: The digits of the number.
///   - base: The base in which `digits` is written.
/// - returns: A parser returning the value or failing with a message.
static func integerWithDigits(_ digits: String, base: Int) -> IntParser {
    if let value = Int(digits, radix: base) {
        return GenericParser(result: value)
    }
    let overflowMsg = LocalizedString("Int overflow")
    return GenericParser.fail(overflowMsg)
}
/// Returns a parser for one or more digits accepted by `parser`, whose
/// value in the given base is accumulated as a `Double`.
///
/// - parameters:
///   - base: The base of the number.
///   - parser: The parser for a single digit. Every character it accepts
///     must be a valid digit in `base`, because the conversion of each
///     digit is force-unwrapped.
/// - returns: A parser returning the value of the number.
private static func doubleWithBase(
    _ base: Int,
    parser: CharacterParser
) -> DoubleParser {
    let radix = Double(base)
    return parser.many1 >>- { digits in
        var value = 0.0
        for digit in digits {
            value = radix * value + Double(Int(String(digit), radix: base)!)
        }
        return GenericParser(result: value)
    }
}
/// Parser for the integer part of a floating point number: one or more
/// decimal digits, returned as a `Double`.
private static var doubleIntegerPart: DoubleParser {
    let digitRun = CharacterParser.decimalDigit.many1
    return digitRun >>- { characters in
        GenericParser(result: Double(String(characters))!)
    }
}
/// Parser for a natural number. A leading "0" may introduce a prefixed
/// hexadecimal or octal number, more decimal digits, or stand alone as the
/// value zero; any other number is read as a decimal.
private static var naturalNumber: IntParser {
    let afterZero: IntParser =
        hexadecimal <|> octal <|> decimal <|> GenericParser(result: 0)
    let zeroPrefixed = GenericParser.character("0") *> afterZero <?> ""
    return zeroPrefixed <|> decimal
}
/// Parses an optional sign and returns the function to apply to the number
/// that follows it: negation after "-", the identity after "+", and the
/// identity again when neither character is matched.
///
/// - returns: A parser returning a function from `Number` to `Number`.
private static func sign<Number: SignedNumber>()
-> GenericParser<String, UserState, (Number) -> Number> {
// The prefix `-` operator is passed as the negation function.
return GenericParser.character("-") *> GenericParser(result: -) <|>
GenericParser.character("+") *> GenericParser(result: { $0 }) <|>
GenericParser(result: { $0 })
}
/// Returns a parser for what follows the integer part of a floating point
/// number: either a fraction optionally followed by an exponent, or an
/// exponent alone. There is no alternative for an input that has neither.
///
/// - parameter number: The value of the integer part already parsed.
/// - returns: A parser returning `number` combined with the fraction and/or
///   the exponent.
private static func fractionalExponent(_ number: Double) -> DoubleParser {
let fractionMsg = LocalizedString("fraction")
// "." followed by one or more decimal digits. Each step adds a digit and
// divides by 10; `reduceRight` presumably folds starting from the last
// digit, which yields the value of the fraction.
let fract = CharacterParser.character(".") *>
(GenericParser.decimalDigit.many1 <?> fractionMsg).map { digits in
digits.reduceRight(0) { frac, digit in
(frac + Double(String(digit))!) / 10
}
}
let exponentMsg = LocalizedString("exponent")
// "e" or "E", an optional sign and a decimal number; the result is the
// factor computed by `power` for the signed exponent.
let expo = GenericParser.oneOf("eE") *> sign() >>- { sign in
(self.decimal <?> exponentMsg) >>- { exp in
GenericParser(result: power(sign(exp)))
}
}
// Fraction with an optional exponent; the factor is 1 without exponent.
let fraction = (fract <?> fractionMsg) >>- { frac in
(expo <?> exponentMsg).otherwise(1) >>- { exp in
return GenericParser(result: (number + frac) * exp)
}
}
// Exponent without a fraction.
let exponent = expo >>- { exp in
GenericParser(result: number * exp)
}
return fraction <|> exponent
}
/// Returns a parser for `name` that honours the case sensitivity of the
/// language definition. When the language is case sensitive, `name` is
/// parsed as is. Otherwise every alphabetic character of `name` is accepted
/// in its lowercase or uppercase form, and `name` itself is returned.
///
/// - parameter name: The string to parse.
/// - returns: A parser returning `name`.
private func caseString(_ name: String) -> StrParser {
    guard !languageDefinition.isCaseSensitive else {
        return StrParser.string(name)
    }

    // Builds the parser one character at a time.
    func walk(_ remaining: String) -> VoidParser {
        let unit = VoidParser(result: ())
        if remaining.isEmpty { return unit }

        var rest = remaining
        let head = rest.popFirst()!
        let headParser: VoidParser
        if head.isAlpha {
            headParser = (GenericParser.character(head.lowercase) <|>
                GenericParser.character(head.uppercase)) *> unit
        } else {
            headParser = GenericParser.character(head) *> unit
        }
        return (headParser <?> name) >>- { _ in walk(rest) }
    }

    return walk(name) *> GenericParser(result: name)
}
}
/// Generic implementation of the `TokenParser`.
public struct GenericTokenParser<UserState>: TokenParser {
/// Language definition parameterizing the lexer.
public let languageDefinition: LanguageDefinition<UserState>
/// Creates a `TokenParser` that contains lexical parsers that are defined
/// using the definitions in the `LanguageDefinition` structure.
///
/// One uses the appropriate language definition and selects the lexical
/// parsers that are needed from the resulting `GenericTokenParser`.
///
///     import SwiftParsec
///
///     // The lexer
///     let swiftDef = LanguageDefinition<()>.swift
///     let lexer = GenericTokenParser(languageDefinition: swiftDef)
///
///     // The parser
///     let expression = lexer.identifier <|>
///         lexer.legalOperator <|> ...
///
/// - parameter languageDefinition: Language definition for the lexical
/// parsers.
public init(languageDefinition: LanguageDefinition<UserState>) {
self.languageDefinition = languageDefinition
}
}
// Characters accepted as the prefix of a hexadecimal number.
private let hexadecimalPrefixes = "xX"
// Characters accepted as the prefix of an octal number.
private let octalPrefixes = "oO"
// U+001A; excluded from the characters that may appear unescaped in
// character and string literals.
private let substituteCharacter: Character = "\u{001A}"
// Single-character escapes (the character following the backslash) paired
// with the character they stand for. Used by `charEscape`.
private let escapeMap: [(esc: Character, code: Character)] = [
("a", "\u{0007}"), ("b", "\u{0008}"), ("f", "\u{000C}"), ("n", "\n"),
("r", "\r"), ("t", "\t"), ("v", "\u{000B}"), ("\\", "\\"), ("\"", "\""),
("'", "'")
]
// ASCII control code names paired with the character they stand for. Used
// by `charAscii`, which tries the names in the order listed here; note that
// "SOH" comes before "SO".
private let asciiCodesMap: [(esc: String, code:Character)] = [
("NUL", "\u{0000}"), ("SOH", "\u{0001}"), ("STX", "\u{0002}"),
("ETX", "\u{0003}"), ("EOT", "\u{0004}"), ("ENQ", "\u{0005}"),
("ACK", "\u{0006}"), ("BEL", "\u{0007}"), ("BS", "\u{0008}"),
("HT", "\u{0009}"), ("LF", "\u{000A}"), ("VT", "\u{000B}"),
("FF", "\u{000C}"), ("CR", "\u{000D}"), ("SO", "\u{000E}"),
("SI", "\u{000F}"), ("DLE", "\u{0010}"), ("DC1", "\u{0011}"),
("DC2", "\u{0012}"), ("DC3", "\u{0013}"), ("DC4", "\u{0014}"),
("NAK", "\u{0015}"), ("SYN", "\u{0016}"), ("ETB", "\u{0017}"),
("CAN", "\u{0018}"), ("EM", "\u{0019}"), ("SUB", "\u{001A}"),
("ESC", "\u{001B}"), ("FS", "\u{001C}"), ("GS", "\u{001D}"),
("RS", "\u{001E}"), ("US", "\u{001F}"), ("SP", "\u{0020}"),
("DEL", "\u{007F}")
]
/// Returns 10 raised to the power `exp`. A negative exponent is computed as
/// the reciprocal of the power for the opposite exponent.
///
/// - parameter exp: The exponent.
/// - returns: The value of 10 to the power `exp`.
private func power(_ exp: Int) -> Double {
    let isNegative = exp < 0
    let magnitude = isNegative ? -exp : exp
    let scale = pow(10.0, Double(magnitude))
    return isNegative ? 1.0 / scale : scale
}

View File

@@ -10,8 +10,6 @@
// Operator implementations for the `Message` type.
//==============================================================================
import func Foundation.pow
//==============================================================================
/// Types implementing this protocol hold lexical parsers.
public protocol TokenParser {
@@ -229,975 +227,3 @@ public protocol TokenParser {
) -> GenericParser<String, UserState, [Result]>
}
//==============================================================================
// Default implementation of the methods of the `TokenParser` parser type.
extension TokenParser {
// Type aliases used internally to simplify the code.
typealias StrParser = GenericParser<String, UserState, String>
typealias CharacterParser = GenericParser<String, UserState, Character>
typealias IntParser = GenericParser<String, UserState, Int>
typealias DoubleParser = GenericParser<String, UserState, Double>
typealias IntDoubleParser =
GenericParser<String, UserState, Either<Int, Double>>
typealias VoidParser = GenericParser<String, UserState, ()>
//
// Identifiers & Reserved words
//
/// This lexeme parser parses a legal identifier. Returns the identifier
/// string. This parser will fail on identifiers that are reserved words.
/// Legal identifier (start) characters and reserved words are defined in
/// the `LanguageDefinition` that is passed to the initializer of this token
/// parser. An `identifier` is treated as a single token using
/// `GenericParser.attempt`.
public var identifier: GenericParser<String, UserState, String> {
let langDef = languageDefinition
let ident: StrParser = langDef.identifierStart >>- { char in
langDef.identifierLetter(char).many >>- { chars in
let cs = chars.prepending(char)
return GenericParser(result: String(cs))
}
} <?> LocalizedString("identifier")
let identCheck: StrParser = ident >>- { name in
let reservedNames: Set<String>
let n: String
if langDef.isCaseSensitive {
reservedNames = langDef.reservedNames
n = name
} else {
reservedNames = langDef.reservedNames.map { $0.lowercased() }
n = name.lowercased()
}
guard !reservedNames.contains(n) else {
let reservedWordMsg = LocalizedString("reserved word ")
return GenericParser.unexpected(reservedWordMsg + name)
}
return GenericParser(result: name)
}
return lexeme(identCheck.attempt)
}
/// The lexeme parser `reservedName(name)` parses `symbol(name)`, but it
/// also checks that the `name` is not a prefix of a valid identifier. A
/// _reserved_ word is treated as a single token using
/// `GenericParser.attempt`.
///
/// - parameter name: The reserved name to parse.
/// - returns: A parser returning nothing.
public func reservedName(
_ name: String
) -> GenericParser<String, UserState, ()> {
let lastChar = name.last!
let reserved = caseString(name) *>
languageDefinition.identifierLetter(lastChar).noOccurence <?>
LocalizedString("end of ") + name
return lexeme(reserved.attempt)
}
//
// Operators & reserved operators
//
/// This lexeme parser parses a legal operator and returns the name of the
/// operator. This parser will fail on any operators that are reserved
/// operators. Legal operator (start) characters and reserved operators are
/// defined in the `LanguageDefinition` that is passed to the initializer of
/// this token parser. An 'operator' is treated as a single token using
/// `GenericParser.attempt`.
public var legalOperator: GenericParser<String, UserState, String> {
let langDef = languageDefinition
let op: StrParser = langDef.operatorStart >>- { char in
langDef.operatorLetter.many >>- { chars in
let cs = chars.prepending(char)
return GenericParser(result: String(cs))
}
} <?> LocalizedString("operator")
let opCheck: StrParser = op >>- { name in
guard !langDef.reservedOperators.contains(name) else {
let reservedOperatorMsg = LocalizedString("reserved operator ")
return GenericParser.unexpected(reservedOperatorMsg + name)
}
return GenericParser(result: name)
}
return lexeme(opCheck.attempt)
}
/// The lexeme parser `reservedOperator(name)` parses `symbol(name)`, but it
/// also checks that the `name` is not a prefix of a valid operator. A
/// 'reservedOperator' is treated as a single token using
/// `GenericParser.attempt`.
///
/// - parameter name: The operator name.
/// - returns: A parser returning nothing.
public func reservedOperator(
_ name: String
) -> GenericParser<String, UserState, ()> {
let op = VoidParser.string(name) *>
languageDefinition.operatorLetter.noOccurence <?>
LocalizedString("end of ") + name
return lexeme(op.attempt)
}
//
// Characters & Strings
//
/// This lexeme parser parses a single literal character and returns the
/// literal character value. This parser deals correctly with escape
/// sequences.
public var characterLiteral: GenericParser<String, UserState, Character> {
let characterLetter = CharacterParser.satisfy { char in
char != "'" && char != "\\" && char != substituteCharacter
}
let defaultCharEscape = GenericParser.character("\\") *>
GenericTokenParser<UserState>.escapeCode
let characterEscape =
languageDefinition.characterEscape ?? defaultCharEscape
let character = characterLetter <|> characterEscape <?>
LocalizedString("literal character")
let quote = CharacterParser.character("'")
let endOfCharMsg = LocalizedString("end of character")
return lexeme(character.between(quote, quote <?> endOfCharMsg)) <?>
LocalizedString("character")
}
/// This lexeme parser parses a literal string and returns the literal
/// string value. This parser deals correctly with escape sequences and
/// gaps.
public var stringLiteral: GenericParser<String, UserState, String> {
let stringLetter = CharacterParser.satisfy { char in
char != "\"" && char != "\\" && char != substituteCharacter
}
let escapeGap: GenericParser<String, UserState, Character?> =
GenericParser.space.many1 *> GenericParser.character("\\") *>
GenericParser(result: nil) <?>
LocalizedString("end of string gap")
let escapeEmpty: GenericParser<String, UserState, Character?> =
GenericParser.character("&") *> GenericParser(result: nil)
let characterEscape = GenericParser.character("\\") *>
(escapeGap <|> escapeEmpty <|>
GenericTokenParser.escapeCode.map { $0 })
let stringEscape =
languageDefinition.characterEscape?.map { $0 } ?? characterEscape
let stringChar = stringLetter.map { $0 } <|> stringEscape
let doubleQuote = CharacterParser.character("\"")
let endOfStringMsg = LocalizedString("end of string")
let string = stringChar.many.between(
doubleQuote,
doubleQuote <?> endOfStringMsg
)
let literalString = string.map({ str in
str.reduce("") { (acc, char) in
guard let c = char else { return acc }
return acc.appending(c)
}
}) <?> LocalizedString("literal string")
return lexeme(literalString)
}
//
// Numbers
//
/// Lexeme parser for a natural number. The number may be written in
/// 'decimal', 'hexadecimal' or 'octal' notation; the parser returns its
/// value and skips trailing white space.
public var natural: GenericParser<String, UserState, Int> {
    let token: IntParser = lexeme(GenericTokenParser.naturalNumber)
    let label = LocalizedString("natural")
    return token <?> label
}
/// This lexeme parser parses an integer (a whole number). This parser is
/// like `natural` except that it can be prefixed with a sign (i.e. "-" or
/// "+"). It returns the value of the number. The number can be specified in
/// 'decimal', 'hexadecimal' or 'octal'.
public var integer: GenericParser<String, UserState, Int> {
// The sign is itself a lexeme, so white space may separate it from the
// digits. `f` is the function produced by the sign parser; it is applied to
// the parsed natural number.
let int = lexeme(GenericTokenParser.sign()) >>- { f in
GenericTokenParser.naturalNumber >>- {
GenericParser(result: f($0))
}
}
return lexeme(int) <?> LocalizedString("integer")
}
/// This lexeme parser parses an integer (a whole number). It is like
/// `integer` except that it can parse bigger numbers, because the value is
/// accumulated as a `Double` instead of an `Int`. Returns the value of the
/// number as a `Double`.
public var integerAsFloat: GenericParser<String, UserState, Double> {
// Hexadecimal form: a prefix character followed by hexadecimal digits.
let hexaPrefix = CharacterParser.oneOf(hexadecimalPrefixes)
let hexa = hexaPrefix *> GenericTokenParser.doubleWithBase(
16,
parser: GenericParser.hexadecimalDigit
)
// Octal form: a prefix character followed by octal digits.
let octPrefix = CharacterParser.oneOf(octalPrefixes)
let oct = octPrefix *> GenericTokenParser.doubleWithBase(
8,
parser: GenericParser.octalDigit
)
let decDigit = CharacterParser.decimalDigit
let dec = GenericTokenParser.doubleWithBase(10, parser: decDigit)
// A number starting with "0": the zero is followed by a hexadecimal or
// octal number, by more decimal digits, or by nothing (the value 0).
let zeroNumber = (GenericParser.character("0") *>
(hexa <|> oct <|> dec <|> GenericParser(result: 0))) <?> ""
let nat = zeroNumber <|> dec
// The sign is a lexeme, so white space may separate it from the digits.
let double = lexeme(GenericTokenParser.sign()) >>- { sign in
nat >>- { GenericParser(result: sign($0)) }
}
return lexeme(double) <?> LocalizedString("integer")
}
/// This lexeme parser parses a floating point value, optionally prefixed with
/// a sign ("-" or "+"), and returns the value of the number.
public var float: GenericParser<String, UserState, Double> {
// The integer part is parsed first and handed to `fractionalExponent`,
// which parses the fraction and/or exponent that must follow.
let intPart = GenericTokenParser<UserState>.doubleIntegerPart
let expPart = GenericTokenParser<UserState>.fractionalExponent
let f = intPart >>- { expPart($0) }
// The sign is a lexeme, so white space may separate it from the digits.
let double = lexeme(GenericTokenParser.sign()) >>- { sign in
f >>- { GenericParser(result: sign($0)) }
}
return lexeme(double) <?> LocalizedString("float")
}
/// Lexeme parser for a number that is either a `float` or an `integer`.
/// The float alternative is tried first and backtracks on failure, which
/// resolves the overlap between the two grammars. Returns `.right` for a
/// floating point value and `.left` for an integer value.
public var number: GenericParser<String, UserState, Either<Int, Double>> {
    let floating: IntDoubleParser = float.map({ Either.right($0) }).attempt
    let whole: IntDoubleParser = integer.map({ Either.left($0) })
    let token = lexeme(floating <|> whole)
    return token <?> LocalizedString("number")
}
/// Parser for a whole number written with one or more decimal digits.
/// Returns the value of the number.
public static var decimal: GenericParser<String, UserState, Int> {
    let digit = CharacterParser.decimalDigit
    return numberWithBase(10, parser: digit)
}
/// Parser for a whole number written in hexadecimal. The digits must be
/// preceded by "x" or "X". Returns the value of the number.
public static var hexadecimal: GenericParser<String, UserState, Int> {
    let prefix = CharacterParser.oneOf(hexadecimalPrefixes)
    let digits = numberWithBase(16, parser: CharacterParser.hexadecimalDigit)
    return prefix *> digits
}
/// Parser for a whole number written in octal. The digits must be preceded
/// by "o" or "O". Returns the value of the number.
public static var octal: GenericParser<String, UserState, Int> {
    let prefix = CharacterParser.oneOf(octalPrefixes)
    let digits = numberWithBase(8, parser: CharacterParser.octalDigit)
    return prefix *> digits
}
//
// White space & symbols
//
/// Lexeme parser `symbol(name)` parses the exact string `name` and then
/// skips any trailing white space.
///
/// - parameter name: The name of the symbol to parse.
/// - returns: `name`.
public func symbol(
    _ name: String
) -> GenericParser<String, UserState, String> {
    let literal = StrParser.string(name)
    return lexeme(literal)
}
/// `lexeme(parser)` applies `parser` first and then the `whiteSpace` parser,
/// yielding the value produced by `parser`. All lexical tokens (lexemes) are
/// built with `lexeme`, so each of them starts parsing at a position that is
/// not white space. Parsers built this way are called _lexeme_ parsers in
/// this documentation.
///
/// `whiteSpace` only needs to be called explicitly once, at the start of the
/// main parser, to skip any leading white space:
///
///     let mainParser = sum <^> whiteSpace *> lexeme(digit) <* eof
///
/// - parameter parser: The parser to transform into a lexeme parser.
/// - returns: The value of `parser`.
public func lexeme<Result>(
    _ parser: GenericParser<String, UserState, Result>
) -> GenericParser<String, UserState, Result> {
    let trailingSpace = whiteSpace
    return parser <* trailingSpace
}
/// Skips white space: _zero_ or more occurrences of a run of space
/// characters, a line comment or a block (multiline) comment. A comment kind
/// is only recognised when the `LanguageDefinition` passed to the
/// initializer of this token parser defines a non-empty start string for it
/// (`commentLine` for line comments, `commentStart` for block comments).
public var whiteSpace: GenericParser<String, UserState, ()> {
    let blanks = CharacterParser.satisfy({ $0.isSpace }).skipMany1
    let hasLineComments = !languageDefinition.commentLine.isEmpty
    let hasBlockComments = !languageDefinition.commentStart.isEmpty
    if hasLineComments && hasBlockComments {
        return (
            blanks <|> oneLineComment <|> multiLineComment <?> ""
        ).skipMany
    }
    if hasLineComments {
        return (blanks <|> oneLineComment <?> "").skipMany
    }
    if hasBlockComments {
        return (blanks <|> multiLineComment <?> "").skipMany
    }
    return (blanks <?> "").skipMany
}
//
// Bracketing
//
/// Lexeme parser `parentheses(parser)` applies `parser` between the symbols
/// "(" and ")" and yields the value of `parser`.
///
/// - parameter parser: The parser applied between the parentheses.
/// - returns: The value of `parser`.
public func parentheses<Result>(
    _ parser: GenericParser<String, UserState, Result>
) -> GenericParser<String, UserState, Result> {
    let opening = symbol("(")
    let closing = symbol(")")
    return parser.between(opening, closing)
}
/// Lexeme parser `braces(parser)` applies `parser` between the symbols "{"
/// and "}" and yields the value of `parser`.
///
/// - parameter parser: The parser applied between the braces.
/// - returns: The value of `parser`.
public func braces<Result>(
    _ parser: GenericParser<String, UserState, Result>
) -> GenericParser<String, UserState, Result> {
    let opening = symbol("{")
    let closing = symbol("}")
    return parser.between(opening, closing)
}
/// Lexeme parser `angles(parser)` applies `parser` between the symbols "<"
/// and ">" (angle brackets) and yields the value of `parser`.
///
/// - parameter parser: The parser applied between the angles.
/// - returns: The value of `parser`.
public func angles<Result>(
    _ parser: GenericParser<String, UserState, Result>
) -> GenericParser<String, UserState, Result> {
    let opening = symbol("<")
    let closing = symbol(">")
    return parser.between(opening, closing)
}
/// Lexeme parser `brackets(parser)` applies `parser` between the symbols "["
/// and "]" and yields the value of `parser`.
///
/// - parameter parser: The parser applied between the brackets.
/// - returns: The value of `parser`.
public func brackets<Result>(
    _ parser: GenericParser<String, UserState, Result>
) -> GenericParser<String, UserState, Result> {
    let opening = symbol("[")
    let closing = symbol("]")
    return parser.between(opening, closing)
}
/// Lexeme parser `semicolon` matches the symbol ";" followed by any trailing
/// white space. Returns the string ";".
public var semicolon: GenericParser<String, UserState, String> {
    let token = ";"
    return symbol(token)
}
/// Lexeme parser `comma` matches the symbol "," followed by any trailing
/// white space. Returns the string ",".
public var comma: GenericParser<String, UserState, String> {
    let token = ","
    return symbol(token)
}
/// Lexeme parser `colon` matches the symbol ":" followed by any trailing
/// white space. Returns the string ":".
public var colon: GenericParser<String, UserState, String> {
    let token = ":"
    return symbol(token)
}
/// Lexeme parser `dot` matches the symbol "." followed by any trailing white
/// space. Returns the string ".".
public var dot: GenericParser<String, UserState, String> {
    let token = "."
    return symbol(token)
}
/// Lexeme parser `semicolonSeparated(parser)` applies `parser` _zero_ or
/// more times, with `semicolon` between consecutive occurrences, and
/// collects the results.
///
/// - parameter parser: The parser applied between semicolons.
/// - returns: An array of values returned by `parser`.
public func semicolonSeparated<Result>(
    _ parser: GenericParser<String, UserState, Result>
) -> GenericParser<String, UserState, [Result]> {
    let separator = semicolon
    return parser.separatedBy(separator)
}
/// Lexeme parser `semicolonSeparated1(parser)` applies `parser` _one_ or
/// more times, with `semicolon` between consecutive occurrences, and
/// collects the results.
///
/// - parameter parser: The parser applied between semicolons.
/// - returns: An array of values returned by `parser`.
public func semicolonSeparated1<Result>(
    _ parser: GenericParser<String, UserState, Result>
) -> GenericParser<String, UserState, [Result]> {
    let separator = semicolon
    return parser.separatedBy1(separator)
}
/// Lexeme parser `commaSeparated(parser)` applies `parser` _zero_ or more
/// times, with `comma` between consecutive occurrences, and collects the
/// results.
///
/// - parameter parser: The parser applied between commas.
/// - returns: An array of values returned by `parser`.
public func commaSeparated<Result>(
    _ parser: GenericParser<String, UserState, Result>
) -> GenericParser<String, UserState, [Result]> {
    let separator = comma
    return parser.separatedBy(separator)
}
/// Lexeme parser `commaSeparated1(parser)` applies `parser` _one_ or more
/// times, with `comma` between consecutive occurrences, and collects the
/// results.
///
/// - parameter parser: The parser applied between commas.
/// - returns: An array of values returned by `parser`.
public func commaSeparated1<Result>(
    _ parser: GenericParser<String, UserState, Result>
) -> GenericParser<String, UserState, [Result]> {
    let separator = comma
    return parser.separatedBy1(separator)
}
//
// Private methods. They should be in a separate private extension but it
// causes problems with the internal typealiases.
//
/// Parses a line comment: the `commentLine` string of the language
/// definition followed by every character up to, but not including, the
/// next "\n". The comment start is wrapped in `attempt`.
private var oneLineComment: VoidParser {
    let opener = StrParser.string(languageDefinition.commentLine).attempt
    let restOfLine = GenericParser.satisfy({ $0 != "\n"}).skipMany
    return opener *> restOfLine *> GenericParser(result: ())
}
/// Parses a block comment: the `commentStart` string of the language
/// definition (wrapped in `attempt`) followed by the comment body parsed by
/// `inComment`.
///
/// NOTE(review): the parser is built inside the closure-taking
/// `GenericParser` initializer, presumably to defer construction because the
/// comment parsers refer to each other recursively; confirm.
private var multiLineComment: VoidParser {
    return GenericParser {
        let opener = self.languageDefinition.commentStart
        return StrParser.string(opener).attempt *> self.inComment
    }
}
/// Selects the parser for the body of a block comment according to the
/// `allowNestedComments` setting of the language definition.
private var inComment: VoidParser {
    if languageDefinition.allowNestedComments {
        return inNestedComment
    }
    return inNonNestedComment
}
/// Parses the body of a block comment, up to and including its end
/// delimiter, when nested block comments are allowed.
private var inNestedComment: VoidParser {
return GenericParser {
let langDef = self.languageDefinition
// The characters occurring in the start and end delimiters.
// NOTE(review): `removingDuplicates()` is defined elsewhere; presumably it
// drops repeated characters. Confirm.
let startEnd = (
langDef.commentStart + langDef.commentEnd
).removingDuplicates()
let commentEnd = StrParser.string(langDef.commentEnd)
// Alternatives, each but the first continuing with the rest of the body:
//   1. the end delimiter: the comment is finished;
//   2. a complete nested block comment;
//   3. a run of characters that are not delimiter characters;
//   4. a single delimiter character that did not form a delimiter.
return commentEnd.attempt *> GenericParser(result: ()) <|>
self.multiLineComment *> self.inNestedComment <|>
GenericParser.noneOf(startEnd).skipMany1 *>
self.inNestedComment <|> GenericParser.oneOf(startEnd) *>
self.inNestedComment <?> LocalizedString("end of comment")
}
}
/// Parses the body of a block comment, up to and including its end
/// delimiter, when nested block comments are not allowed.
private var inNonNestedComment: VoidParser {
return GenericParser {
let langDef = self.languageDefinition
// The characters occurring in the start and end delimiters.
// NOTE(review): `removingDuplicates()` is defined elsewhere; presumably it
// drops repeated characters. Confirm.
let startEnd = (
langDef.commentStart + langDef.commentEnd
).removingDuplicates()
let commentEnd = StrParser.string(langDef.commentEnd)
// Alternatives, each but the first continuing with the rest of the body:
//   1. the end delimiter: the comment is finished;
//   2. a run of characters that are not delimiter characters;
//   3. a single delimiter character that did not form the end delimiter.
return commentEnd.attempt *> GenericParser(result: ()) <|>
GenericParser.noneOf(startEnd).skipMany1 *>
self.inNonNestedComment <|> GenericParser.oneOf(startEnd) *>
self.inNonNestedComment <?> LocalizedString("end of comment")
}
}
/// Parses an escape code, i.e. what follows the backslash of an escape
/// sequence: a single-character escape (`charEscape`), a numeric code
/// (`charNumber`), an ASCII mnemonic (`charAscii`) or a control code
/// (`charControl`). Returns the character the code denotes.
private static var escapeCode: CharacterParser {
return charEscape <|> charNumber <|> charAscii <|> charControl <?>
LocalizedString("escape code")
}
/// Parses a single-character escape code listed in `escapeMap` and returns
/// the character it denotes.
private static var charEscape: CharacterParser {
    var alternatives: [CharacterParser] = []
    for (esc, code) in escapeMap {
        alternatives.append(
            CharacterParser.character(esc) *> GenericParser(result: code)
        )
    }
    return GenericParser.choice(alternatives)
}
/// Parses a numeric escape code and returns the character with that code:
/// plain decimal digits, "o" followed by octal digits, or "x" followed by
/// hexadecimal digits. Fails when the value is not a valid Unicode scalar
/// (see `characterFromInt`).
private static var charNumber: CharacterParser {
    let decimalCode = decimal
    let octalCode = CharacterParser.character("o") *>
        numberWithBase(8, parser: CharacterParser.octalDigit)
    let hexadecimalCode = CharacterParser.character("x") *>
        numberWithBase(16, parser: CharacterParser.hexadecimalDigit)
    let code = decimalCode <|> octalCode <|> hexadecimalCode
    return code >>- { characterFromInt($0) }
}
/// Parses an ASCII mnemonic escape code listed in `asciiCodesMap` (e.g.
/// "NUL", "ESC", "DEL") and returns the character it denotes.
private static var charAscii: CharacterParser {
    let parsers = asciiCodesMap.map { control in
        // Many mnemonics share a prefix ("SOH"/"SO"/"SI"/"SUB",
        // "DLE"/"DC1", "ENQ"/"EM"/"ESC", ...). Each alternative is wrapped
        // in `attempt` so that a partial match does not consume input and
        // prevent the following alternatives from being tried.
        StrParser.string(control.esc).attempt *>
            GenericParser(result: control.code)
    }
    return GenericParser.choice(parsers)
}
/// Parses a control-character escape code in caret notation: "^" followed
/// by an uppercase letter or one of "@", "[", "\", "]", "^", "_". Returns
/// the control character whose code is the ASCII code of that character
/// minus 0x40 (`^@` is U+0000, `^A` is U+0001, ..., `^_` is U+001F).
private static var charControl: CharacterParser {
    let upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    let ctrlCodes: CharacterParser =
        GenericParser.oneOf(upper).flatMap { char in
            let charA: Character = "A"
            // "A" maps to 1, "B" to 2, ..., "Z" to 26.
            let value =
                char.unicodeScalar.value - charA.unicodeScalar.value + 1
            // `value` is in 1...26, always a valid scalar.
            let unicode = UnicodeScalar.fromUInt32(value)!
            return GenericParser(result: Character(unicode))
        }
    // "[" (0x5B), "\" (0x5C), "]" (0x5D), "^" (0x5E) and "_" (0x5F) map to
    // U+001B...U+001F in that order; in particular "^\" is FS (U+001C) and
    // "^]" is GS (U+001D).
    return GenericParser.character("^") *> (ctrlCodes <|>
        GenericParser.character("@") *> GenericParser(result: "\0") <|>
        GenericParser.character("[") *>
        GenericParser(result: "\u{001B}") <|>
        GenericParser.character("\\") *>
        GenericParser(result: "\u{001C}") <|>
        GenericParser.character("]") *>
        GenericParser(result: "\u{001D}") <|>
        GenericParser.character("^") *>
        GenericParser(result: "\u{001E}") <|>
        GenericParser.character("_") *> GenericParser(result: "\u{001F}"))
}
/// Builds a parser that yields the character whose Unicode scalar value is
/// `v`, or a failing parser when `v` does not convert to a Unicode scalar.
///
/// - parameter v: The scalar value of the character.
/// - returns: A parser returning the character, or failing with a "value
///   outside of Unicode codespace" message.
static func characterFromInt(_ v: Int) -> CharacterParser {
    if let scalar = UnicodeScalar.fromInt(v) {
        return GenericParser(result: Character(scalar))
    }
    return GenericParser.fail(
        LocalizedString("value outside of Unicode codespace")
    )
}
/// Parses one or more digits with `parser` and converts them to an `Int` in
/// the given base (see `integerWithDigits`).
///
/// - parameter base: The radix of the number.
/// - parameter parser: The parser for a single digit.
/// - returns: A parser returning the value of the number.
private static func numberWithBase(
    _ base: Int,
    parser: CharacterParser
) -> IntParser {
    let digitRun = parser.many1
    return digitRun >>- { integerWithDigits(String($0), base: base) }
}
/// Builds a parser that yields the `Int` value of `digits` read in the given
/// base, or a failing parser reporting "Int overflow" when the conversion
/// with `Int(_:radix:)` returns `nil`.
///
/// - parameter digits: The digits of the number.
/// - parameter base: The radix of the number.
/// - returns: A parser returning the value, or failing.
static func integerWithDigits(_ digits: String, base: Int) -> IntParser {
    if let value = Int(digits, radix: base) {
        return GenericParser(result: value)
    }
    return GenericParser.fail(LocalizedString("Int overflow"))
}
/// Parses one or more digits with `parser` and accumulates their value in
/// the given base as a `Double`.
///
/// - parameter base: The radix of the number.
/// - parameter parser: The parser for a single digit; every digit it accepts
///   must be valid in `base`, because the conversion is force-unwrapped.
/// - returns: A parser returning the value of the number.
private static func doubleWithBase(
    _ base: Int,
    parser: CharacterParser
) -> DoubleParser {
    let radix = Double(base)
    return parser.many1 >>- { digits in
        var value = 0.0
        for digit in digits {
            value = radix * value + Double(Int(String(digit), radix: base)!)
        }
        return GenericParser(result: value)
    }
}
/// Parses one or more decimal digits and returns their value as a `Double`.
/// The conversion is force-unwrapped: the digits accepted by `decimalDigit`
/// are expected to always form a valid `Double` literal.
private static var doubleIntegerPart: DoubleParser {
    let digitRun = GenericParser.decimalDigit.many1
    return digitRun >>- { run in
        let text = String(run)
        return GenericParser(result: Double(text)!)
    }
}
/// Parses a natural number in decimal, hexadecimal or octal notation and
/// returns its value.
private static var naturalNumber: IntParser {
// A number starting with "0": the zero is followed by a hexadecimal number
// ("x"/"X" prefix), an octal number ("o"/"O" prefix), more decimal digits,
// or nothing at all (the value 0).
let zeroNumber = GenericParser.character("0") *>
(hexadecimal <|> octal <|> decimal <|> GenericParser(result: 0))
<?> ""
return zeroNumber <|> decimal
}
/// Parses an optional sign and returns the function to apply to the number
/// that follows it: negation for "-", identity for "+" and identity as well
/// when no sign is present (the last alternative consumes no input).
private static func sign<Number: SignedNumber>()
-> GenericParser<String, UserState, (Number) -> Number> {
return GenericParser.character("-") *> GenericParser(result: -) <|>
GenericParser.character("+") *> GenericParser(result: { $0 }) <|>
GenericParser(result: { $0 })
}
/// Parses what follows the integer part of a floating point number: either a
/// fraction optionally followed by an exponent, or an exponent alone.
///
/// - parameter number: The value of the integer part already parsed.
/// - returns: A parser returning the complete value of the number.
private static func fractionalExponent(_ number: Double) -> DoubleParser {
let fractionMsg = LocalizedString("fraction")
// Fraction: "." followed by one or more decimal digits. Each step adds a
// digit and divides by 10.
// NOTE(review): `reduceRight` is defined elsewhere; presumably it folds from
// the last digit to the first, so that "25" gives ((0 + 5) / 10 + 2) / 10 =
// 0.25. Confirm.
let fract = CharacterParser.character(".") *>
(GenericParser.decimalDigit.many1 <?> fractionMsg).map { digits in
digits.reduceRight(0) { frac, digit in
(frac + Double(String(digit))!) / 10
}
}
let exponentMsg = LocalizedString("exponent")
// Exponent: "e" or "E", an optional sign and a decimal number. The result
// is the multiplier 10^exponent computed by `power`.
let expo = GenericParser.oneOf("eE") *> sign() >>- { sign in
(self.decimal <?> exponentMsg) >>- { exp in
GenericParser(result: power(sign(exp)))
}
}
// Fraction followed by an optional exponent; the multiplier defaults to 1
// when there is no exponent.
let fraction = (fract <?> fractionMsg) >>- { frac in
(expo <?> exponentMsg).otherwise(1) >>- { exp in
return GenericParser(result: (number + frac) * exp)
}
}
// Exponent without a fraction.
let exponent = expo >>- { exp in
GenericParser(result: number * exp)
}
return fraction <|> exponent
}
/// Builds a parser for the string `name` that honours the `isCaseSensitive`
/// setting of the language definition. When the language is case sensitive
/// the string is matched exactly; otherwise every alphabetic character is
/// matched in its lowercase or uppercase form.
///
/// - parameter name: The string to parse.
/// - returns: A parser returning `name` as given, whatever the case of the
///   characters found in the input.
private func caseString(_ name: String) -> StrParser {
if languageDefinition.isCaseSensitive {
return StrParser.string(name)
}
// Recursively builds a parser matching `string` one character at a time.
func walk(_ string: String) -> VoidParser {
let unit = VoidParser(result: ())
guard !string.isEmpty else { return unit }
var str = string
// `string` is not empty here, so there is a first character to pop.
let c = str.popFirst()!
let charParser: VoidParser
if c.isAlpha {
charParser = (GenericParser.character(c.lowercase) <|>
GenericParser.character(c.uppercase)) *> unit
} else {
charParser = GenericParser.character(c) *> unit
}
// Every character parser is labelled with the whole name.
return (charParser <?> name) >>- { _ in walk(str) }
}
return walk(name) *> GenericParser(result: name)
}
}
/// Generic implementation of the `TokenParser`.
public struct GenericTokenParser<UserState>: TokenParser {
/// Language definition parameterizing the lexer.
public let languageDefinition: LanguageDefinition<UserState>
/// Creates a `TokenParser` that contains lexical parsers that are defined
/// using the definitions in the `LanguageDefinition` structure.
///
/// One uses the appropriate language definition and selects the lexical
/// parsers that are needed from the resulting `GenericTokenParser`.
///
/// import SwiftParsec
///
/// // The lexer
/// let swiftDef = LanguageDefinition<()>.swift
/// let lexer = GenericTokenParser(languageDefinition: swiftDef)
///
/// // The parser
/// let expression = lexer.identifier <|>
/// lexer.legalOperator <|> ...
///
/// - parameter languageDefinition: Language definition for the lexical
/// parsers.
public init(languageDefinition: LanguageDefinition<UserState>) {
self.languageDefinition = languageDefinition
}
}
/// Characters accepted as the prefix of a hexadecimal number.
private let hexadecimalPrefixes = "xX"
/// Characters accepted as the prefix of an octal number.
private let octalPrefixes = "oO"
/// The substitute character (U+001A). It is rejected as an unescaped
/// character in character and string literals.
private let substituteCharacter: Character = "\u{001A}"
/// Single-character escape codes (`esc`, the character following the
/// backslash) paired with the character each one denotes (`code`).
private let escapeMap: [(esc: Character, code: Character)] = [
("a", "\u{0007}"), ("b", "\u{0008}"), ("f", "\u{000C}"), ("n", "\n"),
("r", "\r"), ("t", "\t"), ("v", "\u{000B}"), ("\\", "\\"), ("\"", "\""),
("'", "'")
]
/// ASCII mnemonic escape codes (`esc`) paired with the character each one
/// denotes (`code`), listed in code-point order: the control characters
/// U+0000 to U+001F, the space (U+0020) and DEL (U+007F).
private let asciiCodesMap: [(esc: String, code:Character)] = [
("NUL", "\u{0000}"), ("SOH", "\u{0001}"), ("STX", "\u{0002}"),
("ETX", "\u{0003}"), ("EOT", "\u{0004}"), ("ENQ", "\u{0005}"),
("ACK", "\u{0006}"), ("BEL", "\u{0007}"), ("BS", "\u{0008}"),
("HT", "\u{0009}"), ("LF", "\u{000A}"), ("VT", "\u{000B}"),
("FF", "\u{000C}"), ("CR", "\u{000D}"), ("SO", "\u{000E}"),
("SI", "\u{000F}"), ("DLE", "\u{0010}"), ("DC1", "\u{0011}"),
("DC2", "\u{0012}"), ("DC3", "\u{0013}"), ("DC4", "\u{0014}"),
("NAK", "\u{0015}"), ("SYN", "\u{0016}"), ("ETB", "\u{0017}"),
("CAN", "\u{0018}"), ("EM", "\u{0019}"), ("SUB", "\u{001A}"),
("ESC", "\u{001B}"), ("FS", "\u{001C}"), ("GS", "\u{001D}"),
("RS", "\u{001E}"), ("US", "\u{001F}"), ("SP", "\u{0020}"),
("DEL", "\u{007F}")
]
/// Returns 10 raised to the power `exp`. A negative exponent is computed as
/// the reciprocal of the corresponding positive power.
///
/// - parameter exp: The exponent.
/// - returns: 10^`exp`.
private func power(_ exp: Int) -> Double {
    let magnitude = pow(10.0, Double(exp < 0 ? -exp : exp))
    return exp < 0 ? 1.0 / magnitude : magnitude
}

View File

@@ -12,7 +12,7 @@
5703BE1D1BEB9A4F003FF5FA /* PermutationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5703BE1C1BEB9A4F003FF5FA /* PermutationTests.swift */; };
570B24891CD7F4400060E452 /* Parsec.swift in Sources */ = {isa = PBXBuildFile; fileRef = 570B24881CD7F4400060E452 /* Parsec.swift */; };
570DB5B51BC3422B0058D186 /* TokenParser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 570DB5B41BC3422A0058D186 /* TokenParser.swift */; };
5714D8BE1BCE90DC00FE4BF0 /* TokenParserTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5714D8BD1BCE90DC00FE4BF0 /* TokenParserTests.swift */; };
5714D8BE1BCE90DC00FE4BF0 /* DefaultTokenParserTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5714D8BD1BCE90DC00FE4BF0 /* DefaultTokenParserTests.swift */; };
5714D8C01BCEAE3B00FE4BF0 /* LanguageDefinition.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5714D8BF1BCEAE3A00FE4BF0 /* LanguageDefinition.swift */; };
57167F0D1B99E66800F35A29 /* SwiftParsec.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 57167F021B99E66800F35A29 /* SwiftParsec.framework */; };
57167F121B99E66800F35A29 /* GenericParserTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 57167F111B99E66800F35A29 /* GenericParserTests.swift */; };
@@ -29,6 +29,7 @@
577261F81BE14A3700E70BEF /* ErrorMessageTest.swift in Sources */ = {isa = PBXBuildFile; fileRef = 577261F71BE14A3700E70BEF /* ErrorMessageTest.swift */; };
578435561BF66B3A0067B3E9 /* PositionTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 578435551BF66B3A0067B3E9 /* PositionTests.swift */; };
578E3B101CA4B014005B83B8 /* SwiftParsec.podspec in Resources */ = {isa = PBXBuildFile; fileRef = 578E3B0F1CA4B014005B83B8 /* SwiftParsec.podspec */; };
579471751DA96D330046625A /* DefaultTokenParser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 579471741DA96D330046625A /* DefaultTokenParser.swift */; };
579C00C81C0277CA009282A6 /* UInt16.swift in Sources */ = {isa = PBXBuildFile; fileRef = 579C00C71C0277CA009282A6 /* UInt16.swift */; };
579ED1211D3AE9C10078D6F4 /* JSONBenchmarkTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 579ED1201D3AE9C10078D6F4 /* JSONBenchmarkTests.swift */; };
57A86B0B1BC862CC00A7F45F /* CollectionAggregation.swift in Sources */ = {isa = PBXBuildFile; fileRef = 57A86B0A1BC862CC00A7F45F /* CollectionAggregation.swift */; };
@@ -62,7 +63,7 @@
5703BE1C1BEB9A4F003FF5FA /* PermutationTests.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PermutationTests.swift; sourceTree = "<group>"; };
570B24881CD7F4400060E452 /* Parsec.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Parsec.swift; sourceTree = "<group>"; };
570DB5B41BC3422A0058D186 /* TokenParser.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TokenParser.swift; sourceTree = "<group>"; };
5714D8BD1BCE90DC00FE4BF0 /* TokenParserTests.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TokenParserTests.swift; sourceTree = "<group>"; };
5714D8BD1BCE90DC00FE4BF0 /* DefaultTokenParserTests.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = DefaultTokenParserTests.swift; sourceTree = "<group>"; };
5714D8BF1BCEAE3A00FE4BF0 /* LanguageDefinition.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = LanguageDefinition.swift; sourceTree = "<group>"; };
57167F021B99E66800F35A29 /* SwiftParsec.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = SwiftParsec.framework; sourceTree = BUILT_PRODUCTS_DIR; };
57167F071B99E66800F35A29 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
@@ -85,6 +86,7 @@
578435551BF66B3A0067B3E9 /* PositionTests.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PositionTests.swift; sourceTree = "<group>"; };
578435571BF692380067B3E9 /* LICENSE */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = LICENSE; sourceTree = "<group>"; };
578E3B0F1CA4B014005B83B8 /* SwiftParsec.podspec */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = SwiftParsec.podspec; sourceTree = "<group>"; };
579471741DA96D330046625A /* DefaultTokenParser.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = DefaultTokenParser.swift; sourceTree = "<group>"; };
579C00C71C0277CA009282A6 /* UInt16.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = UInt16.swift; sourceTree = "<group>"; };
579C00CD1C03EC60009282A6 /* en.lproj */ = {isa = PBXFileReference; lastKnownFileType = folder; name = en.lproj; path = Sources/SwiftParsec/en.lproj; sourceTree = "<group>"; };
579C00D31C03F16E009282A6 /* README.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = "<group>"; };
@@ -158,6 +160,7 @@
57E6F7821BAA1C260015E875 /* CharacterParsers.swift */,
57AA4E001BB777FF0026DF01 /* CombinatorParsers.swift */,
57C995DA1CF86906000DA292 /* Configuration.swift */,
579471741DA96D330046625A /* DefaultTokenParser.swift */,
5744A3A21D8EC52700CA7191 /* Either.swift */,
57C956681BDAE01D00057A4F /* ExpressionParser.swift */,
57167F1C1B99E6E200F35A29 /* GenericParser.swift */,
@@ -186,7 +189,7 @@
578435551BF66B3A0067B3E9 /* PositionTests.swift */,
57ABA23B1CF5F39B00C2B657 /* StringTests.swift */,
5768B5771BB0773A005FF951 /* TestUtilities.swift */,
5714D8BD1BCE90DC00FE4BF0 /* TokenParserTests.swift */,
5714D8BD1BCE90DC00FE4BF0 /* DefaultTokenParserTests.swift */,
57C459771CE2150D0014764F /* UnicodeScalarTests.swift */,
57167F131B99E66800F35A29 /* Info.plist */,
579ED1221D3AF4680078D6F4 /* SampleJSON.json */,
@@ -328,6 +331,7 @@
57E6F77F1BA7414B0015E875 /* SequenceAggregation.swift in Sources */,
57C995DB1CF86906000DA292 /* Configuration.swift in Sources */,
5744A3A31D8EC52700CA7191 /* Either.swift in Sources */,
579471751DA96D330046625A /* DefaultTokenParser.swift in Sources */,
57AA4E011BB777FF0026DF01 /* CombinatorParsers.swift in Sources */,
5714D8C01BCEAE3B00FE4BF0 /* LanguageDefinition.swift in Sources */,
579C00C81C0277CA009282A6 /* UInt16.swift in Sources */,
@@ -353,7 +357,7 @@
buildActionMask = 2147483647;
files = (
5768B5781BB0773A005FF951 /* TestUtilities.swift in Sources */,
5714D8BE1BCE90DC00FE4BF0 /* TokenParserTests.swift in Sources */,
5714D8BE1BCE90DC00FE4BF0 /* DefaultTokenParserTests.swift in Sources */,
577261F81BE14A3700E70BEF /* ErrorMessageTest.swift in Sources */,
57AA4E031BB77A0F0026DF01 /* CombinatorParsersTests.swift in Sources */,
57ABA23C1CF5F39B00C2B657 /* StringTests.swift in Sources */,

View File

@@ -1,5 +1,5 @@
//==============================================================================
// TokenParserTests.swift
// DefaultTokenParserTests.swift
// SwiftParsec
//
// Created by David Dufresne on 2015-10-14.
@@ -9,7 +9,7 @@
import XCTest
@testable import SwiftParsec
class TokenTests: XCTestCase {
class DefaultTokenParserTests: XCTestCase {
func testIdentifier() {
//