// ANTLR 4 grammar describing comma-separated-value input.
grammar CSV;
// start of parser
// A file is one header row followed by one or more data rows.
file : hdr row+ ;
// The header has exactly the same shape as any other row.
hdr : row ;
// A row is one or more comma-separated fields terminated by a newline,
// optionally preceded by a carriage return.
row : field (',' field)* '\r'? '\n' ;
// A field is unquoted text, a quoted string, or nothing at all (empty alternative).
field : TEXT | STRING | ;
// start of lexer
// One or more characters that are not a comma, newline, carriage return or double quote.
TEXT : ~[,\n\r"]+ ;
// A double-quoted token whose content is any mix of doubled quotes and non-quote characters.
STRING : '"' ('""'|~'"')* '"' ; // quote-quote is an escaped quote
p1 ~ p2 // sequencing: must match p1 followed by p2
p1 | p2 // alternation: must match either p1 or p2,
// with preference given to p1
p1 ||| p2 // alternation: if p1 and p2 both succeed,
// the result of the parser that consumed the most characters is used
p1.? // optionality: may match p1 or not
p1.+ // repetition: matches 1 or more repetitions of p1
p1.* // repetition: matches any number of repetitions of p1
p1 ~> p2 // a parser combinator for sequential
// composition which keeps only the right result.
p1 <~ p2 // a parser combinator for sequential
// composition which keeps only the left result.
import util.parsing.combinator.RegexParsers
/**
 * Combinator-based CSV parser.
 *
 * The conversion of matched lexer fragments into field values is left abstract
 * (`makeText`, `makeString`, `makeEmpty`) so that it can be supplied by a mixin.
 */
trait CSVParser extends RegexParsers {
  // Whitespace is never skipped implicitly; it stays part of the input seen by the rules below.
  override val skipWhitespace = false
  override val whiteSpace = """[ \t]""".r

  // ---- parser rules ----

  /** A header row followed by one or more data rows; the header becomes the first list element. */
  def file: Parser[List[List[String]]] =
    (hdr ~ rep1(row)) ^^ { case header ~ rows => header :: rows }

  /** The header is parsed exactly like any other row. */
  def hdr: Parser[List[String]] = row

  /** Comma-separated fields terminated by "\n", optionally preceded by "\r". */
  def row: Parser[List[String]] =
    ((field ~ rep("," ~> field)) <~ opt("\r") <~ "\n") ^^ {
      case first ~ rest => first :: rest
    }

  /** Longest match of TEXT and STRING; falls back to EMPTY when neither succeeds. */
  def field: Parser[String] = (TEXT ||| STRING) | EMPTY

  // ---- lexer rules ----

  /** One or more single characters, none of which is a comma, newline, carriage return or quote. */
  lazy val TEXT: Parser[String] = rep1("[^,\n\r\"]".r) ^^ makeText

  /** Quoted content: the delimiting quotes are dropped, the inner fragments go to makeString. */
  lazy val STRING: Parser[String] =
    (("\"" ~> rep("\"\"" | "[^\"]".r)) <~ "\"") ^^ makeString

  /** Matches the empty string, i.e. always succeeds without consuming input. */
  lazy val EMPTY: Parser[String] = literal("") ^^ makeEmpty

  // ---- actions to be provided by the implementor ----

  def makeText: List[String] => String
  def makeString: List[String] => String
  def makeEmpty: String => String
}
/** Default actions that turn matched lexer fragments into field values. */
trait CSVLexerAction {
  /** Concatenates the fragments and strips leading and trailing blanks from the result. */
  def makeText: List[String] => String = { fragments =>
    val joined = fragments.mkString
    joined.trim
  }

  /**
   * Concatenates the fragments and replaces every two consecutive quotes by a single quote.
   * No enclosing quotation marks are removed here.
   */
  def makeString: List[String] => String = { fragments =>
    val joined = fragments.mkString
    joined.replaceAll("\"\"", "\"")
  }

  /** Value used for an empty field; the matched input is ignored. Adjust if required. */
  def makeEmpty: String => String = _ => ""
}
import java.io.FileReader
/** Command-line entry point: parses one CSV file and prints the parse result. */
object CSVParserCLI extends CSVParser with CSVLexerAction {
  /**
   * Parses the file named by the first argument with the `file` rule and prints the outcome.
   *
   * @param args command-line arguments; `args(0)` is the path of the CSV file to parse
   */
  def main(args: Array[String]): Unit = {
    args.headOption match {
      case None =>
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        Console.err.println("usage: CSVParserCLI <csv-file>")
        sys.exit(1)
      case Some(path) =>
        val reader = new FileReader(path)
        // parseAll finishes consuming the reader before it returns, so it is safe
        // to close right after printing; finally also covers a failing parse.
        try println(parseAll(file, reader))
        finally reader.close()
    }
  }
}
Ronan