Horrendously slow parsing, what did i miss?

35 views
Skip to first unread message

Raul Bache

unread,
May 30, 2016, 7:52:18 AM5/30/16
to parboiled2.org User List
Hello. I have a file consisting of ~50k lines like this:

"""package com.arne.test {
| class Majskorv {
| method public static java.lang.String majsa();
| field public static java.lang.String BANANKONSTANT = "Hurra";
| }
|}""".stripMargin

And a parser that looks like this: 

case class Packages(l: Seq[Package]) {
override def toString = "TODO"
}
case class Package(name: String, classes: Seq[Class])
case class Class(name: String, members: Seq[Member])

case class Parameter(typ: String, typeArguments: Seq[String])

sealed trait Member
case class Method(rtyp: String, name: String, params: Seq[Parameter]) extends Member
case class Field(typ: String, name: String) extends Member
case class Constructor(typ: String, params: Seq[Parameter]) extends Member

class MyParser(val input: ParserInput) extends Parser {
def Root: Rule1[Packages] = rule {
zeroOrMore(aPackage).separatedBy(pws) ~ EOI ~> ((l: Seq[Package]) => new Packages(l))
}
def aPackage: Rule1[Package] = rule {
("package" ~ s ~ packageName ~ s ~ "{" ~ pws ~ classes ~ "}") ~> ((name: String, clz: Seq[Class]) => new Package(name, clz))
}

def s = CharPredicate(" ")
def ps = rule { zeroOrMore(s) }
def ws = CharPredicate(" \t\n")
def pws = rule { zeroOrMore(ws) }

def packageName: Rule1[String] = rule {
capture(oneOrMore(CharPredicate.AlphaNum | anyOf("_.")))
}

def classes: Rule1[Seq[Class]] = rule {
zeroOrMore(clazz).separatedBy(pws)
}

def implements = rule {
s ~ "implements" ~ s ~ oneOrMore(classname).separatedBy(" ")
}

def extend = rule {
s ~ "extends" ~ s ~ classname
}

def clazz: Rule1[Class] = rule {
ps ~ modifiers ~ s ~ ("class" | "interface") ~ s ~ classname ~ optional(extend) ~ optional(implements) ~ s ~ "{" ~
ws ~ zeroOrMore(methodOrField) ~
"}" ~ pws ~> ((mods: Seq[String], clname: String, ext: Option[String], impl: Option[Seq[String]], members: Seq[Member]) => { println("class"); Class(clname, members) })
}

def validName = rule { CharPredicate.AlphaNum }

def paramName: Rule1[Parameter] = rule {
parameterizedType ~ optional(typeArgs) ~>
((typ: String, typeArgs: Option[Seq[String]]) => Parameter(typ, typeArgs.getOrElse(Nil)))
}
def classname: Rule1[String] = packageName
def parameterizedType: Rule1[String] = rule { packageName ~ optional(oneOrMore("[]")) }
def methodName: Rule1[String] = packageName
def methodOrField = rule { method | field | ctor }

def strOrInt = rule {
('"' ~ zeroOrMore(CharPredicate.All) ~ '"') | (optional('-') ~ oneOrMore(CharPredicate.Digit) ~ optional(anyOf("lLfFdD")))
}

def comment = rule {
ps ~ "//" ~ ws ~ zeroOrMore(CharPredicate.Visible | ' ')
}

def field: Rule1[Member] = rule {
ps ~ "field" ~ s ~ modifiers ~ s ~ parameterizedType ~ s ~ methodName ~ fieldBody ~ optional(comment) ~ ws ~>
((mods: Seq[String], ret: String, nme: String) => Field(ret, nme))
}

def method: Rule1[Member] = rule {
(ps ~ "method" ~ s ~ modifiers ~ s ~ parameterizedType ~ optional(typeArgs) ~ s ~ methodName ~
"(" ~ zeroOrMore(paramName).separatedBy(", ") ~ ")" ~ optional(throwsClause) ~ ";" ~ ws) ~>
((mods: Seq[String], ret: String, types: Option[Seq[String]], mname: String, params: Seq[Parameter], opt: Option[Seq[String]]) => new Method(ret, mname, params))
}

def ctor = rule {
ps ~ "ctor" ~ pws ~ modifiers ~ pws ~ classname ~ optional(typeArgs) ~ "(" ~ zeroOrMore(paramName).separatedBy(", ") ~ ");" ~ pws ~>
((mods: Seq[String], nme: String, types: Option[Seq[String]], params: Seq[Parameter]) => Constructor(nme, params))
}

def typeArg: Rule1[String] = rule {
wildcard | wildcard2 | parameterizedType
}
def wildcard: Rule1[String] = rule {
"? extends " ~ parameterizedType
}
def wildcard2: Rule1[String] = rule { capture("?") }

def typeArgs = rule {
"<" ~ oneOrMore(typeArg).separatedBy(", ") ~ ">"
}

def throwsClause = rule {
" throws " ~ oneOrMore(classname).separatedBy(", ")
}

def modifiers: Rule1[Seq[String]] = rule {
zeroOrMore(modifier).separatedBy(ws)
}
def modifier: Rule1[String] = rule {
capture("public" | "static" | "abstract" | "protected" | "private" | "final" | "deprecated")
}
def fieldBody = rule {
(" = " ~ strOrInt ~ ";") | ";"
}
}

The parsing will never end however. Any hint on how to debug or improve the speed of the parsing?

Thanks! / Raul

Mathias Doenitz

unread,
May 30, 2016, 7:55:42 AM5/30/16
to parboil...@googlegroups.com
Raul,

you probably have either a "no-progress-recursion" somewhere (i.e. a rule recursion that the parser enters without consuming at least a single character in each iteration)
or some exponential parsing behaviour.

I'd try to debug it by reducing the example input as much as possible and adding something `... ~ run(print(cursorChar))` at strategic points in your grammar.

HTH and cheers,
Mathias

---
mat...@parboiled.org
http://www.parboiled.org
> --
> You received this message because you are subscribed to the Google Groups "parboiled2.org User List" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to parboiled-use...@googlegroups.com.
> Visit this group at https://groups.google.com/group/parboiled-user.
> To view this discussion on the web visit https://groups.google.com/d/msgid/parboiled-user/b694c5e8-8c3c-43be-8929-88df31f23fbb%40googlegroups.com.
> For more options, visit https://groups.google.com/d/optout.

Reply all
Reply to author
Forward
0 new messages