# Grammar for arithmetic expressions over non-negative integers with
# + - * / and parentheses. `start` names the entry rule.
# `expr` and `term` are right-recursive: each is either
# "operand operator rest" or a single operand.
# Each rule sits on its own line; Julia needs a newline (or `;`) between
# the assignments inside the `begin ... end` block.
@grammar mathgrammar begin
  start = expr
  number = r"([0-9]+)"
  expr = (term + op1 + expr) | term
  term = (factor + op2 + term) | factor
  factor = number | pfactor
  pfactor = ('(' + expr + ')')
  op1 = '+' | '-'
  op2 = '*' | '/'
end
# Parse the sample expression. The third result is bound to `err` rather
# than `error` so the example does not shadow Base.error.
(node, pos, err) = parse(mathgrammar, "5*(2-6)")

# One action per grammar rule, keyed by the rule's name. The operator
# actions return symbols, which `expr`/`term` then receive as children[2]
# and use as the function in a call expression.
math = Dict()
math["number"] = (node, children) -> float(node.value)
math["expr"] = (node, children) -> length(children) == 1 ? children : eval(Expr(:call, children[2], children[1], children[3]))
math["factor"] = (node, children) -> children
# children[2] is the expression between the parentheses
math["pfactor"] = (node, children) -> children[2]
math["term"] = (node, children) -> length(children) == 1 ? children : eval(Expr(:call, children[2], children[1], children[3]))
math["op1"] = (node, children) -> symbol(node.value)
math["op2"] = (node, children) -> symbol(node.value)

result = transform(math, node) # will give the value of -20, since 5*(2-6) = 5*(-4)
The downside of the separation is that it adds some more complexity to the process.
If you are interested in testing your library on a concrete problem, you may want to parse comma-separated value (CSV) files. The BNF is given in the RFC 4180 specification: http://tools.ietf.org/html/rfc4180
# Grammar for comma separated value (CSV) text; `start` names the entry rule.
@grammar csv begin
start = data
# a record, followed by any further records each preceded by CR LF
data = record + *(crlf + record)
# a field, followed by any further fields each preceded by a comma
record = field + *(comma + field)
# the quoted form is tried before the bare form
field = escaped_field | unescaped_field
# inside quotes, commas, CR, LF and a doubled quote are allowed as content
# NOTE(review): `dqoute2` looks like a misspelling of `dquote2`; it is spelled
# the same way here and at its definition below, so the grammar is consistent.
escaped_field = dquote + *(textdata | comma | cr | lf | dqoute2) + dquote
unescaped_field = textdata
# character class covers ASCII space through '~' except '"' and ','
textdata = r"[ !#$%&'()*+\-./0-~]+"
cr = '\r'
lf = '\n'
crlf = cr + lf
dquote = '"'
# two consecutive double-quote characters
dqoute2 = "\"\""
comma = ','
end
# Actions for the `csv` grammar, keyed by rule name.
# The dictionary has to exist before actions can be stored in it.
tr = Dict()

# Separators carry no data, so their actions return nothing.
tr["crlf"] = (node, children) -> nothing
tr["comma"] = (node, children) -> nothing

# Field actions read the value straight from the untransformed child node:
# the second child of a quoted field (the part after the opening quote),
# the first child of a bare field.
tr["escaped_field"] = (node, children) -> node.children[2].value
tr["unescaped_field"] = (node, children) -> node.children[1].value
tr["field"] = (node, children) -> children

# `unroll` is not defined in this snippet — presumably a helper supplied by
# the parser library that flattens the nested child lists; TODO confirm.
tr["record"] = (node, children) -> unroll(children)
tr["data"] = (node, children) -> unroll(children)
tr["textdata"] = (node, children) -> node.value
# Sample input: two plain records, then a record of quoted fields whose last
# field contains a doubled quote. The data itself ends with a `"`; it is
# written as \" so that it cannot be read as part of the closing """.
parse_data = """1,2,3\r\nthis is,a test,of csv\r\n"these","are","quotes ("")\""""

# The third result is bound to `err` rather than `error` so the example does
# not shadow Base.error.
(node, pos, err) = parse(csv, parse_data)
result = transform(tr, node)
{{"1","2","3"},{"this is","a test","of csv"},{"these","are","quotes (\"\")"}}
Finally, one thing that I would like to change in the near future is to have transforms look something like:
# Sketch of a possible future transform syntax, not something that runs as
# written: one `html` method per rule label, chosen by the symbol given in
# the third argument position.
html(node, children, :bold_open) = "<b>"
html(node, children, :bold_close) = "</b>"
html(node, children, :text) = node.value
html(node, children, :bold_text) = join(children)
result = transform(html, node)
If Julia gets dispatch on value, then this would be trivial to write. One possible workaround is to create a type per rule in the grammar. Then the functions can be written to dispatch on the type associated with the given rule.
@transform <name> begin
<label> = <action>
end
# Grammar for a small "node NAME { item; item; }" format; `start` names the
# entry rule. Every token rule ends with `space`, so whitespace following a
# token is consumed together with it.
@grammar nodetest begin
start = +node_def
node_def = node_label + node_name + lbrace + data + rbrace
node_name = string_value + space
# each entry is a `line` terminated by a semicolon
data = *(line + semicolon)
line = string_value + space
# identifier: a letter or underscore, then letters, digits or underscores
string_value = r"[_a-zA-Z][_a-zA-Z0-9]*"
lbrace = "{" + space
rbrace = "}" + space
semicolon = ";" + space
node_label = "node" + space
# spaces, tabs and newlines; the pattern also matches the empty string
space = r"[ \t\n]*"
end
# Record pairing a node's name with its collected values.
# Both fields are untyped and may be reassigned after construction.
type MyNode
  name
  values

  # Sole constructor: stores both arguments exactly as given.
  MyNode(name, values) = new(name, values)
end
# Transform for `nodetest` parse trees: each `<label> = <action>` entry gives
# the action for the grammar rule of that name. The result for a `node_def`
# is a MyNode carrying the node's name and its values.
@transform tograph begin
  # ignore these
  lbrace = nothing
  rbrace = nothing  # label must match the grammar rule `rbrace`
  semicolon = nothing
  node_label = nothing
  space = nothing

  # special action so we don't have to define every label
  default = children

  string_value = node.value
  # NOTE(review): the grammar shown has no rule named `value`; kept as-is.
  value = node.value
  line = children

  # the name is not known yet at this level; `node_def` fills it in
  data = MyNode("", children)

  node_def = begin
    # children[1] is used as the name; children[2] is expected to be the
    # MyNode produced by the `data` action
    local name = children[1]
    local cnode = children[2]
    cnode.name = name
    return cnode
  end
end
# Sample input for the `nodetest` grammar. The snippet uses `data` without
# defining it; this text is chosen to match the output shown on the last line.
data = "node foo { a; b; } node bar { c; d; }"

# The third result is bound to `err` rather than `error` so the example does
# not shadow Base.error.
(ast, pos, err) = parse(nodetest, data)
result = apply(tograph, ast)
println(result) # {MyNode("foo",{"a","b"}),MyNode("bar",{"c","d"})}