this grammar is failing and I don't understand why

105 views
Skip to first unread message

Brannon King

unread,
Sep 17, 2013, 11:53:37 AM9/17/13
to eto-...@googlegroups.com
I have the code below for parsing a certain type of script. The code seems to error out with a large number of ambiguities. I expected that those would be eliminated through some kind of look-ahead. Can you see any changes I should make to successfully parse my example script? This is the error:

Index=4, Context="set >>>[pressure]"
Expected:
real action pressure
real action velocity with space
string action engine
combined for at
action: Sequence
telemetry: Sequence
row possibilities: Sequence

Code:
 
 
using System;
using System.Collections.Generic;
using System.Linq;
using Asi.Assets.Models;
using Asi.Services;
using Asi.Units;
using Eto.Parse;
using Eto.Parse.Parsers;
namespace Asi.Assignments.UI.Procedures
{
 /// <summary>
 /// Eto.Parse grammar named "procedure" for a line-oriented procedure script.
 /// For every executor group of the supplied telemetry definitions a "row" parser is built from
 /// an action ("inc"/"dec"/"mul" ... "by" ..., or "set" ... "at" ...), an optional "until" telemetry
 /// condition and an optional "#" comment. The grammar is a repetition of any such row followed by
 /// an end-of-line or end-of-input terminal.
 /// </summary>
 public class EtoProcedureGrammar: Grammar
 {
  /// <summary>
  /// Installs one-or-more single-line whitespace as the default separator.
  /// </summary>
  static EtoProcedureGrammar()
  {
   // NOTE(review): this is assigned from a static constructor, so it looks like a shared setting rather
   // than a per-grammar one (the original trailing comment says as much) -- confirm the scope.
   DefaultSeparator = +Terminals.SingleLineWhiteSpace; // should be grammar-specific
  }
  /// <summary>
  /// Builds the grammar from the given telemetry definitions.
  /// </summary>
  /// <param name="telemetryDefinitions">Definitions to generate row parsers from; they are grouped by their Executor.</param>
  /// <param name="allowCancel">Not referenced anywhere in this constructor.</param>
  public EtoProcedureGrammar(IEnumerable<TelemetryDefinitionModel> telemetryDefinitions, bool allowCancel) : base("procedure")
  {
   // Optional comment: "#" followed by a repetition of AnyChar combined with Eol via '^'
   // (presumably "rest of the line" -- confirm the '^' semantics against Eto.Parse).
   var comment = ("#" & (-Terminals.AnyChar ^ Terminals.Eol)).Named("comment").Optional();
   var startRows = new List<Parser>();
   foreach (var @group in telemetryDefinitions.GroupBy(td => td.Executor))
   {
    // "until <condition>" over every definition in the group; optional.
    var telemetry = ("until" & CreateTerminals(@group)).Named("telemetry").Optional();
    // Actions only target definitions that are not read-only; the connector word replaces the comparison operator.
    // NOTE(review): CreateTerminals returns null when it receives no definitions; what '&' does with a null
    // operand is not visible here -- confirm for groups with only read-only definitions.
    var incAction = (((Parser)"inc" | "dec" | "mul") & CreateTerminals(@group.Where(g => !g.IsReadonly), "by")).Named("action");
    var setAction = ("set" & CreateTerminals(@group.Where(g => !g.IsReadonly), "at")).Named("action");
    // A row is: inc-action [until], or set-action [until], or just [until]; then the optional comment.
    var row = ((incAction & telemetry) | (setAction & telemetry) | telemetry) & comment;
    startRows.Add(row.Named("row " + @group.Key));
   }
   // NOTE(review): indexing element 0 throws if telemetryDefinitions produced no groups.
   var rowPossibilities = startRows[0];
   for (int i = 1; i < startRows.Count; i++)
    rowPossibilities |= startRows[i];
   // With a single group this renames that group's row parser itself (no alternation was created).
   rowPossibilities.Name = "row possibilities";
   // Repetition of (row, then end-of-line or end-of-input).
   Inner = +(rowPossibilities & (Terminals.Eol | Terminals.End));
  }
  /// <summary>
  /// Builds one parser per definition of the form "[name]" operator value [metric] and combines
  /// them into a single alternation.
  /// </summary>
  /// <param name="actions">Definitions to create terminals for.</param>
  /// <param name="connector">
  /// When non-null, this literal is used in place of the comparison operator (e.g. "at", "by");
  /// when null, "=="/"!=" are used for strings and the full comparison set for numbers.
  /// </param>
  /// <returns>The combined alternation, or null when <paramref name="actions"/> is empty.</returns>
  /// <exception cref="NotImplementedException">A definition's TeleType is not String, Real or Integer.</exception>
  private Parser CreateTerminals(IEnumerable<TelemetryDefinitionModel> actions, string connector = null)
  {
   // May be null if no UnitsManager is registered; metric parsing is skipped in that case (see null check below).
   var um = ServiceRegistry.Get<UnitsManager>();
   var connectorTerm = connector  != null ? (Parser)connector : null;
   var smallOp = (Parser)"==" | "!=";
   // NOTE(review): "<" and ">" are listed before "<=" and ">=". If alternatives are tried in the order
   // listed, the one-character operator wins and the trailing "=" is left unparsed -- consider listing
   // the two-character operators first.
   var bigOp =  smallOp | "<" | "<=" | ">" | ">=";
   Parser combined = null;
   foreach (var definition in actions)
   {
    // Optional unit suffix, built only for non-unitless metrics that have known abbreviations.
    Parser metric = null;
    if (definition.Metric != PhysicalType.Unitless && um != null)
    {
     var abbrevs = um.FindList(definition.Metric).SelectMany(u => u.Abbreviations).ToList();
     if (abbrevs.Count > 0)
     {
      abbrevs.Sort();
      // The metric's own name is the first alternative, followed by the abbreviations.
      var metricOp = (Parser)definition.Metric.ToString();
      // NOTE(review): the loop starts at index 1, so abbrevs[0] is never added as an alternative.
      for (int i = 1; i < abbrevs.Count; i++)
       metricOp |= abbrevs[i];
      metric = metricOp.Named("metric");
      // NOTE(review): the return value of Optional() is discarded. If Optional() returns a new
      // parser instead of modifying this one, the metric remains mandatory.
      metric.Optional();
     }
    }
    // Literal "[<definition name>]".
    var action = ((Parser)("[" + definition.Name + "]"));
    string name;
    if (definition.TeleType == TelemetryType.String)
    {
     var op = connector != null ? connectorTerm : smallOp;
     var tail = new StringParser{QuoteCharacters = new[]{'"'}, AllowDoubleQuote = false, AllowEscapeCharacters = false, AllowNonQuoted = true};
     action &= op & tail;
     name = "string action";
    }
    else if (definition.TeleType == TelemetryType.Real)
    {
     var op = connector != null ? connectorTerm : bigOp;
     var tail = new NumberParser {AllowDecimal = true, AllowExponent = true, AllowSign = true};
     action &= op & tail;
     name = "real action";
    }
    else if (definition.TeleType == TelemetryType.Integer)
    {
     var op = connector != null ? connectorTerm : bigOp;
     // we can't use the pipe on terminals themselves and have the transients work
     // that seems to be an issue in Irony, but it's not too much work to make another NonTerminal here and mark it transient
     // NOTE(review): the two lines above talk about Irony, NonTerminals and transients, none of which
     // appear in this code -- they look carried over from another implementation; confirm and remove.
     Parser tail = null;
     if (definition.Enumerations != null && definition.Enumerations.Length > 0)
     {
      // Integer with named values: the value is one of the enumeration literals.
      for (int i = 0; i < definition.Enumerations.Length; i++)
      {
       var e = (Parser)definition.Enumerations[i];
       if (i == 0) tail = e;
       else tail |= e;
      }
      name = "enum action";
     }
     else
     {
      tail = new NumberParser{AllowDecimal = false, AllowExponent = false, AllowSign = true};
      name = "int action";
     }
     action &= op & tail;
    }
    // The message says "definition.Type" although the property tested above is TeleType.
    else throw new NotImplementedException("definition.Type is out of range");
    if (metric != null)
     action &= metric;
    action.Name = name + " " + definition.Name;
    if (combined == null) combined = action;
    else combined |= action;
   }
   // With a null connector the name is "combined for " (concatenating null adds nothing).
   if (combined != null)
    combined.Name = "combined for " + connector;
   return combined;
  }
 }
}


using System.Diagnostics;
using Asi.Assets.Models;
using Asi.Assignments.UI.Procedures;
using Asi.PersistenceFramework;
using Asi.Services;
using Asi.Units;
using Eto.Parse;
using NUnit.Framework;
namespace Asi.Procedures.Tests
{
 [TestFixture]
 public class TestEtoGrammar
 {
  const string _testInput = @"set [pressure] at 50% until [velocity with space] >= 50
    set [velocity with space] at 40 m/s until [distance] >= 50 m # comment at end
    # I'm a fancy comment
    set [velocity with space] at -50.1e2 until [engine] == ""off""
     
#i'm a far left comment
    set [pressure] at 20
    set [velocity with space] at 45.2 mph
     until [distance] > -80
   ";
  static readonly TelemetryDefinitionModel[] _testDefinitions = // fill POCO containers:
   {
    new TelemetryDefinitionModel{ UniqueId = "pressureID", IsReadonly = false, Metric = PhysicalType.Percent, TeleType = TelemetryType.Real, ShortName = "pressure" },
    new TelemetryDefinitionModel{ UniqueId = "velocityID", IsReadonly = false, Metric = PhysicalType.Velocity, TeleType = TelemetryType.Real, ShortName = "velocity with space" },
    new TelemetryDefinitionModel{ UniqueId = "distanceID", IsReadonly = true, Metric = PhysicalType.Distance, TeleType = TelemetryType.Integer, ShortName = "distance" },
    new TelemetryDefinitionModel{ UniqueId = "engineID", IsReadonly = false, Metric = PhysicalType.Unitless, TeleType = TelemetryType.String, ShortName = "engine" },
   };
  [TestFixtureSetUp]
  public void Setup()
  {
   var pm = new PersistenceManager();
   var um = new UnitsManager();
   um.UnitsOfMeasurePathId = "Units-of-Measure";
   ServiceRegistry.AddRange(new IService[] { pm, um });
   ServiceRegistry.Start();
  }
  [TestFixtureTearDown]
  public void TearDown()
  {
   ServiceRegistry.Stop();
  }
  private static void DisplayTree(Match node, int level = 0)
  {
   for (int i = 0; i < level; i++)
    Trace.Write("  ");
   Trace.WriteLine(node);
   foreach(var match in node.Matches)
    DisplayTree(match, level + 1);
  }
  [Test]
  public void VerifyNoErrors()
  {
   var parser = new EtoProcedureGrammar(_testDefinitions, false);
   parser.Initialize();
   //Assert.IsTrue(string.IsNullOrEmpty(parser.GetErrorMessage())); // fails
   var tree = parser.Match(_testInput);
   Assert.IsTrue(tree.Success); // fails here
   DisplayTree(tree);
  }
 }
}

Curtis Wensley

unread,
Sep 17, 2013, 12:04:27 PM9/17/13
to eto-...@googlegroups.com
Do you have the code for TelemetryDefinitionModel, ServiceRegistry, etc.? It seems to be missing, so I can't run this code.

One thing that could help with debugging is to turn on errors for all parsers. You can do this on your grammar after defining it by calling:

myGrammar.SetError<Parser>(true);

This will give you more than just the named parsers as errors, and help show what exactly the parser is choking on.

Cheers!
Curtis.

Brannon King

unread,
Sep 17, 2013, 12:05:43 PM9/17/13
to eto-...@googlegroups.com
I found the first issue: "50%" instead of "50 %". Can I make the leading white space separator optional for my metric terminal?

Curtis Wensley

unread,
Sep 17, 2013, 12:11:14 PM9/17/13
to eto-...@googlegroups.com
Yeah, just do this:

    if (metric != null)
        action &= -Terminals.WhiteSpace & metric;

instead of:

    if (metric != null)
        action &= metric;

Hope this helps!
Curtis.

Brannon King

unread,
Sep 17, 2013, 12:21:38 PM9/17/13
to eto-...@googlegroups.com
I found some more errors:
 
var metricOp = (Parser)definition.Metric.ToString();
for (int i = 0; i < abbrevs.Count; i++)
 metricOp |= abbrevs[i];
metric = metricOp.Optional().Named("metric");
 
I didn't realize the "Optional()" returned a new instance. I also had the wrong index on the abbrevs iterator.
 
How does this not automatically insert the default separator?

Brannon King

unread,
Sep 17, 2013, 12:27:24 PM9/17/13
to eto-...@googlegroups.com
>myGrammar.SetError<Parser>(true);
 
The data type to pass into SetError is not at all obvious. Does the increased error information affect performance at all in the no-error situation? If not, let's have it always on and ditch the methods to turn on additional error reporting. This is true of the AddError flag as well. When would I use that and why?

Curtis Wensley

unread,
Sep 17, 2013, 12:27:50 PM9/17/13
to eto-...@googlegroups.com
Ah you're right, I didn't see that you have a default separator set..

You can change it for any sequence by using the SeparatedBy() fluent api like so:

action = (action & metric).Separate().SeparatedBy(-Terminals.WhiteSpace);

Note, I use the 'Separate()' function since you do not want SeparatedBy() to change the existing action.

Brannon King

unread,
Sep 17, 2013, 12:37:08 PM9/17/13
to eto-...@googlegroups.com
The next issue I'm having is that the ">" is parsed in favor of ">=". I need the bigOp to do one character of lookahead on that. How do I force that?

Curtis Wensley

unread,
Sep 17, 2013, 12:37:23 PM9/17/13
to eto-...@googlegroups.com
You are right, it's not obvious.  The type you pass is the type of parser to set errors for..  e.g. you can set that you want errors generated for all number parsers like:  grammar.SetError<NumberParser>(true);

This is just a helper to set the AddError of all child parsers.

There is a performance hit even when the input follows your grammar, since there will be alternates and optional parsers that fail and add to the error list even when parsing eventually succeeds with a different alternate.  This is because the parser cannot know in advance whether the other alternates will be successful.

This is a performance hit if you're adding an error to the list for (potentially) every character.

I use SetError mainly for debugging. Once your grammar is fully baked, the named error sections are typically 'enough' of a description of what is wrong with the input.

Hope this helps!
Curtis.

Curtis Wensley

unread,
Sep 17, 2013, 12:42:17 PM9/17/13
to eto-...@googlegroups.com
You can use the LookAheadParser to do that.. 

Or, just re-arrange the operators in your alternate to make ">=" come before ">".

Brannon King

unread,
Sep 17, 2013, 2:03:37 PM9/17/13
to eto-...@googlegroups.com
I was able to get my test to parse successfully. One thing that was tricky: I had to include the leading white space in the optional sections or I would get ambiguities. And do I need to specifically call out Terminals.End? This one seems to work:
 
using System;
using System.Collections.Generic;
using System.Linq;
using Asi.Assets.Models;
using Asi.Services;
using Asi.Units;
using Eto.Parse;
using Eto.Parse.Parsers;
namespace Asi.Assignments.UI.Procedures
{
 public class EtoProcedureGrammar: Grammar
 {
  public EtoProcedureGrammar(IEnumerable<TelemetryDefinitionModel> telemetryDefinitions, bool allowCancel) : base("procedure")
  {
   var comment = (WS0 & "#" & (-Terminals.AnyChar ^ Terminals.Eol)).Named("comment").Optional();
   var startRows = new List<Parser>();
   foreach (var @group in telemetryDefinitions.GroupBy(td => td.Executor))
   {
    var telemetry = ("until" & WS1 & CreateTerminals(@group)).Named("telemetry");
    var incAction = (((Parser)"inc" | "dec" | "mul") & WS1 & CreateTerminals(@group.Where(g => !g.IsReadonly), "by")).Named("action");
    var setAction = ("set" & WS1 & CreateTerminals(@group.Where(g => !g.IsReadonly), "at")).Named("action");
    var row = WS0 & ((incAction & (WS1 & telemetry).Optional()) | (setAction & (WS1 & telemetry).Optional()) | telemetry.Optional()) & comment;
    startRows.Add(row.Named("row " + @group.Key));
   }
   var rowPossibilities = startRows[0];
   for (int i = 1; i < startRows.Count; i++)
    rowPossibilities |= startRows[i];
   rowPossibilities.Name = "row possibilities";
   Inner = -(rowPossibilities & (Terminals.Eol | Terminals.End));
  }
  // Mandatory gap: one or more single-line whitespace characters (unary '+' repetition).
  private static readonly Parser WS1 = +Terminals.SingleLineWhiteSpace;
  // Optional gap: zero or more single-line whitespace characters (unary '-' repetition).
  private static readonly Parser WS0 = -Terminals.SingleLineWhiteSpace;


  private Parser CreateTerminals(IEnumerable<TelemetryDefinitionModel> actions, string connector = null)
  {
   var um = ServiceRegistry.Get<UnitsManager>();
   var connectorTerm = connector  != null ? (Parser)connector : null;
   var smallOp = (Parser)"==" | "!=";
   var bigOp = smallOp | ">=" | "<=" | ">" | "<"; // keep single character deals on the right
   Parser combined = null;
   foreach (var definition in actions)
   {
    Parser metric = null;
    if (definition.Metric != PhysicalType.Unitless && um != null)
    {
     var abbrevs = um.FindList(definition.Metric).SelectMany(u => u.Abbreviations).ToList();
     if (abbrevs.Count > 0)
     {
      abbrevs.Sort();
      var metricOp = (Parser)definition.Metric.ToString();
      for (int i = 0; i < abbrevs.Count; i++)
       metricOp |= abbrevs[i];
      metric = metricOp.Named("metric");
     }
    }
    var action = ((Parser)("[" + definition.Name + "]"));
    string name;
    if (definition.TeleType == TelemetryType.String)
    {
     var op = connector != null ? connectorTerm : smallOp;
     var tail = new StringParser{QuoteCharacters = new[]{'"'}, AllowDoubleQuote = false, AllowEscapeCharacters = false, AllowNonQuoted = true};
     action &= WS1 & op & WS1 & tail;

     name = "string action";
    }
    else if (definition.TeleType == TelemetryType.Real)
    {
     var op = connector != null ? connectorTerm : bigOp;
     var tail = new NumberParser {AllowDecimal = true, AllowExponent = true, AllowSign = true};
     action &= WS1 & op & WS1 & tail;

     name = "real action";
    }
    else if (definition.TeleType == TelemetryType.Integer)
    {
     var op = connector != null ? connectorTerm : bigOp;
     Parser tail = null;
     if (definition.Enumerations != null && definition.Enumerations.Length > 0)
     {
      for (int i = 0; i < definition.Enumerations.Length; i++)
      {
       var e = (Parser)definition.Enumerations[i];
       if (i == 0) tail = e;
       else tail |= e;
      }
      name = "enum action";
     }
     else
     {
      tail = new NumberParser{AllowDecimal = false, AllowExponent = false, AllowSign = true};
      name = "int action";
     }
     action &= WS1 & op & WS1 & tail;

    }
    else throw new NotImplementedException("definition.Type is out of range");
    if (metric != null)
     action &= (WS0 & metric).Optional();

Curtis Wensley

unread,
Sep 17, 2013, 3:02:21 PM9/17/13
to eto-...@googlegroups.com
Great!  I'm glad it's working for you.

You don't need to specifically call out Terminals.End.  There's a Grammar.AllowPartialMatch (which by default is false), which ensures that your grammar matches the entire input string.

You do have to include any leading/trailing whitespace (Grammar.DefaultSeparator doesn't cover this). Eto.Parse is fast, but it comes at a bit of a cost: you must be very explicit.

I hope you are enjoying Eto.Parse.. It's really new, though a lot of the code has been sitting around for a very long time - so it's nice to see people using it. (;

Cheers,
Curtis.
Reply all
Reply to author
Forward
0 new messages