Metamath Zero


Mario Carneiro

Feb 20, 2019, 6:32:56 AM
to metamath
I'm experimenting with a new language, Metamath Zero, an amalgamation of Metamath, Ghilbert and Lean syntax. The goal is to balance human readability and ease of writing a verifier, while also making some attempts to patch the bugs in metamath soundness (not that it's wrong, but there is a lot of soundness justification currently happening outside the metamath verifier kernel). Without giving too much explanation of what the syntax means (it's all fictional syntax at the moment, there is no parser yet), I would like to use some of you as guinea pigs for the "readability" aspect. How obvious is it what things mean? I will put some more explanatory comments at the end.

  nonempty sort wff;

  var ph ps ch: wff*;

  term wi (ph ps: wff): wff; infixr wi: -> 25;

  term wn (ph: wff): wff; prefix wn: ~ 40;

  axiom ax-1: << ph -> ps -> ph >>

    (wi ph (wi ps ph));

  axiom ax-2: << (ph -> ps -> ch) -> (ph -> ps) -> ph -> ch >>

    (wi (wi ph (wi ps ch)) (wi (wi ph ps) (wi ph ch)));

  axiom ax-3: << (~ph -> ~ps) -> ps -> ph >>

    (wi (wi (wn ph) (wn ps)) (wi ps ph));

  axiom ax-mp:

    << ph >> ph ->

    << ph -> ps >> (wi ph ps) ->

    << ps >> ps;


  theorem a1i: << ph >> ph -> << ps -> ph >> (wi ps ph);


  def wb (ph ps: wff): wff :=

    << ~((ph -> ps) -> ~(ps -> ph)) >>

    (wn (wi (wi ph ps) (wn (wi ps ph)))) {

    infix wb: <-> 20;


    theorem bi1: << (ph <-> ps) -> ph -> ps >>

      (wi (wb ph ps) (wi ph ps));

    theorem bi2: << (ph <-> ps) -> ps -> ph >>

      (wi (wb ph ps) (wi ps ph));

    theorem bi3: << (ph -> ps) -> (ps -> ph) -> (ph <-> ps) >>

      (wi (wi ph ps) (wi (wi ps ph) (wb ph ps)));

  }


  def wa (ph ps: wff): wff := << ~(ph -> ~ps) >> (wn (wi ph (wn ps))) {

    infix wa: /\ 20;

    theorem df-an: << (ph /\ ps) <-> ~(ph -> ~ps) >>

      (wb (wa ph ps) (wn (wi ph (wn ps))));

  }


  def wtru (bound p: wff): wff := << p <-> p >> (wb p p) {

    notation wtru := << T. >>;

    theorem df-tru: << T. <-> (ph <-> ph) >> (wb wtru (wb ph ph));

  }


  pure nonempty sort set;

  bound x y z w: set;


  term wal (x: set) (ph: wff x): wff; prefix wal: A. 30;


  def wex (x: set) (ph: wff x): wff :=

    << ~A. x ~ph >> (wn (wal x (wn ph))) {

    prefix wex: E. 30;


    theorem df-ex: << E. x ph <-> ~A. x ~ph >>

      (wb (wex x ph) (wn (wal x (wn ph))));

  }


  axiom ax-gen: << ph >> ph -> << A. x ph >> (wal x ph);

  axiom ax-4: << A. x (ph -> ps) -> A. x ph -> A. x ps >>

    (wi (wal x (wi ph ps)) (wi (wal x ph) (wal x ps)));

  axiom ax-5 (ph: wff): << ph -> A. x ph >> (wi ph (wal x ph));


  var a b c: set;

  term weq (a b: set): wff;

  term wel (a b: set): wff;


  {

    local infix weq: = 50;

    local infix wel: e. 40;


    def weu (x: set) (ph: wff x): wff :=

      << E. y A. x (ph <-> x = y) >>

      (wex y (wal x (wb ph (weq x y)))) {

      prefix weu: E! 30;


      theorem df-eu: << E! x ph <-> E. y A. x (ph <-> x = y) >>

        (wb (weu x ph) (wex y (wal x (wb ph (weq x y)))));

    }


    axiom ax-6: << E. x x = a >> (wex x (weq x a));

    axiom ax-7: << a = b -> a = c -> b = c >>

      (wi (weq a b) (wi (weq a c) (weq b c)));


    axiom ax-8: << a = b -> a e. c -> b e. c >>

      (wi (weq a b) (wi (wel a c) (wel b c)));

    axiom ax-9: << a = b -> c e. a -> c e. b >>

      (wi (weq a b) (wi (wel c a) (wel c b)));


    axiom ax-10: << ~A. x ph -> A. x ~ A. x ph >>

      (wi (wn (wal x ph)) (wal x (wn (wal x ph))));

    axiom ax-11: << A. x A. y ph -> A. y A. x ph >>

      (wi (wal x (wal y ph)) (wal y (wal x ph)));

    axiom ax-12 (ph: wff y): << x = y -> A. y ph -> A. x (x = y -> ph) >>

      (wi (weq x y) (wi (wal y ph) (wal x (wi (weq x y) ph))));


    axiom ax-ext: << A. x (x e. a <-> x e. b) -> a = b >>

      (wi (wal x (wb (wel x a) (wel x b))) (weq a b));

    axiom ax-rep (ph: wff y z):

      << A. y E. x A. z (ph -> z = x) ->

         E. x A. z (z e. x <-> E. y (y e. a /\ ph)) >>

      (wi (wal y (wex x (wal z (wi ph (weq z x)))))

          (wex x (wal z (wb (wel z x) (wex y (wa (wel y a) ph))))));

    axiom ax-pow: << E. x A. y (A. z (z e. y -> z e. a) -> y e. x) >>

      (wex x (wal y (wi (wal z (wi (wel z y) (wel z a))) (wel y x))));

    axiom ax-un: << E. x A. y (E. z (y e. z /\ z e. a) -> y e. x) >>

      (wex x (wal y (wi (wex z (wa (wel y z) (wel z a))) (wel y x))));

    axiom ax-reg:

      << E. x x e. a -> E. x (x e. a /\ A. y (y e. x -> ~ y e. a)) >>

      (wi (wex x (wel x a))

          (wex x (wa (wel x a) (wal y (wi (wel y x) (wn (wel y a)))))));

    axiom ax-inf:

      << E. x (a e. x /\ A. y (y e. x -> E. z (y e. z /\ z e. x))) >>

      (wex x (wa (wel a x) (wal y (wi (wel y x)

        (wex z (wa (wel y z) (wel z x)))))));

    axiom ax-ac:

      << E. x A. y (y e. a -> E. z z e. y ->

         E! z (z e. y /\ E. w (w e. x /\ y e. w /\ z e. w))) >>

      (wex x (wal y (wi (wel y a) (wi (wex z (wel z y))

        (weu z (wa (wel z y) (wex w

          (wa (wa (wel w x) (wel y w)) (wel z w)))))))));

  }


  nonempty sort class;

  var A B: class*;

  term cab (x: set) (ph: wff x): class;

  term welc (a: set) (A: class): wff;

  notation cab (x: set) (ph: wff x) := << { x | ph } >>;


  {

    local infix welc: e. 50;

    axiom ax-8b: << a = b -> a e. A -> b e. A >>

      (wi (weq a b) (wi (welc a A) (welc b A)));


    axiom ax-clab: << x e. {x | ph} <-> ph >>

      (wb (welc x (cab x ph)) ph);


    def wceq (A B: class): wff :=

      << A. x (x e. A <-> x e. B) >>

      (wal x (wb (welc x A) (welc x B))) {

      infix wceq: = 50;


      theorem df-ceq: << A = B <-> A. x (x e. A <-> x e. B) >>

        (wb (wceq A B) (wal x (wb (welc x A) (welc x B))));

    }


    def cv (a: set): class := << {x | wel x a} >>

      (cab x (wel x a)) {

      coercion cv: set -> class;


      theorem df-cv: << a e. b <-> wel a b >>

        (wb (welc a (cv b)) (wel a b));

    }

  }


  def wcel (A B: class): wff :=

    << E. x (x = A /\ welc x B) >>

    (wex x (wa (wceq (cv x) A) (welc x B))) {

    infix wcel: e. 50;


    theorem df-cel: << A e. B <-> E. x (x = A /\ welc x B) >>

      (wb (wcel A B) (wex x (wa (wceq (cv x) A) (welc x B))));

  }


* The "sort" keyword declares a new typecode.
* "var" is like "$f".
* "bound" is another kind of variable, that can appear in binders. (Each mm0 sort is represented by two mm sorts, one for bound variables and one for terms. Only bound variables can participate in DV conditions.)
* "term" declares a new axiomatic term constructor.
* DV conditions are implicitly declared by saying what a variable depends on. The global wff variables are declared with type "wff*", meaning that they are open terms: they are not disjoint from anything. The global set variables a, b, c are declared as "set", meaning that they are disjoint from all bound variables in theorems containing these variables. Bound variables are always disjoint from each other.
* "def" declares a definition. Different from both Ghilbert and Metamath, definitions here are "limited lifetime unfolded". Each def has a block after it containing one or more theorems about the definition. These theorems are proven where the definition is replaced by the definiendum, and once the block is closed these theorems are the only access to the definition. (This enables some information privacy if desired.) See wb for example, which uses characterizing theorems instead of an iff.
* The notation system turned out a bit more complicated than I'd hoped. Each "math expression" is stated twice, once in chevrons and once in s-expression notation. The intent is that the system actually uses the s-expressions to represent the axioms (so there is no parse tree funny business), and because they form part of the trust base they are displayed to the user. The notation in the chevrons is checked only in the reverse direction - the s-expression is printed and compared to the given string. (In an IDE there would be a mode where you only type the notation and the computer generates the s-expression.) There is still a danger of ambiguous parsing, but at least you can see the parse directly in the case of a mismatch, and the worst effect of a bad parse is that the theorem being proven is not what the user thought - the ambiguity can't be further exploited in the proof.
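
To make the "reverse direction" check in the last point concrete, here is a minimal sketch in Python (illustrative only, not part of the proposal; the tiny notation table and the crude tokenizer are assumptions for this fragment): the s-expression is printed using the declared notations, and the result is compared token-for-token with the chevron string.

  # Hypothetical notation table for the fragment above: name -> (kind, symbol, precedence).
  NOTATION = {
      "wi": ("infixr", "->", 25),
      "wn": ("prefix", "~", 40),
  }

  def print_sexp(e, prec=0):
      # Print a parsed s-expression (nested tuples/strings), parenthesizing
      # whenever the surrounding context binds tighter than this operator.
      if isinstance(e, str):
          return e                                    # a variable like "ph"
      head, args = e[0], e[1:]
      kind, sym, p = NOTATION[head]
      if kind == "prefix":
          s = sym + print_sexp(args[0], p)
      else:                                           # right-associative infix
          s = print_sexp(args[0], p + 1) + " " + sym + " " + print_sexp(args[1], p)
      return "(" + s + ")" if p < prec else s

  def tokens(s):
      # crude whitespace-insensitive tokenizer, good enough for this fragment
      return s.replace("(", " ( ").replace(")", " ) ").split()

  def check(chevron, sexp):
      return tokens(chevron) == tokens(print_sexp(sexp))

  # ax-1:  << ph -> ps -> ph >>  against  (wi ph (wi ps ph))
  assert check("ph -> ps -> ph", ("wi", "ph", ("wi", "ps", "ph")))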

You might wonder where the proofs are. They go in a separate file, not human readable. The idea is that this file contains all the information that is necessary to appraise the content and correctness of a theorem, and the proof file is just directions to the verifier to produce a proof of the claimed theorems. In particular, if this "specification file" is inspected and correct, then there is no possible input for the proof file that makes the verifier falsely accept a bad theorem. (So no adding axioms, although new definitions are okay.)

I'm curious to hear what you all think of a project such as this. I'm especially interested in ways to simplify the grammar if we can retain readability.

Mario Carneiro

ookami

Feb 20, 2019, 8:11:20 AM
to Metamath


On Wednesday, February 20, 2019 at 12:32:56 PM UTC+1, Mario Carneiro wrote:
I'm experimenting with a new language, Metamath Zero, an amalgamation of Metamath, Ghilbert and Lean syntax. The goal is to balance human readability and ease of writing a verifier, while also making some attempts to patch the bugs in metamath soundness (not that it's wrong, but there is a lot of soundness justification currently happening outside the metamath verifier kernel). Without giving too much explanation of what the syntax means (it's all fictional syntax at the moment, there is no parser yet), I would like to use some of you as guinea pigs for the "readability" aspect. How obvious is it what things mean? I will put some more explanatory comments at the end.

  nonempty sort wff;

  var ph ps ch: wff*;

  term wi (ph ps: wff): wff; infixr wi: -> 25;


What does the number say? Is it a precedence weight? Is the right-associativity hinted somewhere?

 

  term wn (ph: wff): wff; prefix wn: ~ 40;

  axiom ax-1: << ph -> ps -> ph >>

    (wi ph (wi ps ph));

Is the line above an interpretation of the human readable infix, right-associative implication chain within double-angle brackets? In UPN format?

  axiom ax-2: << (ph -> ps -> ch) -> (ph -> ps) -> ph -> ch >>

    (wi (wi ph (wi ps ch)) (wi (wi ph ps) (wi ph ch)));

  axiom ax-3: << (~ph -> ~ps) -> ps -> ph >>

    (wi (wi (wn ph) (wn ps)) (wi ps ph));

  axiom ax-mp:

    << ph >> ph ->

    << ph -> ps >> (wi ph ps) ->

    << ps >> ps;


I'm at work, and have no time to go through the whole grammar file now, but I will after work.

Wolf

Sauer, Andrew Jacob

Feb 20, 2019, 11:00:26 AM
to meta...@googlegroups.com
That looks awesome! And it's not much harder to read than Metamath, if at all.


Raph Levien

Feb 20, 2019, 11:08:57 AM
to Metamath Mailing List
I haven't dug deeply into understanding this, but it warms my heart that ideas from Ghilbert are being considered to push Metamath forward, and this direction looks fairly solid to me. I completely agree that the soundness story in current Metamath is rough, and this was one of the main motivations for Ghilbert. On the other hand, I bit off more than I could chew, and mostly never quite figured out an appropriate module system, or a way to reconcile types (of a dependent flavor) with the mostly untyped approach of Metamath. I think an intermediate approach like this could fly.

Raph
Message has been deleted

David A. Wheeler

Feb 20, 2019, 1:48:33 PM
to metamath, metamath
On Wed, 20 Feb 2019 06:32:39 -0500, Mario Carneiro <di....@gmail.com> wrote:
> I'm experimenting with a new language, Metamath Zero, an amalgamation of
> Metamath, Ghilbert and Lean syntax.

I'm hoping it'll be designed so databases like set.mm and iset.mm can be
automatically converted into it. It looks like that's the case.

Syntax should be simple, though "simple" is in the eye of the beholder :-).

> var ph ps ch: wff*;

I don't see why you need a "*" here.

> term wi (ph ps: wff): wff; infixr wi: -> 25;

Why do you need to repeat "wi:" here?
I wonder if adding a "precedence: 25" would help, else the number is a mystery.

> axiom ax-1: << ph -> ps -> ph >>
> (wi ph (wi ps ph));

I see the s-expression below the axiom, but isn't that trivial to re-discover?
I think you should apply "Don't repeat yourself (DRY)", or a verifier will
have to recheck something that shouldn't need to be said at all.

> axiom ax-mp:
>
> << ph >> ph ->
> << ph -> ps >> (wi ph ps) ->
> << ps >> ps;

That's a lot of chevrons, and that seems much harder to follow
than metamath's current approach.
Axioms, in particular, are a big deal. It should be
"as obvious as possible" to the reader that they're correct.

I'm fine with s-expressions, but a lot of people DO NOT understand them.
If you make people input s-expressions, you might lose
some people entirely. In cases where the s-expression can be easily determined,
you shouldn't make a human create them.

You might consider using neoteric expressions instead of s-expressions.
They have the same meaning, but look more like math.
Basically "x(a b c)" means the same thing as "(x a b c)".
More details available here from Readable Lisp (my project):
https://readable.sourceforge.io/

That would convert this:
> (wn (wi (wi ph ps) (wn (wi ps ph))))
into this:
> wn(wi(wi(ph ps) wn(wi(ps ph))))
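
The rewrite is mechanical once the expression is parsed; a minimal sketch (illustrative only, assuming s-expressions encoded as nested tuples):

  def neoteric(e):
      # "(x a b c)" becomes "x(a b c)"; variables print as themselves.
      if isinstance(e, str):
          return e
      head, args = e[0], e[1:]
      return head + "(" + " ".join(neoteric(a) for a in args) + ")"

  print(neoteric(("wn", ("wi", ("wi", "ph", "ps"), ("wn", ("wi", "ps", "ph"))))))
  # prints: wn(wi(wi(ph ps) wn(wi(ps ph))))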


> theorem bi2: << (ph <-> ps) -> ps -> ph >>
> (wi (wb ph ps) (wi ps ph));

Again, I don't see why you need to expressly state the syntax proof.
It's noise for the human, and the computer can recalculate & check that trivially.

I see infixr. Are you planning to support arbitrary-length expressions of the
same operator, e.g., "( 1 + 2 + 3 + 4 )"?

> * The notation system turned out a bit more complicated than I'd hoped.
> Each "math expression" is stated twice, once in chevrons and once in
> s-expression notation. The intent is that the system actually uses the
> s-expressions to represent the axioms (so there is no parse tree funny
> business), and because they form part of the trust base they are displayed
> to the user. The notation in the chevrons is checked only in the reverse
> direction - the s-expression is printed and compared to the given string.
> (In an IDE there would be a mode where you only type the notation and the
> computer generates the s-expression.) There is still a danger of ambiguous
> parsing, but at least you can see the parse directly in the case of a
> mismatch, and the worst effect of a bad parse is that the theorem being
> proven is not what the user thought - the ambiguity can't be further
> exploited in the proof.

Wouldn't it be simpler to simply require that you cannot add a new
axiom or definition if it results in a syntactic ambiguity?
I *think* that would get rid of the "funny business" while eliminating duplicate
declarations. But that depends on what "funny business" you mean,
and I'm not sure I know what you're referring to.

> You might wonder where the proofs are. They go in a separate file, not
> human readable. The idea is that this file contains all the information
> that is necessary to appraise the content and correctness of a theorem, and
> the proof file is just directions to the verifier to produce a proof of the
> claimed theorems.

I think it's important that the proofs include the specific step-by-step instructions
as metamath does now - that makes it trivial to verify by humans and computers.
If it *also* has higher-level instructions (say, to try to reconstruct a proof if some
definitions have changed) that's fine.

--- David A. Wheeler

David A. Wheeler

Feb 20, 2019, 10:40:36 PM
to metamath, metamath
On Wed, 20 Feb 2019 06:32:39 -0500, Mario Carneiro <di....@gmail.com> wrote:
> I'm experimenting with a new language, Metamath Zero, an amalgamation of
> Metamath, Ghilbert and Lean syntax. The goal is to balance human
> readability and ease of writing a verifier, while also making some attempts
> to patch the bugs in metamath soundness (not that it's wrong, but there is
> a lot of soundness justification currently happening outside the metamath
> verifier kernel).

If possible, I think it'd be good to have an additional reason to create a new language,
or at least argue why similar results can't be done with tweaks to
existing mechanisms like $j.

1. Putting more soundness justifications into the underlying verifier
is indeed a worthy goal. But most of set.mm is proofs, not definitions,
and there are very few "axioms" in the traditional sense... so it's not clear
that all the verifiers need to check all that much.
I can also imagine tweaking existing systems instead of creating a new language.

2. Making the input "more readable" is worthwhile, but I'm not sure that
by *itself* this is enough. Making it so that we can be more flexible
about parentheses would *certainly* be a nice improvement.
Supporting ph /\ ps /\ ch /\ ... would be nice as well.

Expanding on #2: If it can manage to easily support arbitrary-length arguments like
ph /\ ps /\ ch /\ th /\ ... then I can suddenly see that it's able to do something
that Metamath today can't. Is that easy? Worth it?
I don't know, but that *would* go beyond our current setup.

No matter what, I recommend that you do NOT depend
too much on precedence, even when available.
"Developer beliefs about binary operator precedence (part 1)” by Derek M. Jones
http://www.knosof.co.uk/cbook/accu06.html
"a survey of developers at the 2006 ACCU conference to determine if they correctly applied the precedence and associativity of the binary operators common to C, C++, C#, Java, Perl, and PHP. In this experiment only 66.7% of the answers were correct (standard deviation 8.8, poorest performer 45.2% correct, best performer 80.5% correct); this was not much better than random chance (50%). Even many widely-used operator pairs had relatively poor results; only 94% of the problems were correctly solved when they combined * and +, 92% when they combined * and -, and 69% when they combined / and +."
Professional mathematicians probably do get precedence correct, but where possible I'd like the results to be understood by others.

When we developed my "Readable Lisp" we supported infix but decided to NOT support precedence. The trade-offs are very different here, so I don't think that reasoning really applies here. But my point is that supporting lots of precedence levels isn't really, by itself, a big win.

> * "def" declares a definition. Different from both Ghilbert and Metamath,
> definitions here are "limited lifetime unfolded". Each def has a block
> after it containing one or more theorems about the definition. These
> theorems are proven where the definition is replaced by the definiendum,
> and once the block is closed these theorems are the only access to the
> definition. (This enables some information privacy if desired.) See wb for
> example, which uses characterizing theorems instead of an iff.

That makes sense. However, in some cases you will want a way to say
"don't use this theorem" so you can prove statements that you don't use.

But what about proving theorems that have to "reach into" multiple definitions to work?
You might need a way to express that you're intentionally breaking the rules.

> * The notation system turned out a bit more complicated than I'd hoped.
> Each "math expression" is stated twice, once in chevrons and once in
> s-expression notation.

As I noted earlier, stating things twice is undesirable,
and I don't think it's necessary in this case.

--- David A. Wheeler

Mario Carneiro

Feb 20, 2019, 10:48:01 PM
to metamath
Thank you everyone for all the suggestions. I will see if I can respond to each of them.

On Wed, Feb 20, 2019 at 8:11 AM 'ookami' via Metamath <meta...@googlegroups.com> wrote:
  nonempty sort wff;

  var ph ps ch: wff*;

  term wi (ph ps: wff): wff; infixr wi: -> 25;


What does the number say? Is it a precedence weight? Is the right-associativity hinted somewhere?

It is indeed a precedence number. The right associativity is the "r" suffix, but this command and its cousins have been called out by several people, so I think I need to rethink it. Maybe even having precedences in a numeric order is a bad idea, although I am really wary about making the notation system any more complicated than it already is (and if I can simplify it dramatically that would be even better).

  term wn (ph: wff): wff; prefix wn: ~ 40;

  axiom ax-1: << ph -> ps -> ph >>

    (wi ph (wi ps ph));

Is the line above an interpretation of the human readable infix, right-associative implication chain within double-angle brackets? In UPN format?

What's UPN? The <<quoted>> expression is what it looks like with the notation system, and the s-expression is what the actual axiom is for the verifier.
 

On Wed, Feb 20, 2019 at 11:18 AM j4n bur53 <burs...@gmail.com> wrote:
How do you forbid quantification over classes? Does this happen
since it is a new sort (your "nonempty sort class;")?

The problem is, you might end up with something other than ZFC.

The "nonempty" modifier provides the ability to use bound variables of this sort (i.e. dummy variables). In metamath all sorts are nonempty in this sense. You can't declare a "bound X: class" if the sort "class" wasn't defined as nonempty.

But this might seem strange then, because I'm saying that you *can* quantify over classes. Why isn't it stronger than ZFC? In order to use this power, you also have to declare a binding syntax constructor for the sort. For example,
 

term wal (x: set) (ph: wff x): wff; term cab (x: set) (ph: wff x): class;


This declares wal and cab as binding operators (ph depends on the variable x), so there is a way to quantify over sets in the wff and class sorts. Without a similar operator for classes, you can't quantify over classes in the term language.

But why are we allowing bound wffs and classes anyway? Consider the definition of wtru:

  def wtru (bound p: wff): wff := << p <-> p >> (wb p p) {

    notation wtru := << T. >>;

    theorem df-tru: << T. <-> (ph <-> ph) >> (wb wtru (wb ph ph));

  }


The "bound p: wff" binder is deliberate here: p is a "bound wff" in the definition. As a syntax operator, "wtru" has 0 arguments, not 1. This is no different from a dummy bound variable in a definition like weu:

  def weu (x: set) (ph: wff x) (bound y: set): wff :=

    << E. y A. x (ph <-> x = y) >>

    (wex y (wal x (wb ph (weq x y)))) {

    prefix weu: E! 30;


    theorem df-eu: << E! x ph <-> E. y A. x (ph <-> x = y) >>

      (wb (weu x ph) (wex y (wal x (wb ph (weq x y)))));

  }


(In the original post I didn't include the (bound y) quantifier but it's implicitly pulled in by the "bound y" top level declaration. I will need to work on the exact rules for how these variables get inferred, a process similar to the "optional $f" hypotheses in metamath axioms and theorems.) The meaning of a bound quantifier is that it doesn't appear in the syntax constructor (weu gets 2 arguments, wtru gets 0) but it does appear in the definition itself. This is a weird thing to do in a HOL context, but the idea is that if you know the sort is nonempty in the usual sense, then you can use this to choose values for the dummy variables. I haven't said how these theorems get translated to proof obligations but the actual theorem df-tru to be proven is

proof df-tru: (ps <-> ps) <-> (ph <-> ph) := ...

(the concrete syntax here is irrelevant, this is actually part of the binary companion file). Similarly for df-eu - the dummy bound variables are renamed and the fixed arguments aren't:

proof df-eu: E. z A. x (ph <-> x = z) <-> E. y A. x (ph <-> x = y) := ...

For most definitions this proof will be some variety of biid, but for dummy variable renames like this it will be a metatheorem which can be automated by a prover.

Here is a wish for a new system dubbed "Shullivan":
The ability to directly work with infix, etc., no redundancy
with chevrons. I guess chevrons are only comments?

The chevrons are "checked comments", if you like. My idea for avoiding parsing is to take the s-expression, print it, and compare the result token-for-token with the input string (ignoring whitespace). There is still some potential for abuse if you write an ambiguous grammar, where the danger is that an ambiguous expression is disambiguated in the s-expression in a way other than what the reader expected, but the s-expression is at least available for reference.


On Wed, Feb 20, 2019 at 1:48 PM David A. Wheeler <dwhe...@dwheeler.com> wrote:
On Wed, 20 Feb 2019 06:32:39 -0500, Mario Carneiro <di....@gmail.com> wrote:
> I'm experimenting with a new language, Metamath Zero, an amalgamation of
> Metamath, Ghilbert and Lean syntax.

I'm hoping it'll be designed so databases like set.mm and iset.mm can be
automatically converted into it.  It looks like that's the case.

It is indeed a design goal that metamath zero files can be translated to metamath files, as well as to HOL, without much encoding overhead. This means that the primitive constructs are chosen so that they both have a reasonable interpretation as a multi-sorted first-order theory, as well as having a cheap metamath interpretation (so, unlike HOL, proper substitution will not be a one-step operation).

Translating from metamath to mm0 is a bit trickier, because some annotation information in the mm0 file is not present in mm files. If you skip some of the foundational material (roughly the contents of this file), translating a set.mm-like file should be straightforward and automatable.
 
Syntax should be simple, though "simple" is in the eye of the beholder :-).

To clarify: syntax should be "simple" in the sense of being easy to write a small program to check it, and it should be "readable" in the sense of "look it does what I said".
 
>   var ph ps ch: wff*;

I don't see why you need a "*" here.

This declares these variables as "open wffs", meaning that by default they are assumed not to be disjoint from any bound variables in the axioms and theorems. Notice that ax-5 redeclares (ph: wff), while it would normally be imported as (ph: wff x). This means that ph does *not* depend on x, and hence there is a DV condition between ph and x.
 
>   term wi (ph ps: wff): wff; infixr wi: -> 25;

Why do you need to repeat "wi:" here?
I wonder if adding a "precedence: 25" would help, else the number is a mystery.

This is actually two commands, I probably should have put them on separate lines. You are allowed to declare a notation separately from the syntax itself, and this is used for example with wel = "e." to use it as a local notation in the block and then redeclare e. to mean welc and then wcel (which is the global notation, once the class setup is complete). Metamath does a slightly sketchy thing here, where all three of these definitions are used via wcel, where first we axiomatize wcel (cv x) (cv y), then we generalize to wcel (cv x) A and finally to wcel A B, and the equivalence of these three definitions on their common domain is implicitly assumed.
 
>   axiom ax-1: << ph -> ps -> ph >>
>     (wi ph (wi ps ph));

I see the s-expression below the axiom, but isn't that trivial to re-discover?
I think you should apply "Don't repeat yourself (DRY)", or a verifier will
have to recheck something that shouldn't need to be said at all.

Here is an important point: This language is intended to be "readable", not "writable". This means that things like boilerplate are okay as long as a mistake in the boilerplate doesn't compromise correctness. In fact, I anticipate that this file will be compiled from a slightly less verbose version (not much, because it needs to be directly inspectable) where various things can be omitted. See the next point.
 
>   axiom ax-mp:
>
>     << ph >> ph ->
>     << ph -> ps >> (wi ph ps) ->
>     << ps >> ps;

That's a lot of chevrons, and that seems much harder to follow
than metamath's current approach.
Axioms, in particular, are a big deal.  It should be
"as obvious as possible" to the reader that they're correct.

I didn't demonstrate this in the file, but the chevrons are optional. So you could just write:

axiom ax-mp: ph -> (wi ph ps) -> ps;

and it would be understood just as well. In such a small axiom as this that might be a reasonable thing to do (and in particular for single wffs the << ph >> ph business just looks silly). However the s-expressions are NOT optional. This is one of the things that would be omitted in the "less verbose" version of the file - you might just write

axiom ax-mp: << ph >> -> << ph -> ps >> -> << ps >>; 

and the compiler, which has a proper parser, would produce the s-expressions for you.

The problem with metamath's current approach is that it's subtly unsafe, as we saw recently with the example of syntax ambiguity causing proofs of false. Metamath treats statements as lists of symbols, which means that you can do some shenanigans with ambiguity if the grammar supports it. To prevent this, the verifier here works entirely with trees (encoded as s-expressions). So the worst that can happen is that a theorem doesn't say what you think, it can't be exploited in the middle of a proof.

So like I said to Jan, you should think of the chevron expressions as "checked comments"; they are there to help you understand the s-expression, but it's available if you need to double check.
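
As a minimal sketch of what "working entirely with trees" buys (illustrative only; the two-constructor signature table is just the propositional fragment above, not a real verifier): every statement either type-checks against the declared arities and sorts or is rejected outright, with no string-level grammar to exploit.

  # Declared term constructors: name -> (argument sorts, result sort).
  SIG = {
      "wi": (["wff", "wff"], "wff"),   # term wi (ph ps: wff): wff
      "wn": (["wff"], "wff"),          # term wn (ph: wff): wff
  }
  VARS = {"ph": "wff", "ps": "wff", "ch": "wff"}

  def sort_of(e):
      if isinstance(e, str):
          return VARS[e]               # unknown names raise KeyError
      head, args = e[0], e[1:]
      arg_sorts, result = SIG[head]
      actual = [sort_of(a) for a in args]
      if actual != arg_sorts:
          raise TypeError(head + " applied to " + repr(actual))
      return result

  # ax-1 as a tree: (wi ph (wi ps ph)) is a wff; no parsing of strings is involved.
  assert sort_of(("wi", "ph", ("wi", "ps", "ph"))) == "wff"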

I'm fine with s-expressions, but a lot of people DO NOT understand them.
If you make people input s-expressions, you might lose
some people entirely.  In cases where the s-expression can be easily determined,
you shouldn't make a human create them.

People shouldn't have to write s-expressions, although they may need to look at them as in this file. (Of course I wrote all the s-expressions in the file, perhaps if I make a parser for this file it will point out where I've got it wrong. But that's just because the tool doesn't exist yet.)
 
You might consider using neoteric expressions instead of s-expressions.
They have the same meaning, but look more like math.
Basically "x(a b c)" means the same thing as "(x a b c)".
More details available here from Readable Lisp (my project):
  https://readable.sourceforge.io/

That would convert this:
>     (wn (wi (wi ph ps) (wn (wi ps ph))))
into this:
>     wn(wi(wi(ph ps) wn(wi(ps ph))))

I don't like that spaces are significant with this layout. Also the beginning and end of an s-expression is clear because of matching parentheses, which makes all the rest of the parsing that much easier. (I'm personally also used to this layout from functional programming - this is the usual notation for the lambda calculus.)

>     theorem bi2: << (ph <-> ps) -> ps -> ph >>
>       (wi (wb ph ps) (wi ps ph));

Again, I don't see why you need to expressly state the syntax proof.
It's noise for the human, and the computer can recalculate & check that trivially.

Not if the verifier doesn't have a parser! I'm worried that if I add a parser this will be a large fraction of the code, and it will be a large moving part that could contribute to uncertainty that the program has no bugs. (There will be a parser of course, for the code syntax itself, but it will be statically known so you can write it in a few functions.)

I see infixr.  Are you planning to support arbitrary-length expressions of the
same operator, e.g., "( 1 + 2 + 3 + 4 )"?

That would be handled by declaring "infixl cadd: + 60;" to say that + is left associative with precedence 60. Then "1 + 2 + 3 + 4" would be the printed form of (cadd (cadd (cadd c1 c2) c3) c4).
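
A minimal sketch of that printing rule (illustrative only; cadd and c1..c4 are the hypothetical names above): for a left-associative operator the left argument may be printed at the same precedence level, so the whole chain needs no parentheses.

  NOTATION = {"cadd": ("infixl", "+", 60)}
  CONSTS = {"c1": "1", "c2": "2", "c3": "3", "c4": "4"}

  def pr(e, prec=0):
      if isinstance(e, str):
          return CONSTS.get(e, e)
      head, (left, right) = e[0], e[1:]
      _, sym, p = NOTATION[head]
      s = pr(left, p) + " " + sym + " " + pr(right, p + 1)   # left side may reuse level p
      return "(" + s + ")" if p < prec else s

  print(pr(("cadd", ("cadd", ("cadd", "c1", "c2"), "c3"), "c4")))
  # prints: 1 + 2 + 3 + 4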
 
> * The notation system turned out a bit more complicated than I'd hoped.
> Each "math expression" is stated twice, once in chevrons and once in
> s-expression notation. The intent is that the system actually uses the
> s-expressions to represent the axioms (so there is no parse tree funny
> business), and because they form part of the trust base they are displayed
> to the user. The notation in the chevrons is checked only in the reverse
> direction - the s-expression is printed and compared to the given string.
> (In an IDE there would be a mode where you only type the notation and the
> computer generates the s-expression.) There is still a danger of ambiguous
> parsing, but at least you can see the parse directly in the case of a
> mismatch, and the worst effect of a bad parse is that the theorem being
> proven is not what the user thought - the ambiguity can't be further
> exploited in the proof.

Wouldn't it be simpler to simply require that you cannot add a new
axiom or definition if it results in a syntactic ambiguity?
I *think* that would get rid of the "funny business" while eliminating duplicate
declarations.  But that depends on what "funny business" you mean,
and I'm not sure I know what you're referring to.

How will you know if new notation (not axiom/definition) results in a syntax ambiguity? The general problem is undecidable. What I want to avoid is a gnarly grammar with a complicated ambiguity being exploited somewhere deep in the proof (which isn't even being displayed) and causing something to be provable that shouldn't be.
 
> You might wonder where the proofs are. They go in a separate file, not
> human readable. The idea is that this file contains all the information
> that is necessary to appraise the content and correctness of a theorem, and
> the proof file is just directions to the verifier to produce a proof of the
> claimed theorems.

I think it's important that the proofs include the specific step-by-step instructions
as metamath does now - that makes it trivial to verify by humans and computers.
If it *also* has higher-level instructions (say, to try to reconstruct a proof if some
definitions have changed) that's fine.

This language is not intended as a replacement for metamath. It's not designed for showing you proofs. It is intended to convince you *that* a fact is true, not *why* a fact is true. The trustedness of the verifier is an essential component of the design, which is why it has to be so simple. My intention is to use this as the base of a bootstrap chain for more complicated compilers and verifiers, possibly including metamath itself.

There are a variety of sublanguages of mm0 that can be investigated. For example, dropping the notation system makes everything a lot simpler, but now you're staring at s-expressions. Dropping variable declarations means you have to declare them in every axiom/theorem, which is unergonomic but doesn't harm readability. Some of the sort modifiers are not strictly necessary but make translations to FOL easier. Maybe the lowest level of the bootstrap will actually be one of these simplified languages, but if I start asking people to read hexadecimal for the sake of simplifying the program then it makes it much harder to understand what was proven, even if they believe that the program checked *something*. I'm looking for a minimal point on this tradeoff curve - the program is dead simple, the axioms are easy to understand, the theorems are easy to interpret, so if you believe your computer is reliable enough to execute instructions then a theorem has to be true. It's "extreme formal verification".

[Translated from Portuguese]
On Wed, Feb 20, 2019 at 3:09 PM André L F S Bacci <andrebac...@gmail.com> wrote:
On Wed, Feb 20, 2019 at 8:32 AM Mario Carneiro <di....@gmail.com> wrote:
I'm experimenting with a new language, Metamath Zero, an amalgamation of Metamath, Ghilbert and Lean syntax. The goal is to balance human readability and ease of writing a verifier, while also making some attempts to patch the bugs in metamath soundness (not that it's wrong, but there is a lot of soundness justification currently happening outside the metamath verifier kernel). Without giving too much explanation of what the syntax means (it's all fictional syntax at the moment, there is no parser yet), I would like to use some of you as guinea pigs for the "readability" aspect. How obvious is it what things mean? I will put some more explanatory comments at the end.

  nonempty sort wff;

Easy to write a verifier: Stop right here.

A large number of keywords will make any verifier complex. Think about how to be able to declare syntax, in the language itself. This will leave the compiler "soft", but will also allow expansions not to make the collection invalid. Look at proofs in HOL that are no longer possible to run today.

In a language I started, almost all statements started with "def" or "var". This allowed for having zillions of keywords within def, but which were not reserved words in the language itself.

In basic definitions, verbose:


typecode wff ::nonempty;

I agree that lots of keywords makes things harder. But actually there doesn't need to be a notion of "keyword" here, in the sense that just because a command starts with a word like "nonempty" or "term" doesn't mean that these words are unavailable as names of variables or names of axioms/theorems. Each command begins and ends in a definite way, so you should never be guessing about the token class of the next word. That said, it's true that prefix modifiers like "pure nonempty sort set;" can be problematic. In this case I think it's okay since there are a finite number of ways to start a command, so for example the first word of the file can only be "pure", "nonempty", "sort", "var", "def", "{", and so on without any "open classes" like identifiers. After reading "var" or "def" or "sort" we expect an identifier, or in the case of "var" we expect an identifier or ":". It's all expressible as a finite state machine on the token classes. Sentinel tokens like ":" and ";" and "<<" can't be legal identifiers, but beyond that there shouldn't be much constraint necessary. For safety's sake I think identifiers will be limited to roughly alphanumeric sequences anyway, but command-starting "keywords" will be legal identifiers as well.
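
A rough sketch of the lexical idea (illustrative only; the sentinel set is a guess at the fragment above): sentinel tokens are peeled off even without surrounding spaces, while command words like "var" or "sort" remain ordinary identifiers rather than reserved words.

  import re

  SENTINELS = ["<<", ">>", "(", ")", "{", "}", ":", ";", "*"]
  IDENT = r"[^\s(){}:;*]+"        # anything else: var, wff, ph, ->, <->, ...
  TOKEN = re.compile("|".join(map(re.escape, SENTINELS)) + "|" + IDENT)

  def lex(src):
      return TOKEN.findall(src)

  print(lex("var ph ps ch: wff*;"))
  # ['var', 'ph', 'ps', 'ch', ':', 'wff', '*', ';']
  print(lex("axiom ax-1: << ph -> ps -> ph >> (wi ph (wi ps ph));"))
  # ['axiom', 'ax-1', ':', '<<', 'ph', '->', 'ps', '->', 'ph', '>>',
  #  '(', 'wi', 'ph', '(', 'wi', 'ps', 'ph', ')', ')', ';']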

  var ph ps ch: wff*;


The : is also a keyword from what I understand. Style suggestion. Use var a b c :wff instead of var a b c: wff. That is, leave the : more attached to the type than to one of the variables.

By the way, only one type per variable? Think about adding something like partial types, or to use concepts of statically typed languages, declaring interfaces (::)?


The ":" is indeed a keyword here. I'm not sure I like to align the colon like that, but in Lean syntax (which is the inspiration for most of the concrete syntax here) the colon has spaces on both sides, as in:

var ph ps ch : wff*;

If I separate out all the tokens, it should rightly be

var ph ps ch : wff * ;

(oh, I guess "*" is also a keyword here, that is, identifiers can't contain a * in them) but I think this is a bit ugly, and metamath has often been criticized for its gratuitous spacing. Assuming there is a lexer that can handle lack of spaces, it shouldn't matter how it's aligned. There won't be a "multiple var" statement where you can have different types anyway, you just use "var" multiple times.

  term wi (ph ps: wff): wff; infixr wi: -> 25;

  term wn (ph: wff): wff; prefix wn: ~ 40;


Here I suggest being as verbose as possible, more syntactic than logical.

syntax wi ( ph :wff "->" ps :wff) :wff infix.right 25;

Alternatively:

syntax wi :wff ( ph :wff "->" ps :wff) infix.right 25;

That is, types closest to the name, definition inside the parentheses, operator defined within quotes (escaped by \)

Operators like this, in quotes, allow a bit of language abuse, but also allow you to create things like writing Latex-style operations: \dot

The advantage of having a separate command for prefix/infix/suffix precedence is to tweak the priorities in advance. But it is asking for confusion. I will not suggest removing the command, but at least have the option to put the priority in the syntax command itself, to encourage it to stay in one place.

For me part of the appeal of the order

term wi (ph ps: wff): wff;

or equivalently

term wi: wff -> wff -> wff;
 
is the direct connection to typing terms in simple type theory. (It's not real STT, so (wff -> wff) -> wff or other things are not allowed. But in as much as I can suggest the connection, I think it is helpful to organize things so the analogy makes sense.) With binders it gets harder to do this so directly, and I think this might be unavoidable given the way metamath handles binding variables and dummy variables, but I try to keep the notation suggestive.

As for putting the syntax in the term, I originally played with the following syntax:

@[infixr -> 25] term wi: wff -> wff -> wff;

The @[...] notation is used for annotations in lean, and it seemed like a nice use for them. But when I wanted to have separate notations I needed a standalone command, and having two syntactic constructs for the same thing seemed like a bad idea. Lean also has a way to apply annotations in a standalone context:

attribute [infixr -> 25] wi; local attribute [infixr -> 25] wi;

But this seems like a lot of extra infrastructure for little readability gain. (I think term -> syntax and infixr -> infix.right are good ideas, though, given that someone already missed the "r" when asking about why this means -> is right assoc.) I grant that having two commands and repeating the name of the term is boilerplate, but I'm not sure that the alternative is  that much more readable. If every term had a mandatory notation I could see that adding them together would be a good idea, but I don't think this is necessary. (I am still not sure what the best approach is here.)

For parsing the arrow properly, my original intent was to use whitespace to separate the token, whatever it is. There should be some latitude in what is allowed at this spot, in order to allow as many kinds of math notation as possible. Alternatively it could be surrounded by chevrons to reinforce the idea that this is intended for use in the math strings (but it would still need to have spaces around it so that the arrow doesn't lexically collide with the chevrons).

  axiom ax-1: << ph -> ps -> ph >>

    (wi ph (wi ps ph));


Here a weird suggestion, but... Axioms -> syntax. Then use the command, much like...

syntax ax-1 : "ph -> ps -> ph" :: (wi ph (wi ps ph)) ;

Here : and :: are reused, with space to the right (without a type), only to denote clear points for the parser.

Or if you find overloading the syntax ugly (and it is), the suggestion above about "def":

def.syntax wi :wff ( ph :wff "->" ps :wff) infix.right 25;
def.axiom ax-1 : "ph -> ps -> ph" :: (wi ph (wi ps ph)) ;

I don't think this is a good idea. syntax and axiom have different interpretations and different parsing, so there isn't much gain for the parser to use the same word for both. And making them semi-namespaced under "def" doesn't make anything easier - we still have to handle them as different constructs.

Your notation for math string + s-expression is a bit nicer to read than mine, I think, although I hope there won't be too much string escaping going on. (It's supposed to be directly readable so there's no point in escaping things.) Also the string tends to suggest that whitespace is significant and newlines are not allowed, at least to me. (I think set.mm uses " for function image, but we could use '' (two single quotes) instead. Also <" "> (my fault) might be replaceable with <' '> if it's not too confusing. With those replacements, I think you can completely get away with quoting math expressions, which would make everything a lot easier.)

  axiom ax-2: << (ph -> ps -> ch) -> (ph -> ps) -> ph -> ch >>

    (wi (wi ph (wi ps ch)) (wi (wi ph ps) (wi ph ch)));

  axiom ax-3: << (~ph -> ~ps) -> ps -> ph >>

    (wi (wi (wn ph) (wn ps)) (wi ps ph));

  axiom ax-mp:

    << ph >> ph ->

    << ph -> ps >> (wi ph ps) ->

    << ps >> ps;


  theorem a1i: << ph >> ph -> << ps -> ph >> (wi ps ph);


def.theorem a1i: << ph >> ph -> << ps -> ph >> (wi ps ph);

That is, def is effectively a reserved word, and therefore can be specialized via .special. There will be def without specialization for everything else.

I think if we are looking at unifying different keywords, the most similar are "axiom" and "theorem". In metamath the only difference is the presence of a proof in one, and in this case there isn't even that since the proof is elsewhere. But I don't have any good names for super-commands.

"def" is actually a bit unusual in this language, because it ends with a block instead of a semicolon like the others, and we have this required "definition block" thing that prepares the definition for use. This is one of the things I intend to have in the "less verbose" language - you are allowed to omit definition blocks by ending with ; instead, and the compiler will prove the equational theorem for the definition (which requires it to know a bit about equality) and insert the definitional theorem in a block.
 
Other than that, general suggestions.

- The notion of local blocks, delimited by {} is good, but will probably clash with other uses. When building the parser pay attention to this. Starting everything by "def" helps mitigate this problem.

Lean uses "section ... end" in place of "{ ... }" for this purpose, which is nice and wordy and makes it clear that it's another command keyword. I am pretty sure that it won't cause any clashes, since they can only appear when a new command is starting, but I will keep an eye out.

- The notion of separate and "solid" files is very important. I strongly suggest putting this in the language from the first version. A simple:

file "path/name";

in the solid file indicates where or to where the next lines go in case of split, and those lines, and a "master" file, containing only:

file.include "path/name";

is produced when split, allowing reconstruction.

Indeed, dealing with modularization seems pretty important and apparently caused problems for Raph, so perhaps I should give it more thought.

In a specification file such as this, we don't have to worry about proof dependencies, but we do have to be able to import axiom systems and syntax from another file. But now I'm worried about the possible security vulnerabilities this causes. A specification file is supposed to be self-contained in the sense that if you know the language then it says what it means. If a specification file imports another, then the meaning of the first file depends on the meaning of that other file as well. It's even worse if the path is ambiguous in some way, and we don't see exactly what it's referring to. If the includes are relative to a path provided at the command line, then it's not even definite what file is being referred to.

One way to make sure the import is right is to reproduce the axioms and theorems that are expected to be imported. This way the verifier can double check that all the files are stitched together correctly. But that may well be the whole file or nearly so (theorems don't need to be imported, but axioms and definitions do). And that's a hell of a lot of boilerplate, and also obscures the actual content of the file if it starts with a lengthy recap section.

The specification file / proof file dichotomy is supposed to represent the distinction between *what* is being proven and *how* it should be proven. The proof file is a fine place for putting in instructions for how to find other files, but the specification files should be more focused on content and cross checks.

I will return to this topic in a bit with a more concrete syntax proposal.

- Instead of using << and >>, which certainly already has use, I suggest separating parts of syntax by <? and ?>, common from XML.

That... doesn't look as bad as I thought it would at first.

- In syntax, even within <? and ?>, everything that is expected to be fixed must be enclosed in quotation marks, and anything outside the quotation marks has some programmatic meaning.

def.syntax wi :wff <? "(" ph :wff "->" ps :wff ")" ?> infix.right 25 ;
def.axiom ax-1 <? ph -> ps -> ph ?> (wi ph (wi ps ph)) ;

Which is more verbose than before, but leaves the parser simultaneously simpler and more flexible. In the first case the compiler knows that it is defining a thing, given by the quotation marks. In the second case everything is expected, and should come from the construction by s-expressions.

I agree that uniformity of purpose like this is a good thing. But isn't declaring notation like that, with concrete "(" ")", just going to lead to another metamath parenthesis-fest?

Looking at this now...  (wi ph (wi ps ph)) will create a lot of "(" and ")" that does not exist in <? ph -> ps -> ph ?>. You realize that, I guess.

Yes, that's pretty normal for s-expressions. Because each syntax constructor takes a known number of arguments, it is possible to omit all the parentheses and use prefix polish notation (or RPN like metamath does), but this is absolutely unreadable and extremely difficult to get right by hand.
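
For illustration (a sketch, not part of the proposal; arities are hard-coded for the propositional fragment), this is all the round-trip needs, which is also why the result is so hard for a human to read or write directly:

  ARITY = {"wi": 2, "wn": 1}

  def to_polish(e):
      # flatten a tree to a parenthesis-free token list
      if isinstance(e, str):
          return [e]
      out = [e[0]]
      for a in e[1:]:
          out += to_polish(a)
      return out

  def from_polish(tokens):
      it = iter(tokens)
      def read():
          t = next(it)
          if t not in ARITY:
              return t                            # a variable
          return (t,) + tuple(read() for _ in range(ARITY[t]))
      return read()

  e = ("wi", "ph", ("wi", "ps", "ph"))
  assert to_polish(e) == ["wi", "ph", "wi", "ps", "ph"]
  assert from_polish(to_polish(e)) == e           # unambiguous, no parentheses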

Mario

Mario Carneiro

Feb 20, 2019, 11:16:49 PM
to metamath
There is a risk in responding to this that I've already addressed your point in the previous email, but just in case:

On Wed, Feb 20, 2019 at 10:40 PM David A. Wheeler <dwhe...@dwheeler.com> wrote:
On Wed, 20 Feb 2019 06:32:39 -0500, Mario Carneiro <di....@gmail.com> wrote:
> I'm experimenting with a new language, Metamath Zero, an amalgamation of
> Metamath, Ghilbert and Lean syntax. The goal is to balance human
> readability and ease of writing a verifier, while also making some attempts
> to patch the bugs in metamath soundness (not that it's wrong, but there is
> a lot of soundness justification currently happening outside the metamath
> verifier kernel).

If possible, I think it'd be good to have an additional reason to create a new language,
or at least argue why similar results can't be done with tweaks to
existing mechanisms like $j.

1. Putting more soundness justifications into the underlying verifier
is indeed a worthy goal.  But most of set.mm is proofs, not definitions,
and there are very few "axioms" in the traditional sense... so it's not clear
that all the verifiers need to check all that much.
I can also imagine tweaking existing systems instead of creating a new language.

You will notice that this language is not proof-focused at all. It's actually just a specification language, so it's all axioms, definitions, and theorem statements.

Additionally, I want to disallow many things that are legal (but weird) metamath, and have it properly specified. The original goal for this language had nothing to do with metamath at all; I just wanted a reliable and simple specification language, but not many systems come close to metamath in terms of simplicity. This language had to make some things more complicated than metamath, but I believe that it's only where necessary to increase trustworthiness.

Yes, I'm sure with a sufficiently complicated layer on top of metamath you can achieve something similar. But there is a lot of stuff in a metamath file that isn't necessary for understanding what is being proven, and there is a lot of stuff that is really important for correctness but doesn't look like it. The foundation needs to change, in a way that retains compatibility. It's not just a makeover.
 
2. Making the input "more readable" is worthwhile, but I'm not sure that
by *itself* this is enough.  Making it so that we can be more flexible
about parentheses would *certainly* be a nice improvement.
Supporting ph /\ ps /\ ch /\  ... would be nice as well.

Expanding on #2: If it can manage to easily support arbitrary-length arguments like
ph /\ ps /\ ch /\ th /\ ... then I can suddenly see that it's able to do something
that Metamath today can't.  Is that easy? Worth it?
I don't know, but that *would* go beyond our current setup.

It is actually possible to support precedence and parsing in all its glory in today's metamath. It's just a huge pain, because you have to encode each precedence level as a typecode, and parentheses will be an explicit operation in the logic (the identity function, of course, but you will have to apply a theorem for that).
 
No matter what, I recommend that you do NOT depend
too much on precedence, even when available.
"Developer beliefs about binary operator precedence (part 1)” by Derek M. Jones
http://www.knosof.co.uk/cbook/accu06.html
"a survey of developers at the 2006 ACCU conference to determine if they correctly applied the precedence and associativity of the binary operators common to C, C++, C#, Java, Perl, and PHP. In this experiment only 66.7% of the answers were correct (standard deviation 8.8, poorest performer 45.2% correct, best performer 80.5% correct); this was not much better than random chance (50%). Even many widely-used operator pairs had relatively poor results; only 94% of the problems were correctly solved when they combined * and +, 92% when they combined * and -, and 69% when they combined / and +."
Professional mathematicians probably do get precedence correct, but where possible I'd like the results to be understood by others.

When we developed my "Readable Lisp" we supported infix but decided to NOT support precedence.  The trade-offs are very different here, so I don't think that reasoning really applies here.  But my point is that supporting lots of precedence levels isn't really, by itself, a big win.

I completely agree with this assessment, although I'm not sure how to act on this information. Programming languages use far more complicated precedence structures than mathematicians are used to, so I doubt your assertion about them is correct. Everyone knows that + is lower precedence than * , but not everyone knows whether /\ or \/ is more tightly binding, or how they interact with -> and <-> . (There *are* standard precedence orders here, but they aren't well known.)

My friend Simon Hudon suggests that it is better to use a partial order for precedence. That is, you specify "* binds tighter than +" and so on, rather than giving numbers (which implicitly defines a total order). If you have two operators that have incomparable precedence, then you have to use parentheses. This allows you to be much more vague about legal precedence patterns, and prevents people from taking too much advantage of precedence ordering to create confusing statements.
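
A sketch of how that could look (illustrative only; the operator pairs are just examples, not a proposed table): precedence is a "binds tighter than" relation, and anything incomparable must be parenthesized.

  # "a binds tighter than b" facts; the relation is closed transitively below.
  TIGHTER = {("*", "+"), ("/\\", "->"), ("\\/", "->")}

  def binds_tighter(inner, outer, rel=TIGHTER):
      seen, stack = set(), [inner]
      while stack:
          a = stack.pop()
          for (x, y) in rel:
              if x == a:
                  if y == outer:
                      return True
                  if y not in seen:
                      seen.add(y)
                      stack.append(y)
      return False

  def needs_parens(inner, outer):
      return not binds_tighter(inner, outer)

  print(needs_parens("*", "+"))       # False: 1 + 2 * 3 is unambiguous
  print(needs_parens("/\\", "\\/"))   # True: incomparable, so write (ph /\ ps) \/ ch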

That said, I think that having no precedence ever has its own issues, as anyone who has used metamath can attest. Especially as it pertains to decreased readability, I think this is something that should be addressed, although I am not at all satisfied with my own solution.
 
> * "def" declares a definition. Different from both Ghilbert and Metamath,
> definitions here are "limited lifetime unfolded". Each def has a block
> after it containing one or more theorems about the definition. These
> theorems are proven where the definition is replaced by the definiendum,
> and once the block is closed these theorems are the only access to the
> definition. (This enables some information privacy if desired.) See wb for
> example, which uses characterizing theorems instead of an iff.

That makes sense.  However, in some cases you will want a way to say
"don't use this theorem" so you can prove statements that you don't use.

But what about proving theorems that have to "reach into" multiple definitions to work?
You might need a way to express that you're intentionally breaking the rules.

It's not a perfect solution to the information hiding problem, and I'm not sure I want it to be. The main reason this feature exists is because I wanted a way to insert justification theorems without baking in the notion of equality. There are possible extensions to this language to support this sort of thing, but it's not essential for expressivity (because you can always just prove an equality theorem) and it complicates the verifier, so I'll pass on it for this version.
 
> * The notation system turned out a bit more complicated than I'd hoped.
> Each "math expression" is stated twice, once in chevrons and once in
> s-expression notation.

As I noted earlier, stating things twice is undesirable,
and I don't think it's necessary in this case.

If you have a solution to:

* Simple static parser
* Readable math
* Guaranteed unambiguity
* Minimal parsing "surprises"

then I'm all ears.

David A. Wheeler

Feb 20, 2019, 11:52:37 PM
to metamath
On Wed, 20 Feb 2019 22:47:43 -0500, Mario Carneiro <di....@gmail.com> wrote:

> The chevrons are "checked comments", if you like. My idea for avoiding
> parsing is to take the s-expression, print it, and compare the result
> token-for-token with the input string (ignoring whitespace). There is still
> some potential for abuse if you write an ambiguous grammar, where the
> danger is that an ambiguous expression is disambiguated in the s-expression
> in a way other than what the reader expected, but the s-expression is at
> least available for reference.

I think people will ignore the s-expressions in that case, so they will be more an
opportunity for mischief than help. Best to avoid them.
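
(For reference, the "print it and compare token-for-token" check from the quoted paragraph could look roughly like the following Python sketch. The notation table and the decision to ignore parentheses are simplifications for illustration, not necessarily how an MM0 verifier would do it.)

NOTATION = {"wi": ("->", "infix"), "wn": ("~", "prefix")}   # toy notation table

def print_tokens(e):
    """Render an s-expression (nested tuples / variable strings) to tokens,
    fully parenthesized for simplicity."""
    if isinstance(e, str):
        return [e]
    sym, kind = NOTATION[e[0]]
    if kind == "prefix":
        return [sym] + print_tokens(e[1])
    return ["("] + print_tokens(e[1]) + [sym] + print_tokens(e[2]) + [")"]

def matches(chevron_text, sexpr):
    """Token-for-token comparison, ignoring whitespace.  (To avoid modelling
    precedence here, parentheses are ignored too; a real check would not.)"""
    strip = lambda ts: [t for t in ts if t not in ("(", ")")]
    given = chevron_text.replace("(", " ( ").replace(")", " ) ").split()
    return strip(given) == strip(print_tokens(sexpr))

# ax-1:  << ph -> ps -> ph >>   (wi ph (wi ps ph))
assert matches("ph -> ps -> ph", ("wi", "ph", ("wi", "ps", "ph")))
assert not matches("ps -> ph -> ph", ("wi", "ph", ("wi", "ps", "ph")))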


> Translating from metamath to mm0 is a bit trickier, because some annotation
> information in the mm0 file is not present in mm files. If you skip some of
> the foundational material (roughly the contents of this file), translating
> a set.mm-like file should be straightforward and automatable.

It's always possible to slip in some info in comments if all are agreeable.
It's how $j statements work now.

> I didn't demonstrate this in the file, but the chevrons are optional. So
> you could just write:
>
> axiom ax-mp: ph -> (wi ph ps) -> ps;

That switch back-and-forth is rather confusing for readability.

> and it would be understood just as well. In such a small axiom as this that
> might be a reasonable thing to do (and in particular for single wffs the <<
> ph >> ph business just looks silly). However the s-expressions are NOT
> optional. This is one of the things that would be omitted in the "less
> verbose" version of the file - you might just write
>
> axiom ax-mp: << ph >> -> << ph -> ps >> -> << ps >>;
>
> and the compiler, which has a proper parser, would produce the
> s-expressions for you.

Notationally this seems much worse than current Metamath:

min $e |- ph $.
maj $e |- ( ph -> ps ) $.
ax-mp $a |- ps $.

In current Metamath I have a name for every part (so I can refer to each one),
I can see the syntax directly, and I don't have a festoon of << ... >> everywhere
that makes it hard to see what's acting. Chevrons are TERRIBLE
when there are arrows, they look too much alike.
Also, don't you want -> to mean implication? It seems to be used in two different ways here.

> The problem with metamath's current approach is that it's subtly unsafe, as
> we saw recently with the example of syntax ambiguity causing proofs of
> false. Metamath treats statements as lists of symbols, which means that you
> can do some shenanigans with ambiguity if the grammar supports it. To
> prevent this, the verifier here works entirely with trees (encoded as
> s-expressions). So the worst that can happen is that a theorem doesn't say
> what you think, it can't be exploited in the middle of a proof.

I *agree* with you that processing-as-trees is the better approach.
That drops MIU, but I think supporting MIU isn't really necessary.

I disagree that the s-expressions need to be exposed to the poor hapless user.
The verifier can trivially figure out what -> refers to; users should not
need to look up "wi" unless they're debugging a verifier.

Re: the shenanigans: these have already been dealt with by demanding an unambiguous
parse by some criteria. If you can't show it's unambiguous, then it's not okay.
I think that'd be reasonable here too.


> People shouldn't have to write s-expressions

They shouldn't have to read them either, unless there's some reason they *must*
be there. Which I haven't seen yet.

S-expressions can be valuable, obviously. But we should
not ask people to read OR write "(wi ph ps)" when they
could use "ph -> ps".

> I don't like that spaces are significant with this layout.

Fair enough. "ph -> ps" is better anyway.


> > theorem bi2: << (ph <-> ps) -> ps -> ph >>
> > > (wi (wb ph ps) (wi ps ph));
> >
> > Again, I don't see why you need to expressly state the syntax proof.
> > It's noise for the human, and the computer can recalculate & check that
> > trivially.
> >
>
> Not if the verifier doesn't have a parser! I'm worried that if I add a
> parser this will be a large fraction of the code, and it will be a large
> moving part that could contribute to uncertainty that the program has no
> bugs. (There will be a parser of course, for the code syntax itself, but it
> will be statically known so you can write it in a few functions.)

I agree that you don't want an overly *complicated* parser.
But I think you're trading away too much. If your goal is readability,
then "(wi ph ps)" is *terrible* compared to "ph -> ps".
Metamath maximizes simple parsing, and it manages to support ph -> ps.

I don't think you need to go that far backwards.
A relatively simple tree parser should work just fine.

> I see infixr. Are you planning to support arbitrary-length expressions of
> > the
> > same operator, e.g., "( 1 + 2 + 3 + 4 )"?
> >
>
> That would be handled by declaring "infixl cadd: + 60;" to say that + is
> left associative with precedence 60. Then "1 + 2 + 3 + 4" would be the
> printed form of (cadd (cadd (cadd c1 c2) c3) c4).

Okay. That, at least, provides some notational extras over
current Metamath.
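
(For concreteness, a rough Python sketch of how such an "infixl cadd: + 60" declaration could drive printing; the s-expressions are the ones from Mario's example, and printing the numeral constructors c1..c4 as-is is just a shortcut for the illustration.)

DECL = {"cadd": ("+", "infixl", 60)}   # "infixl cadd: + 60"

def show(e, parent_prec=0, is_right_child=False):
    """Print an s-expression using the declared notations."""
    if isinstance(e, str):
        return e
    head, lhs, rhs = e
    sym, assoc, prec = DECL[head]
    s = "{} {} {}".format(show(lhs, prec, False), sym, show(rhs, prec, True))
    # a left-associative operator needs parentheses only when it appears as
    # the right child of an equal-or-tighter operator
    needs = prec < parent_prec or (prec == parent_prec and is_right_child
                                   and assoc == "infixl")
    return "( " + s + " )" if needs else s

print(show(("cadd", ("cadd", ("cadd", "c1", "c2"), "c3"), "c4")))
# c1 + c2 + c3 + c4
print(show(("cadd", "c1", ("cadd", "c2", "c3"))))
# c1 + ( c2 + c3 )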


> > Wouldn't it be simpler to simply require that you cannot add a new
> > axiom or definition if it results in a syntactic ambiguity?
> > I *think* that would get rid of the "funny business" while eliminating
> > duplicate
> > declarations. But that depends on what "funny business" you mean,
> > and I'm not sure I know what you're referring to.
> >
>
> How will you know if new notation (not axiom/definition) results in a
> syntax ambiguity? The general problem is undecidable. What I want to avoid
> is a gnarly grammar with a complicated ambiguity being exploited somewhere
> deep in the proof (which isn't even being displayed) and causing something
> to be provable that shouldn't be.

If you are trying to solve the general problem you're trying too hard :-).

If you use a traditional LR parser, then make a table, and if you have shift/reduce or
reduce/reduce ambiguities given N tokens (whatever N is) and the precedence rules,
it's ambiguous. Done. You don't need - or want - infinite lookahead.
Parsing is well-studied.

Most languages have a fixed set of operators instead of adding operators as they go.
I've not tried to parse a language that adds operators with precedence values as it goes,
so I don't know how difficult it is. Prolog does it, though, so I suspect it's not too hard.

Another approach would be to put all the syntax definitions in a separate file,
and use that to generate the parser to be used later.
That can be fast; most of the time the syntax definitions won't change, so you can
cache the results & regenerate the table/code only when the definitions change.
That would be an entirely conventional way to implement a parser, so there
are lots of tools you can use (which would reduce the code to be written).

Even easier: just drop precedence entirely. You don't need it;
people often add "unnecessary" parentheses for clarity.
If your goal is simplicity, that would be simple.
Implementing precedence is a lot of work for a mechanism that is
rarely used in practice. At the least, you could start *without* precedence,
and then add it later if desired.

I can see using s-expressions for the *proofs* but that is a different matter.

Anyway, you asked for comments, I hope you find these interesting.

--- David A. Wheeler

David A. Wheeler

Feb 21, 2019, 12:07:15 AM
to metamath
On Wed, 20 Feb 2019 23:16:33 -0500, Mario Carneiro <di....@gmail.com> wrote:
> Everyone knows that + is lower precedence than *

Well, no.

You & I know that. But only 94% of professional programmers with 5+ years
got the correct answer when surveyed at a conference.
And random chance gives you 50%.

Sigh.

> If you have a solution to:
>
> * Simple static parser
> * Readable math
> * Guaranteed unambiguity
> * Minimal parsing "surprises"

Well, let's start with an easy approach & see if it works.
LR parsers are extensively studied & implemented, let's start there.

* Simple: If you make syntax a separate file, you can generate a simple static parser.
* Readable math: Most math systems use LR or LL parsers, so you're no worse off than most.
* Guaranteed unambiguity: They *require* unambiguity or a tie-breaking rule - so if you say that
only precedence rules can break ties, that gives you unambiguity. Basically treat a shift/reduce or a reduce/reduce conflict as an error if precedence doesn't break it.
* Minimal parsing "surprises": These are very widely used, so they shouldn't surprise anyone.

You could make k=5 (that is, LR(5)).

I'm guessing you believe that's not enough. Can you help me understand why?

--- David A. Wheeler

Mario Carneiro

Feb 21, 2019, 12:59:02 AM
to metamath
On Wed, Feb 20, 2019 at 11:52 PM David A. Wheeler <dwhe...@dwheeler.com> wrote:
On Wed, 20 Feb 2019 22:47:43 -0500, Mario Carneiro <di....@gmail.com> wrote:
> Translating from metamath to mm0 is a bit trickier, because some annotation
> information in the mm0 file is not present in mm files. If you skip some of
> the foundational material (roughly the contents of this file), translating
> a set.mm-like file should be straightforward and automatable.

It's always possible to slip in some info in comments if all are agreeable.
It's how $j statements work now.

I agree; this will require some tuning if/when this actually gets up and running, but I think it should eventually be possible to make such a translation fully automatic.
 
> I didn't demonstrate this in the file, but the chevrons are optional. So
> you could just write:
>
> axiom ax-mp: ph -> (wi ph ps) -> ps;

That switch back-and-forth is rather confusing for readability.

The arrow between the arguments is not an implication arrow; it is a metalogical arrow that serves the same purpose as $e. It is also possible to use another kind of arrow like =>, which is what they do in Isabelle. But that arrow is fixed by the syntax of the language, while the other arrow (representing wi) is declared in the file and can be whatever.

> and it would be understood just as well. In such a small axiom as this that
> might be a reasonable thing to do (and in particular for single wffs the <<
> ph >> ph business just looks silly). However the s-expressions are NOT
> optional. This is one of the things that would be omitted in the "less
> verbose" version of the file - you might just write
>
> axiom ax-mp: << ph >> -> << ph -> ps >> -> << ps >>;
>
> and the compiler, which has a proper parser, would produce the
> s-expressions for you.

Notationally this seems much worse than current Metamath:

    min $e |- ph $.
    maj $e |- ( ph -> ps ) $.
    ax-mp $a |- ps $.

In current Metamath I have a name for every part (so I can refer to each one),
I can see the syntax directly, and I don't have a festoon of << ... >> everywhere
that makes it hard to see what's acting.  Chevrons are TERRIBLE
when there are arrows, they look too much alike.
Also, don't you want -> to mean implication?  It seems to be used in two different ways here.

Notationally it's about the same as the metamath example, except for the different choice of sentinel character. I guess the $ is a "darker" character so it visually sets it off a bit more. André suggested using <? ?> which is also visually distinctive, although it still uses angle brackets. Another way to write the same thing without the meta-arrows is

axiom ax-mp (min: <? ph ?>) (maj: <? ph -> ps ?>): <? ps ?>;

although this style requires giving (unnecessary) names to the hypotheses.
 
> The problem with metamath's current approach is that it's subtly unsafe, as
> we saw recently with the example of syntax ambiguity causing proofs of
> false. Metamath treats statements as lists of symbols, which means that you
> can do some shenanigans with ambiguity if the grammar supports it. To
> prevent this, the verifier here works entirely with trees (encoded as
> s-expressions). So the worst that can happen is that a theorem doesn't say
> what you think, it can't be exploited in the middle of a proof.

I *agree* with you that processing-as-trees is the better approach.
That drops MIU, but I think supporting MIU isn't really necessary.

I disagree that the s-expressions need to be exposed to the poor hapless user.
The verifier can trivially figure out what -> refers to; users should not
need to look up "wi" unless they're debugging a verifier.

I thought about using tokens directly as names, i.e. calling the syntax constructor "->" instead of "wi", but it wouldn't fix the lispyness: you would be looking at (-> ph (-> ps ph)), which is only slightly better. I am also worried about lexing here, because if you have constants that involve parens then it gets really messy to parse things like ((,] ph ps) correctly.
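
(To illustrate the lexing worry: with a greedy maximal-munch lexer, whether "(,]" has been declared as a constant changes how the very same string tokenizes. This is a toy Python sketch, not MM0's lexer; the "(,]" token is borrowed from set.mm's half-open interval notation.)

def tokenize(s, constants):
    """Greedy (maximal-munch) lexer: known constants are matched longest-first;
    anything else runs until whitespace or the start of a known constant."""
    toks, i = [], 0
    while i < len(s):
        if s[i].isspace():
            i += 1
            continue
        for c in sorted(constants, key=len, reverse=True):
            if s.startswith(c, i):
                toks.append(c)
                i += len(c)
                break
        else:
            j = i
            while j < len(s) and not s[j].isspace() and \
                    not any(s.startswith(c, j) for c in constants):
                j += 1
            toks.append(s[i:j])
            i = j
    return toks

print(tokenize("((,] ph ps)", {"(", ")", "(,]"}))   # ['(', '(,]', 'ph', 'ps', ')']
print(tokenize("((,] ph ps)", {"(", ")"}))          # ['(', '(', ',]', 'ph', 'ps', ')']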

Re: the shenanigans: these have already been dealt with by demanding an unambiguous
parse by some criteria.  If you can't show it's unambiguous, then it's not okay.
I think that'd be reasonable here too.

I agree, I'm just not clear on the method.
 
> >     theorem bi2: << (ph <-> ps) -> ps -> ph >>
> > >       (wi (wb ph ps) (wi ps ph));
> >
> > Again, I don't see why you need to expressly state the syntax proof.
> > It's noise for the human, and the computer can recalculate & check that
> > trivially.
> >
>
> Not if the verifier doesn't have a parser! I'm worried that if I add a
> parser this will be a large fraction of the code, and it will be a large
> moving part that could contribute to uncertainty that the program has no
> bugs. (There will be a parser of course, for the code syntax itself, but it
> will be statically known so you can write it in a few functions.)

I agree that you don't want an overly *complicated* parser.
But I think you're trading away too much.  If your goal is readability,
then "(wi ph ps)"  is *terrible* compared to "ph -> ps".
Metamath maximizes simple parsing, and it manages to support ph -> ps.

Metamath does it at a cost that I'm not sure I want to pay, though. Parsing is easy because it's nonexistent, and it supports infix operators and anything you can dream up, but it also relies on a fact (that the grammar is unambiguous) which isn't checked for its correctness, and that's not good.

It is easy to use metamath style math strings in this language, *provided* you can ensure unambiguity by some means. How can you do that? Well, you could build a parser! It's a bit funny to build a parser without using it to parse things, but this might be a solution.
 
I don't think you need to go that far backwards.
A relatively simple tree parser should work just fine.

I like the sound of "a relatively simple tree parser", but that's how I ended up in s-expression land.
 
> > Wouldn't it be simpler to simply require that you cannot add a new
> > axiom or definition if it results in a syntactic ambiguity?
> > I *think* that would get rid of the "funny business" while eliminating
> > duplicate
> > declarations.  But that depends on what "funny business" you mean,
> > and I'm not sure I know what you're referring to.
> >
>
> How will you know if new notation (not axiom/definition) results in a
> syntax ambiguity? The general problem is undecidable. What I want to avoid
> is a gnarly grammar with a complicated ambiguity being exploited somewhere
> deep in the proof (which isn't even being displayed) and causing something
> to be provable that shouldn't be.

If you are trying to solve the general problem you're trying too hard :-).

If you use a traditional LR parser, then make a table, and if you have shift/reduce or
reduce/reduce ambiguities given N tokens (whatever N is) and the precedence rules,
it's ambiguous. Done. You don't need - or want - infinite lookahead.
Parsing is well-studied.

Most languages have a fixed set of operators instead of adding operators as they go.
I've not tried to parse a language that adds operators with precedence values as it goes,
so I don't know how difficult it is.  Prolog does it, though, so I suspect it's not too hard.

Right. The problem here isn't having a parser per se; as I said, as long as the grammar is fixed you can write a recursive descent parser in a very readable way. The problem is parsing a grammar which is only provided dynamically, in the file you are in the middle of reading. (Lean also does this, BTW; it makes the whole grammar not a CFG though, so I'm not a fan of this approach if I can avoid it.)
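
(A sketch of the table-driven middle ground being discussed: a fixed precedence-climbing parser whose operator table is plain data, so a new notation declaration adds a table row without touching the parser code. Python, purely illustrative; the entries for -> and <-> just mirror the declarations earlier in the thread.)

INFIX = {"->": ("wi", 25, "infixr"), "<->": ("wb", 20, "infixr")}   # mutable table

def parse_expr(tokens, min_prec=0):
    lhs = parse_atom(tokens)
    while tokens and tokens[0] in INFIX:
        term, prec, assoc = INFIX[tokens[0]]
        if prec < min_prec:
            break
        tokens.pop(0)
        # for infixr, recurse at the same precedence so the operator
        # associates to the right; for infixl, recurse one level higher
        rhs = parse_expr(tokens, prec if assoc == "infixr" else prec + 1)
        lhs = (term, lhs, rhs)
    return lhs

def parse_atom(tokens):
    t = tokens.pop(0)
    if t == "(":
        e = parse_expr(tokens)
        assert tokens.pop(0) == ")"
        return e
    return t                    # a variable such as ph, ps

print(parse_expr("ph -> ps -> ph".split()))
# ('wi', 'ph', ('wi', 'ps', 'ph'))

The dynamic-grammar problem then reduces to deciding when the table is allowed to change, which is the local-notation issue below.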
 
Another approach would be to put all the syntax definitions in a separate file,
and use that to generate the parser to be used later.
That can be fast; most of the time the syntax definitions won't change, so you can
cache the results & regenerate the table/code only when the definitions change.
That would be an entirely conventional way to implement a parser, so there
are lots of tools you can use (which would reduce the code to be written).

This makes it much harder to trust what's going on; JIT compiling seems like a lot of magic for the problem. I originally started with all the syntax at the start of the file, which seems to be the Ghilbert style as well, but then I ran into trouble with local notations. That will also be a problem for doing static parser generation.

One possibility for local notations is to declare the parser up front, and only allow parser actions to be disabled or replaced. That is, I declare that "e." has precedence 50 and it stays that way the whole time, but it's initially unassigned (not actually valid in math strings) and then attached to wel, then later welc and then finally wcel.
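
(A toy sketch of that bookkeeping: the precedence map is fixed once, up front, while the token-to-term assignment can be switched or disabled as blocks open and close. The class and method names are invented for illustration.)

class NotationTable:
    """Precedences are fixed for the whole file; the token-to-term assignment
    can be switched or disabled as blocks open and close."""
    def __init__(self, precedences):
        self.prec = dict(precedences)   # e.g. {"e.": 50}; never changes
        self.action = {}                # token -> term constructor, mutable

    def assign(self, token, term):
        assert token in self.prec, "precedence must be declared up front"
        self.action[token] = term

    def disable(self, token):
        self.action.pop(token, None)

nt = NotationTable({"e.": 50})
nt.assign("e.", "wel")      # inside the first local block
nt.disable("e.")            # that block closes
nt.assign("e.", "welc")     # a later local notation
nt.assign("e.", "wcel")     # finally the permanent one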
 
Even easier: just drop precedence entirely.  You don't need it;
people often add "unnecessary" parentheses for clarity.
If your goal is simplicity, that would be simple.
Implementing precedence is a lot of work for a mechanism that is
rarely used in practice. At the least, you could start *without* precedence,
and then add it later if desired.

Maybe not precedence, but some parsing seems to be pretty important in cleaning up math notation. Being able to have optional spaces will be nice but that only requires a smarter lexer, not a parser. But stuff like optional parentheses is super helpful for making math readable.

On Thu, Feb 21, 2019 at 12:07 AM David A. Wheeler <dwhe...@dwheeler.com> wrote:
On Wed, 20 Feb 2019 23:16:33 -0500, Mario Carneiro <di....@gmail.com> wrote:
> Everyone knows that + is lower precedence than *

Well, no.

You & I know that. But only 94% of professional programmers with 5+ years
got the correct answer when surveyed at a conference.
And random chance gives you 50%.

I read "94%" as "everyone". It's a lot better than the 60% figures for other stuff. It's basically the only precedence ordering which is literally taught in elementary school.

> If you have a solution to:
>
> * Simple static parser
> * Readable math
> * Guaranteed unambiguity
> * Minimal parsing "surprises"

Well, let's start with an easy approach & see if it works.
LR parsers are extensively studied & implemented, let's start there.

* Simple: If you make syntax a separate file, you can generate a simple static parser.
* Readable math: Most math systems use LR or LL parsers, so you're no worse off than most.
* Guaranteed unambiguity: They *require* unambiguity or a tie-breaking rule - so if you say that
only precedence rules can break ties, that gives you unambiguity.  Basically treat a shift/reduce or a reduce/reduce conflict as an error if precedence doesn't break it.
* Minimal parsing "surprises": These are very widely used, so they shouldn't surprise anyone.

You could make k=5 (that is, LR(5)).

I'm guessing you believe that's not enough.  Can you help me understand why?

Actually, I don't think the problem is that it's not enough; I just don't think it's simple enough. In fact I would go with an LR(0) parser. Metamath can be parsed with an LR(0) parser but it needs composite parser actions - that's what the KLR thing is about. (set.mm isn't strictly LR(k) if I recall correctly.)

Here's a thought: LR parsers have guaranteed unambiguity, provided they exist. So they effectively act as certificates of unambiguity. Certificate = proof = put it in the proof file. How well does that work? Is verifying a parser table easier than generating one from scratch? If so, it might be possible to support more complicated parsers, since the onus of construction is taken off the verifier.
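
(As a toy illustration of the "table as certificate" idea: checking a proposed table for conflicts is a simple scan over its cells, quite unlike constructing the table in the first place. Python sketch only; a real check would also have to verify that the table is faithful to the grammar, e.g. its closure and goto conditions, which is omitted here.)

def conflict_free(table):
    """table maps (state, lookahead) to the list of actions proposed for
    that cell; the certificate is rejected if any cell has two of them."""
    for (state, tok), actions in table.items():
        if len(set(actions)) > 1:
            return False, (state, tok, actions)
    return True, None

# a toy table fragment with a shift/reduce conflict on '->' in state 3
proposed = {
    (0, "ph"): ["shift 2"],
    (3, "->"): ["shift 4", "reduce wi"],
}
print(conflict_free(proposed))
# (False, (3, '->', ['shift 4', 'reduce wi']))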

André L F S Bacci

Feb 21, 2019, 9:43:59 AM
to meta...@googlegroups.com
On Thu, Feb 21, 2019 at 12:48 AM Mario Carneiro <di....@gmail.com> wrote:
Indeed, dealing with modularization seems pretty important and apparently caused problems for Raph, so perhaps I should give it more thought.

In a specification file such as this, we don't have to worry about proof dependencies, but we do have to be able to import axiom systems and syntax from another file. But now I'm worried about the possible security vulnerabilities this causes. A specification file is supposed to be self contained in the sense that if you know the language then it says what it means. If a specification file imports another, then the meaning of the first file depends on the meaning of that other file as well. It's even worse if the path is ambiguous in some way, and we don't see exactly what it's referring to. If the includes are relative to a path provided at the command line, then it's not even definite what file is being referred to.

One way to make sure the import is right is to reproduce the axioms and theorems that are expected to be imported. This way the verifier can double check that all the files are stitched together correctly. But that may well be the whole file or nearly so (theorems don't need to be imported, but axioms and definitions do). And that's a hell of a lot of boilerplate, and also obscures the actual content of the file if it starts with a lengthy recap section.

The specification file / proof file dichotomy is supposed to represent the distinction between *what* is being proven and *how* it should be proven. The proof file is a fine place for putting in instructions for how to find other files, but the specification files should be more focused on content and cross checks.

I will return to this topic in a bit with a more concrete syntax proposal.

I was thinking more in terms of development than of modularization / meaning. The "correct" file for attesting meaning and validating proofs is the solid, self-contained file, which in turn could be hashed / signed (and so proof files need a syntax that permits their coexistence in a self-contained file).

The `file` and `file.import` statements are only for ease of development -- think the mega files of Metamath versus GitHub, and source control in general. In this context, `file` directives will be treated as comments, only used when splitting and joining files.

Modularization comes last, as it is better treated in the syntax / language itself. Maybe a Python-like `import` statement.

André

OlivierBinda

Feb 21, 2019, 10:05:04 AM
to metamath

I can relate to "make the metamath parser simple, unambiguous and efficient"

I have just written an Antlr4 grammar for an old version of metamath. But the performance was so bad (it took minutes to parse 4000 set.mm statements, with some statements like

|- ( ( ( ( ( ∨1c e. RR* /\\ ∨2c e. RR* ) /\\ -. ( ∨1c = 0 \\/ ∨2c = 0 ) ) /\\ -. ( ( ( 0 < ∨2c /\\ ∨1c = +oo ) \\/ ( ∨2c < 0 /\\ ∨1c = -oo ) ) \\/ ( ( 0 < ∨1c /\\ ∨2c = +oo ) \\/ ( ∨1c < 0 /\\ ∨2c = -oo ) ) ) ) /\\ -. ( ( ( 0 < ∨2c /\\ ∨1c = -oo ) \\/ ( ∨2c < 0 /\\ ∨1c = +oo ) ) \\/ ( ( 0 < ∨1c /\\ ∨2c = -oo ) \\/ ( ∨1c < 0 /\\ ∨2c = +oo ) ) ) ) -> ∨1c e. RR )"

that took like 9s to parse; I did not try to optimize it, though)... the parser failed epically, throwing an out-of-stack-space exception (too much garbage to collect).

I had another go with the same parser, BUT doing a first pass over the statement to find the opening and closing braces,
so that I could parse the subgroups and rebuild the tree from the results.

It worked a lot better (like 30x): parsing set.mm in maybe 10s.
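
(A sketch of that pre-pass in Python: split the token stream into balanced parenthesis groups, which can then be parsed, or farmed out to threads, independently. Purely illustrative and unrelated to the Antlr grammar quoted below.)

def split_groups(tokens):
    """Return a nested list where each '( ... )' span becomes a sub-list."""
    stack = [[]]
    for t in tokens:
        if t == "(":
            stack.append([])
        elif t == ")":
            group = stack.pop()
            stack[-1].append(group)
        else:
            stack[-1].append(t)
    assert len(stack) == 1, "unbalanced parentheses"
    return stack[0]

toks = "( ( ph -> ps ) /\\ ch )".split()
print(split_groups(toks))
# [[['ph', '->', 'ps'], '/\\', 'ch']]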


I would welcome being able to write a simple parser without using a tool like Antlr4 (which only has a few language targets, and not Kotlin, my favorite language). But it would be nice to be able to find groups that can be parsed independently in the metamath language, as it obviously speeds things up 30x and also makes it possible to use many threads to parse one sentence (another possible speedup there)


Yet the parser that I wrote isn't a dynamic parser, and I will have to look into/implement that if I want the maths it can handle to be extended.
(At one point, I'll have to implement the same wikipedia algorithm)

So, what I am trying to say is :
a) groups (stuff with easily found bounds, like "( )") are nice to have, whereas rules like (the wiso statement)

c Isom c COMMA c LEFT_PARENTHESIS c COMMA c RIGHT_PARENTHESIS  {$op="wiso";} 

make things more difficult

things with a prefix and clean separation are nice; things with adjacency are complicated :/

b) if it is easy to make things computer-understandable (build a tree), it might be easy (or not :/) to turn those things into human-friendly stuff (display, write and interact with it nicely) with another software layer that isn't in the language

c) writing things twice should be a big no no for any programmer and mathematician

d) even if you write things twice in your database, you'll still have to build a parser to handle human input anyway :)

e) maybe there are other solutions? Like using colored spaces to separate tokens and handle nesting?
I have been playing with metamath for 8 months; it took me a lot of time to adapt, and I am not sure the new language is better at readability. It hurt the first time I saw it (but I still love you Mario, I'm a great fan, have been using your stuff all over the place)

f) I have been doing university-level maths for 20 years and software development for the last 6 years, and the only precedence order I know is the one for + - * / ^ ( ). So...

g) I would love to have metamath look more like the maths I write on the blackboard (those braces !) but I would rather have a language that is understood by a computer and easy to work with


Cheers,

Olivier Binda

grammar Parser2;
statement : provenStatement | wffStatement | classStatement | setStatement;
setClassOrWff : s | w | c ;

setStatement : Set s;
classStatement : Class c;
provenStatement : Provable w;
wffStatement : Wff w;

c : classHolder | s | cC | cC2 | c0 | cC3 | cC7 | cSC2 | cS3W | cSCSC2 | cC4 | cC5 | cSCW | cSC3 | cC8 | cCSC | cSW | cC3S | cWC2 | cS2W | cC2S | cS | cC6 ;
w : wffHolder | wW | wW2 | wSW | wC2 | wSC2 | wC | wW12 | w0 | wW3 | wS2 | wW4 | wW6 | wW9 | wSCW | wS2W | wW8 | wSC | wCSW | wC3 | wW5 | wW10 | wW11 | wC5 | wW7 ;
s : setHolder  ;

wW locals[String op] : HYPHEN_MINUS_FULL_STOP w  {$op="wn";}  |
LESS_THAN_SIGN_GREATER_THAN_SIGN w  {$op="wdia";}  |
LEFT_SQUARE_BRACKET_FULL_STOP_RIGHT_SQUARE_BRACKET w  {$op="wbox";}  |
LEFT_PARENTHESIS_RIGHT_PARENTHESIS w  {$op="wcirc";}  ;


wW2 locals[String op] : LEFT_PARENTHESIS w LESS_THAN_SIGN_HYPHEN_MINUS_GREATER_THAN_SIGN w RIGHT_PARENTHESIS  {$op="wb";}  |
LEFT_PARENTHESIS w HYPHEN_MINUS_SOLIDUS_REVERSE_SOLIDUS w RIGHT_PARENTHESIS  {$op="wnan";}  |
LEFT_PARENTHESIS w HYPHEN_MINUS_GREATER_THAN_SIGN w RIGHT_PARENTHESIS  {$op="wi";}  |
LEFT_PARENTHESIS w SOLIDUS_REVERSE_SOLIDUS w RIGHT_PARENTHESIS  {$op="wa";}  |
LEFT_PARENTHESIS w Until w RIGHT_PARENTHESIS  {$op="wunt";}  |
LEFT_PARENTHESIS_FULL_STOP w HYPHEN_MINUS_GREATER_THAN_SIGN_FULL_STOP w RIGHT_PARENTHESIS_FULL_STOP  {$op="wvd1";}  |
LEFT_PARENTHESIS w REVERSE_SOLIDUS_SOLIDUS w RIGHT_PARENTHESIS  {$op="wo";}  |
LEFT_PARENTHESIS w REVERSE_SOLIDUS_SOLIDUS_LOW_LINE w RIGHT_PARENTHESIS  {$op="wxo";}  |
LEFT_PARENTHESIS_FULL_STOP w COMMA_FULL_STOP w RIGHT_PARENTHESIS_FULL_STOP  {$op="wvhc2";}  ;


wSW locals[String op] : A_FULL_STOP s w  {$op="wal";}  |
E_EXCLAMATION_MARK s w  {$op="weu";}  |
E_ASTERISK s w  {$op="wmo";}  |
F_SOLIDUS s w  {$op="wnf";}  |
E_FULL_STOP s w  {$op="wex";}  ;


wC2 locals[String op] : c EQUALS_SIGN_SOLIDUS_EQUALS_SIGN c  {$op="wne";}  |
c ESOLIDUS c  {$op="wnel";}  |
c Se c  {$op="wse";}  |
c DefAt c  {$op="wdfat";}  |
c C_FULL_STOP c  {$op="wpss";}  |
c LOW_LINESe c  {$op="w-bnj13";}  |
c Po c  {$op="wpo";}  |
c Fn c  {$op="wfn";}  |
c Or c  {$op="wor";}  |
c Er c  {$op="wer";}  |
c EQUALS_SIGN c  {$op="wceq";}  |
c EFULL_STOP c  {$op="wcel";}  |
c C_LOW_LINE c  {$op="wss";}  |
c Fr c  {$op="wfr";}  |
c We c  {$op="wwe";}  |
c LOW_LINEFrSe c  {$op="w-bnj15";}  ;


cC locals[String op] : LEFT_CURLY_BRACKET c RIGHT_CURLY_BRACKET  {$op="csn";}  |
Suc c  {$op="csuc";}  |
OR c  {$op="cofr";}  |
ORVC c  {$op="corvc";}  |
FullFun c  {$op="cfullfn";}  |
Dom c  {$op="cdm";}  |
OF c  {$op="cof";}  |
HYPHEN_MINUSu c  {$op="cneg";}  |
TILDEP c  {$op="cpw";}  |
HYPHEN_MINUS_FULL_STOPg c  {$op="cgon";}  |
Image c  {$op="cimage";}  |
Funpart c  {$op="cfunpart";}  |
Tpos c  {$op="ctpos";}  |
Curry c  {$op="ccur";}  |
Uncurry c  {$op="cunc";}  |
AC_LOW_LINE c  {$op="wacn";}  |
Locally c  {$op="clly";}  |
U_FULL_STOP c  {$op="cuni";}  |
VERTICAL_LINE_CIRCUMFLEX_ACCENT_VERTICAL_LINE c  {$op="cint";}  |
HYPHEN_MINUSe c  {$op="cxne";}  |
LESS_THAN_SIGN_QUOTATION_MARK c QUOTATION_MARK_GREATER_THAN_SIGN  {$op="cs1";}  |
N_HYPHEN_MINUSLocally c  {$op="cnlly";}  |
OFC c  {$op="cofc";}  |
Word c  {$op="cword";}  |
Slot c  {$op="cslot";}  |
Fix c  {$op="cfix";}  |
GRAVE_ACCENT_APOSTROPHE c  {$op="ccnv";}  |
Ran c  {$op="crn";}  |
Recs LEFT_PARENTHESIS c RIGHT_PARENTHESIS  {$op="crecs";}  ;


wSC2 locals[String op] : DisjLOW_LINE s EFULL_STOP c c  {$op="wdisj";}  ;


cC2 locals[String op] : LEFT_PARENTHESIS c X_FULL_STOP c RIGHT_PARENTHESIS  {$op="cxp";}  |
LEFT_CURLY_BRACKET c COMMA c RIGHT_CURLY_BRACKET  {$op="cpr";}  |
LEFT_PARENTHESIS c GRAVE_ACCENT c RIGHT_PARENTHESIS  {$op="cfv";}  |
Pprod LEFT_PARENTHESIS c COMMA c RIGHT_PARENTHESIS  {$op="cpprod";}  |
GcdOLD LEFT_PARENTHESIS c COMMA c RIGHT_PARENTHESIS  {$op="cgcdOLD";}  |
LEFT_PARENTHESIS c OFULL_STOP c RIGHT_PARENTHESIS  {$op="ccom";}  |
A_FULL_STOPg c c  {$op="cgol";}  |
LESS_THAN_SIGN_FULL_STOP c COMMA c GREATER_THAN_SIGN_FULL_STOP  {$op="cop";}  |
Seqom LEFT_PARENTHESIS c COMMA c RIGHT_PARENTHESIS  {$op="cseqom";}  |
LOW_LINE c c  {$op="cdp2";}  |
LEFT_PARENTHESIS c ICIRCUMFLEX_ACCENTi c RIGHT_PARENTHESIS  {$op="cin";}  |
LEFT_PARENTHESIS c QUOTATION_MARK c RIGHT_PARENTHESIS  {$op="cima";}  |
LESS_THAN_SIGN_LESS_THAN_SIGN c COMMA c GREATER_THAN_SIGN_GREATER_THAN_SIGN  {$op="caltop";}  |
LEFT_PARENTHESIS c XX_FULL_STOP c RIGHT_PARENTHESIS  {$op="caltxp";}  |
LESS_THAN_SIGN_QUOTATION_MARK c c QUOTATION_MARK_GREATER_THAN_SIGN  {$op="cs2";}  |
PtDf LEFT_PARENTHESIS c COMMA c RIGHT_PARENTHESIS  {$op="cptdfc";}  |
Rec LEFT_PARENTHESIS c COMMA c RIGHT_PARENTHESIS  {$op="crdg";}  |
LEFT_SQUARE_BRACKET c RIGHT_SQUARE_BRACKET c  {$op="cec";}  |
LEFT_PARENTHESIS c SOLIDUS_FULL_STOP c RIGHT_PARENTHESIS  {$op="cqs";}  |
LEFT_PARENTHESIS c LEFT_PARENTHESIS_PLUS_SIGN_PLUS_SIGN_RIGHT_PARENTHESIS c RIGHT_PARENTHESIS  {$op="csymdif";}  |
LEFT_PARENTHESIS c REVERSE_SOLIDUS c RIGHT_PARENTHESIS  {$op="cdif";}  |
LEFT_PARENTHESIS c UFULL_STOP c RIGHT_PARENTHESIS  {$op="cun";}  |
LEFT_PARENTHESIS c VERTICAL_LINE_GRAVE_ACCENT c RIGHT_PARENTHESIS  {$op="cres";}  |
OrdIso LEFT_PARENTHESIS c COMMA c RIGHT_PARENTHESIS  {$op="coi";}  |
SEMICOLON c c  {$op="cdc";}  |
E_FULL_STOPg c c  {$op="cgox";}  |
LEFT_PARENTHESIS c LEFT_PARENTHESISxRIGHT_PARENTHESIS c RIGHT_PARENTHESIS  {$op="ctxp";}  |
LEFT_PARENTHESIS c APOSTROPHE_APOSTROPHE_APOSTROPHE c RIGHT_PARENTHESIS  {$op="cafv";}  ;


wC locals[String op] : Rel c  {$op="wrel";}  |
Smo c  {$op="wsmo";}  |
Ord c  {$op="word";}  |
Lim c  {$op="wlim";}  |
Tr c  {$op="wtr";}  |
Fun c  {$op="wfun";}  |
Prt c  {$op="wprt";}  ;


c0 locals[String op] : Fi  {$op="cfi";}  |
Card  {$op="ccrd";}  |
Q_FULL_STOP  {$op="cnq";}  |
DIGIT_ONER  {$op="c1r";}  |
PLUS_SIGNR  {$op="cplr";}  |
CC  {$op="cc";}  |
LESS_THAN_SIGN  {$op="clt";}  |
Splice  {$op="csplice";}  |
Reverse  {$op="creverse";}  |
Gcd  {$op="cgcd";}  |
OdZ  {$op="codz";}  |
Struct  {$op="cstr";}  |
C_omp  {$op="cco";}  |
DIGIT_ZEROg  {$op="c0g";}  |
XsFULL_STOP  {$op="cxps";}  |
Iso  {$op="ciso";}  |
HomA  {$op="choma";}  |
SetCat  {$op="csetc";}  |
Dirset  {$op="cdrs";}  |
Toset  {$op="ctos";}  |
Lat  {$op="clat";}  |
CLat  {$op="ccla";}  |
ODual  {$op="codu";}  |
TosetRel  {$op="ctsr";}  |
Tail  {$op="ctail";}  |
GrpIso  {$op="cgim";}  |
Cntr  {$op="ccntr";}  |
OppG  {$op="coppg";}  |
VarFGrp  {$op="cvrgp";}  |
DProd  {$op="cdprd";}  |
MulGrp  {$op="cmgp";}  |
RingIso  {$op="crs";}  |
PID  {$op="cpid";}  |
Eval  {$op="cevl";}  |
EvalSubDIGIT_ONE  {$op="ces1";}  |
Proj  {$op="cpj";}  |
TopBases  {$op="ctb";}  |
TILDE_TILDE_GREATER_THAN_SIGNt  {$op="clm";}  |
UFil  {$op="cufil";}  |
TopVec  {$op="ctvc";}  |
ASTERISKMetSp  {$op="cxme";}  |
OmN  {$op="comn";}  |
Cau  {$op="cca";}  |
CMetSp  {$op="ccms";}  |
Coeff  {$op="ccoe";}  |
Area  {$op="carea";}  |
Lam  {$op="cvma";}  |
SOLIDUSL  {$op="clgs";}  |
TPLUS_SIGN  {$op="ctcl";}  |
GId  {$op="cgi";}  |
CIRCUMFLEX_ACCENTg  {$op="cgx";}  |
ExId  {$op="cexid";}  |
CVecOLD  {$op="cvc";}  |
FULL_STOPsOLD  {$op="cns";}  |
SubSp  {$op="css";}  |
TILDEH  {$op="chil";}  |
Cauchy  {$op="ccau";}  |
PLUS_SIGNop  {$op="chos";}  |
FULL_STOPfn  {$op="chft";}  |
LinOp  {$op="clo";}  |
Bra  {$op="cbr";}  |
LESS_THAN_SIGN_LOW_LINEop  {$op="cleo";}  |
SigAlgebra  {$op="csiga";}  |
Retr  {$op="cretr";}  |
HYPHEN_MINUS_GREATER_THAN_SIGNg  {$op="cgoi";}  |
Qp  {$op="cqp";}  |
No  {$op="csur";}  |
Bigcup  {$op="cbigcup";}  |
Btwn  {$op="cbtwn";}  |
TransportTo  {$op="ctransport";}  |
InnerFiveSeg  {$op="cifs";}  |
Line  {$op="cline2";}  |
PresetRel  {$op="cpresetrel";}  |
Ub  {$op="cub";}  |
Lb  {$op="clb";}  |
DIGIT_ZEROcv  {$op="c0cv";}  |
ProdObj  {$op="cprodo";}  |
CodSetCat  {$op="ccodcase";}  |
SetCatOLD  {$op="ccaset";}  |
LOW_LINE_VERTICAL_LINE_LOW_LINEc  {$op="cfals";}  |
Bic  {$op="cbic";}  |
Derv  {$op="cderv";}  |
Halfplane  {$op="chalfp";}  |
Segtrg  {$op="csegtrg";}  |
Neug  {$op="cneug";}  |
Rn  {$op="crrn";}  |
PrRing  {$op="cprrng";}  |
RmY  {$op="crmy";}  |
LineDIGIT_THREE  {$op="cline3";}  |
Trails  {$op="ctrail";}  |
Circuits  {$op="ccrct";}  |
LogLOW_LINE  {$op="clog_";}  |
CvLat  {$op="clc";}  |
Trn  {$op="ctrnN";}  |
EDRingR  {$op="cedring-rN";}  |
Mapd  {$op="cmpd";}  |
DIGIT_TWOo  {$op="c2o";}  |
DIGIT_FOURo  {$op="c4o";}  |
CIRCUMFLEX_ACCENTo  {$op="coe";}  |
TILDE_TILDE  {$op="cen";}  |
TILDE_LESS_THAN_SIGN_LOW_LINE  {$op="cdom";}  |
TILDE_LESS_THAN_SIGN  {$op="csdm";}  |
Har  {$op="char";}  |
TC  {$op="ctc";}  |
FULL_STOPpQ  {$op="cmpq";}  |
SOLIDUSQ  {$op="cerq";}  |
FULL_STOPQ  {$op="cmq";}  |
LESS_THAN_SIGNQ  {$op="cltq";}  |
FULL_STOPpR  {$op="cmpr";}  |
DIGIT_ONE  {$op="c1";}  |
HYPHEN_MINUSoo  {$op="cmnf";}  |
DIGIT_TWO  {$op="c2";}  |
DIGIT_SIX  {$op="c6";}  |
DIGIT_NINE  {$op="c9";}  |
LEFT_SQUARE_BRACKET_COMMA_RIGHT_PARENTHESIS  {$op="cico";}  |
FULL_STOP_FULL_STOP_FULL_STOP  {$op="cfz";}  |
Mod  {$op="cmo";}  |
Concat  {$op="cconcat";}  |
Substr  {$op="csubstr";}  |
TILDE_TILDE_GREATER_THAN_SIGNr  {$op="crli";}  |
O_LEFT_PARENTHESIS_DIGIT_ONE_RIGHT_PARENTHESIS  {$op="co1";}  |
Exp  {$op="ce";}  |
Sin  {$op="csin";}  |
Cos  {$op="ccos";}  |
Tan  {$op="ctan";}  |
Sadd  {$op="csad";}  |
Z_LEFT_SQUARE_BRACKETiRIGHT_SQUARE_BRACKET  {$op="cgz";}  |
VERTICAL_LINE_GRAVE_ACCENTs  {$op="cress";}  |
ASTERISKr  {$op="cstv";}  |
FULL_STOPs  {$op="cvsca";}  |
Dist  {$op="cds";}  |
VERTICAL_LINE_GRAVE_ACCENTt  {$op="crest";}  |
RR_ASTERISKs  {$op="cxrs";}  |
QTop  {$op="cqtop";}  |
MrCls  {$op="cmrc";}  |
Id  {$op="ccid";}  |
Comf  {$op="ccomf";}  |
OppCat  {$op="coppc";}  |
Inv  {$op="cinv";}  |
Func  {$op="cfunc";}  |
IdFunc  {$op="cidfu";}  |
VERTICAL_LINE_GRAVE_ACCENTf  {$op="cresf";}  |
FuncCat  {$op="cfuc";}  |
Arrow  {$op="carw";}  |
CompA  {$op="ccoa";}  |
PairF  {$op="cprf";}  |
Yon  {$op="cyon";}  |
Preset  {$op="cpreset";}  |
Meet  {$op="cmee";}  |
ToInc  {$op="cipo";}  |
PosetRel  {$op="cps";}  |
Infw  {$op="cinf";}  |
LatRel  {$op="cla";}  |
LSSum  {$op="clsm";}  |
FreeGrp  {$op="cfrgp";}  |
CycGrp  {$op="ccyg";}  |
DIGIT_ONEr  {$op="cur";}  |
DivRing  {$op="cdr";}  |
SubRing  {$op="csubrg";}  |
RingSpan  {$op="crgspn";}  |
LSpan  {$op="clspn";}  |
RingLMod  {$op="crglmod";}  |
MetOpen  {$op="cmopn";}  |
ZMod  {$op="czlm";}  |
Chr  {$op="cchr";}  |
PreHil  {$op="cphl";}  |
Hil  {$op="chs";}  |
OBasis  {$op="cobs";}  |
Cls  {$op="ccl";}  |
CnP  {$op="ccnp";}  |
KolDIGIT_TWO  {$op="ct0";}  |
Nrm  {$op="cnrm";}  |
KQ  {$op="ckq";}  |
Homeo  {$op="chmeo";}  |
TILDE_EQUALS_SIGN  {$op="chmph";}  |
UFL  {$op="cufl";}  |
FClusf  {$op="cfcf";}  |
TopMnd  {$op="ctmd";}  |
Tsums  {$op="ctsu";}  |
TopDRing  {$op="ctdrg";}  |
ToMetSp  {$op="ctmt";}  |
Norm  {$op="cnm";}  |
NGHom  {$op="cnghm";}  |
HYPHEN_MINUScnHYPHEN_MINUS_GREATER_THAN_SIGN  {$op="ccncf";}  |
ASTERISKp  {$op="cpco";}  |
ToCHil  {$op="ctch";}  |
Ban  {$op="cbn";}  |
CHil  {$op="chl";}  |
LimCC  {$op="climc";}  |
DegDIGIT_ONE  {$op="cdg1";}  |
AA  {$op="caa";}  |
TILDE_TILDE_GREATER_THAN_SIGNu  {$op="culm";}  |
Arctan  {$op="catan";}  |
Plig  {$op="cplig";}  |
GrpOp  {$op="cgr";}  |
SOLIDUSg  {$op="cgs";}  |
AbelOp  {$op="cablo";}  |
SemiGrp  {$op="csem";}  |
MndOp  {$op="cmndo";}  |
RingOps  {$op="crngo";}  |
IndMet  {$op="cims";}  |
LnOp  {$op="clno";}  |
CBan  {$op="ccbn";}  |
PLUS_SIGNH  {$op="cph";}  |
Normfn  {$op="cnmf";}  |
Adjh  {$op="cado";}  |
Eigval  {$op="cel";}  |
CHStates  {$op="chst";}  |
Measures  {$op="cmeas";}  |
RRndVar  {$op="crrv";}  |
LogLOW_LINEG  {$op="clgam";}  |
PCon  {$op="cpcon";}  |
UMGrph  {$op="cumg";}  |
REVERSE_SOLIDUS_SOLIDUSg  {$op="cgoo";}  |
LESS_THAN_SIGN_HYPHEN_MINUS_GREATER_THAN_SIGNg  {$op="cgob";}  |
AxExt  {$op="cgze";}  |
AxRep  {$op="cgzr";}  |
AxUn  {$op="cgzu";}  |
AxReg  {$op="cgzg";}  |
SplitFldDIGIT_ONE  {$op="csf1";}  |
SplitFld  {$op="csf";}  |
PolySplitLim  {$op="cpsl";}  |
ZRing  {$op="czr";}  |
TASTERISKrec  {$op="crtrcl";}  |
Limits  {$op="climits";}  |
Singleton  {$op="csingle";}  |
Apply  {$op="capply";}  |
Cup  {$op="ccup";}  |
Restrict  {$op="crestrict";}  |
OuterFiveSeg  {$op="cofs";}  |
CgrDIGIT_THREE  {$op="ccgr3";}  |
FiveSeg  {$op="cfs";}  |
CurDIGIT_ONE  {$op="ccur1";}  |
AntiDir  {$op="cantidir";}  |
BndLat  {$op="clbl";}  |
CIRCUMFLEX_ACCENTmd  {$op="clsg";}  |
ZeroDiv  {$op="czerodiv";}  |
Action  {$op="cact";}  |
Vec  {$op="cvec";}  |
RAffSp  {$op="craffsp";}  |
IsolatedPt  {$op="cisopt";}  |
Dgra  {$op="cmgra";}  |
Alg  {$op="calg";}  |
IdLOW_LINE  {$op="cid_";}  |
Ded  {$op="cded";}  |
CinvOLD  {$op="ccinv";}  |
FuncOLD  {$op="cfuncOLD";}  |
InitObj  {$op="ciobj";}  |
Natural  {$op="cntrl";}  |
Tar  {$op="ctar";}  |
DWords  {$op="cdwords";}  |
Grammar  {$op="cgrm";}  |
Prdct  {$op="cgprdct";}  |
Phc  {$op="cphc";}  |
PLines  {$op="cplines";}  |
C_on  {$op="ccon2";}  |
Segments  {$op="cSeg";}  |
Angle  {$op="cangle";}  |
Angc  {$op="cangc";}  |
Cut  {$op="ccut";}  |
LocFin  {$op="clocfin";}  |
TotBnd  {$op="ctotbnd";}  |
TILDE_EQUALS_SIGNr  {$op="crisc";}  |
MzPolyCld  {$op="cmzpcl";}  |
MzPoly  {$op="cmzp";}  |
Dioph  {$op="cdioph";}  |
LEFT_SQUARE_BRACKET_RIGHT_SQUARE_BRACKETNN  {$op="csquarenn";}  |
LFinGen  {$op="clfig";}  |
FreeLMod  {$op="cfrlm";}  |
Monic  {$op="cmnc";}  |
PmSgn  {$op="cpsgn";}  |
CytP  {$op="ccytp";}  |
TopSep  {$op="ctopsep";}  |
Walks  {$op="cwalk";}  |
SPaths  {$op="cspath";}  |
PathOn  {$op="cpthon";}  |
GREATER_THAN_SIGN  {$op="cgt";}  |
Cosh  {$op="ccosh";}  |
FULL_STOP  {$op="cdp";}  |
Ceiling  {$op="ccei";}  |
LSHyp  {$op="clsh";}  |
LESS_THAN_SIGNo  {$op="ccvr";}  |
HL  {$op="chlt";}  |
LPlanes  {$op="clpl";}  |
Lines  {$op="clines";}  |
LTrn  {$op="cltrn";}  |
TGrp  {$op="ctgrp";}  |
VA  {$op="cdjaN";}  |
DIsoH  {$op="cdih";}  |
LCDual  {$op="clcd";}  |
LEFT_SQUARE_BRACKETC_FULL_STOP_RIGHT_SQUARE_BRACKET  {$op="crpss";}  |
Undef  {$op="cund";}  |
DIGIT_THREEo  {$op="c3o";}  |
PLUS_SIGNc  {$op="ccda";}  |
LESS_THAN_SIGNN  {$op="clti";}  |
DIGIT_ONEQ  {$op="c1q";}  |
DIGIT_ZERO  {$op="cc0";}  |
PLUS_SIGNoo  {$op="cpnf";}  |
FULL_STOPi  {$op="cip";}  |
MrInd  {$op="cmri";}  |
ACS  {$op="cacs";}  |
Nat  {$op="cnat";}  |
XcFULL_STOP  {$op="cxpc";}  |
UncurryF  {$op="cuncf";}  |
Poset  {$op="cpo";}  |
Lub  {$op="club";}  |
Grp  {$op="cgrp";}  |
HYPHEN_MINUSg  {$op="csg";}  |
GrpAct  {$op="cga";}  |
PGrp  {$op="cpgp";}  |
DProj  {$op="cdpj";}  |
CRing  {$op="ccrg";}  |
AbsVal  {$op="cabv";}  |
LMHom  {$op="clmhm";}  |
LVec  {$op="clvec";}  |
EvalSub  {$op="ces";}  |
OrdPwSer  {$op="copws";}  |
SelectVars  {$op="cslv";}  |
Top  {$op="ctop";}  |
Clsd  {$op="ccld";}  |
Int  {$op="cnt";}  |
TX  {$op="ctx";}  |
TopGrp  {$op="ctgp";}  |
NrmVec  {$op="cnvc";}  |
DIGIT_ZEROp  {$op="c0p";}  |
C_CIRCUMFLEX_ACCENTn  {$op="ccpn";}  |
MDeg  {$op="cmdg";}  |
Deg  {$op="cdgr";}  |
Log  {$op="clog";}  |
I_nv  {$op="cgn";}  |
DivRingOps  {$op="cdrng";}  |
ASTERISK_HYPHEN_MINUSFld  {$op="csfld";}  |
ComDIGIT_TWO  {$op="ccm2";}  |
HmOp  {$op="chmo";}  |
DIGIT_ZEROh  {$op="c0v";}  |
FULL_STOPop  {$op="chot";}  |
LinFn  {$op="clf";}  |
EulPaths  {$op="ceup";}  |
VDeg  {$op="cvdg";}  |
AxPow  {$op="cgzp";}  |
HomLimB  {$op="chlb";}  |
Prj  {$op="cproj";}  |
CurDIGIT_TWO  {$op="ccur2";}  |
LeR  {$op="cse";}  |
ComDIGIT_ONE  {$op="ccm1";}  |
SubSemiGrpGen  {$op="csbsgrg";}  |
FreeSemiGrp  {$op="cfsm";}  |
SubVec  {$op="csvec";}  |
FULL_STOPm  {$op="csmat";}  |
TermObj  {$op="ctobj";}  |
IdSetCat  {$op="cidcase";}  |
Words  {$op="cwrd";}  |
Sym  {$op="csym";}  |
SOLIDUS_REVERSE_SOLIDUSc  {$op="cands";}  |
Psc  {$op="clpsc";}  |
Ors  {$op="cors";}  |
Prop  {$op="cprop";}  |
PPoints  {$op="cpoints";}  |
Coln  {$op="ccol";}  |
Ibg  {$op="cibg";}  |
Slices  {$op="cslices";}  |
Ismty  {$op="cismty";}  |
Dmn  {$op="cdmn";}  |
NoeACS  {$op="cnacs";}  |
LIndS  {$op="clinds";}  |
MinPolyAA  {$op="cmpaa";}  |
IntgOver  {$op="citgo";}  |
MaDet  {$op="cmdat";}  |
Neighbors  {$op="cnbgra";}  |
Paths  {$op="cpath";}  |
Sinh  {$op="csinh";}  |
LFnl  {$op="clfn";}  |
Pmap  {$op="cpmap";}  |
LAut  {$op="claut";}  |
EDRing  {$op="cedring";}  |
DVecH  {$op="cdvh";}  |
HGMap  {$op="chg";}  |
LOW_LINEE  {$op="cep";}  |
Om  {$op="com";}  |
DIGIT_TWOnd  {$op="c2nd";}  |
R_DIGIT_ONE  {$op="cr1";}  |
Aleph  {$op="cale";}  |
Cf  {$op="ccf";}  |
WUni  {$op="cwun";}  |
TarskiMap  {$op="ctskm";}  |
PLUS_SIGNN  {$op="cpli";}  |
LESS_THAN_SIGNP  {$op="cltp";}  |
R_FULL_STOP  {$op="cnr";}  |
DIGIT_ZEROR  {$op="c0r";}  |
LESS_THAN_SIGNRR  {$op="cltrr";}  |
SOLIDUS  {$op="cdiv";}  |
DIGIT_SEVEN  {$op="c7";}  |
DIGIT_EIGHT  {$op="c8";}  |
EXCLAMATION_MARK  {$op="cfa";}  |
Re  {$op="cre";}  |
Pi  {$op="cpi";}  |
VERTICAL_LINE_VERTICAL_LINE  {$op="cdivides";}  |
Numer  {$op="cnumer";}  |
PCnt  {$op="cpc";}  |
MonoAP  {$op="cvdwm";}  |
Scalar  {$op="csca";}  |
TopSet  {$op="cts";}  |
TopOpen  {$op="ctopn";}  |
QUOTATION_MARKs  {$op="cimas";}  |
Epi  {$op="cepi";}  |
CatCat  {$op="ccatc";}  |
DiagFunc  {$op="cdiag";}  |
Glb  {$op="cglb";}  |
Supw  {$op="cspw";}  |
PLUS_SIGNf  {$op="cplusf";}  |
PSyl  {$op="cslw";}  |
SOLIDUSr  {$op="cdvr";}  |
LIdeal  {$op="clidl";}  |
MPoly  {$op="cmpl";}  |
PwSerDIGIT_ONE  {$op="cps1";}  |
PolyDIGIT_ONE  {$op="cpl1";}  |
ToHL  {$op="cthl";}  |
Nei  {$op="cnei";}  |
KGen  {$op="ckgen";}  |
FBas  {$op="cfbas";}  |
FilGen  {$op="cfg";}  |
FilMap  {$op="cfm";}  |
FLim  {$op="cflim";}  |
FLimf  {$op="cflf";}  |
FClus  {$op="cfcls";}  |
TopMod  {$op="ctlm";}  |
ToNrmGrp  {$op="ctng";}  |
PiDIGIT_ONE  {$op="cpi1";}  |
PiN  {$op="cpin";}  |
CMod  {$op="cclm";}  |
CauFil  {$op="ccfil";}  |
Vol  {$op="cvol";}  |
QuotDIGIT_ONEp  {$op="cq1p";}  |
IdlGenDIGIT_ONEp  {$op="cig1p";}  |
Quot  {$op="cquot";}  |
Tayl  {$op="ctayl";}  |
Theta  {$op="ccht";}  |
Ass  {$op="cass";}  |
Fld  {$op="cfld";}  |
BaseSet  {$op="cba";}  |
NormCV  {$op="cnmcv";}  |
DIGIT_ZEROop  {$op="c0o";}  |
Normh  {$op="cno";}  |
Iop  {$op="chio";}  |
ConOp  {$op="ccop";}  |
States  {$op="cst";}  |
BrSiga  {$op="cbrsiga";}  |
EQUALS_SIGNg  {$op="cgoq";}  |
GF_LOW_LINEoo  {$op="cgfo";}  |
Zp  {$op="czp";}  |
Img  {$op="cimg";}  |
Domain  {$op="cdomain";}  |
SegLESS_THAN_SIGN_LOW_LINE  {$op="csegle";}  |
Ray  {$op="cray";}  |
Mxl  {$op="cmxl";}  |
SemiGrpHom  {$op="csmhom";}  |
Tofld  {$op="ctofld";}  |
FLimfrs  {$op="cflimfrs";}  |
TopFld  {$op="ctopfld";}  |
HYPHEN_MINUScv  {$op="cmcv";}  |
Der  {$op="cder";}  |
SumObj  {$op="csumo";}  |
RoSetCat  {$op="crocase";}  |
Notc  {$op="cnotc";}  |
Idl  {$op="cidl";}  |
MaxIdl  {$op="cmaxidl";}  |
LNoeM  {$op="clnm";}  |
LEFT_PARENTHESIS_PLUS_SIGN_RIGHT_PARENTHESISm  {$op="cdsmm";}  |
UnitVec  {$op="cuvc";}  |
MaMul  {$op="cmmul";}  |
ComplUSGrph  {$op="ccusgra";}  |
WalkOn  {$op="cwlkon";}  |
LKer  {$op="clk";}  |
LDual  {$op="cld";}  |
Cm  {$op="ccmtN";}  |
Atoms  {$op="catm";}  |
LOW_LINE_VERTICAL_LINE_LOW_LINEP  {$op="cpolN";}  |
LPol  {$op="clpoN";}  |
LEFT_PARENTHESIS_SOLIDUS_RIGHT_PARENTHESIS  {$op="c0";}  |
FULL_STOPo  {$op="comu";}  |
CIRCUMFLEX_ACCENTpm  {$op="cpm";}  |
TILDE_LESS_THAN_SIGN_LOW_LINE_ASTERISK  {$op="cwdom";}  |
Univ  {$op="cgru";}  |
N_FULL_STOP  {$op="cnpi";}  |
LESS_THAN_SIGNpQ  {$op="cltpq";}  |
ASTERISKQ  {$op="crq";}  |
HYPHEN_MINUS_DIGIT_ONER  {$op="cm1r";}  |
LOW_LINEi  {$op="ci";}  |
HYPHEN_MINUS  {$op="cmin";}  |
NN_DIGIT_ZERO  {$op="cn0";}  |
QQ  {$op="cq";}  |
LEFT_PARENTHESIS_COMMA_RIGHT_PARENTHESIS  {$op="cioo";}  |
LEFT_SQUARE_BRACKET_COMMA_RIGHT_SQUARE_BRACKET  {$op="cicc";}  |
FULL_STOP_FULL_STOP_CIRCUMFLEX_ACCENT  {$op="cfzo";}  |
Sqr  {$op="csqr";}  |
Bits  {$op="cbits";}  |
Ndx  {$op="cnx";}  |
Oc  {$op="coc";}  |
Hom  {$op="chom";}  |
OrdTop  {$op="cordt";}  |
Moore  {$op="cmre";}  |
IdA  {$op="cida";}  |
HomF  {$op="chof";}  |
SubMnd  {$op="csubmnd";}  |
VarFMnd  {$op="cvrmd";}  |
SymGrp  {$op="csymg";}  |
Ring  {$op="crg";}  |
OppR  {$op="coppr";}  |
Unit  {$op="cui";}  |
ASTERISKrf  {$op="cstf";}  |
ASTERISKRing  {$op="csr";}  |
TILDE_EQUALS_SIGNm  {$op="clmic";}  |
EvalDIGIT_ONE  {$op="ce1";}  |
CoeDIGIT_ONE  {$op="cco1";}  |
ASTERISKMet  {$op="cxmt";}  |
Ball  {$op="cbl";}  |
Z_SOLIDUSnZ  {$op="czn";}  |
TopSpOLD  {$op="ctpsOLD";}  |
TopSp  {$op="ctps";}  |
Con  {$op="ccon";}  |
DIGIT_TWOndc  {$op="c2ndc";}  |
TopRing  {$op="ctrg";}  |
MetSp  {$op="cmt";}  |
NrmRing  {$op="cnrg";}  |
PHtpy  {$op="cphtpy";}  |
VolASTERISK  {$op="covol";}  |
Ana  {$op="cana";}  |
RPrime  {$op="crpm";}  |
TASTERISK  {$op="crtcl";}  |
Magma  {$op="cmagm";}  |
BLnOp  {$op="cblo";}  |
PLUS_SIGNh  {$op="cva";}  |
TILDE_TILDE_GREATER_THAN_SIGNv  {$op="chli";}  |
CH  {$op="cch";}  |
LOW_LINE_VERTICAL_LINE_LOW_LINE  {$op="cort";}  |
Span  {$op="cspn";}  |
DIGIT_ZEROH  {$op="c0h";}  |
Normop  {$op="cnop";}  |
MH  {$op="cmd";}  |
Logb  {$op="clogb";}  |
SigaGen  {$op="csigagen";}  |
SCon  {$op="cscon";}  |
EFULL_STOPg  {$op="cgoe";}  |
Fmla  {$op="cfmla";}  |
GF  {$op="cgf";}  |
CIRCUMFLEX_ACCENTr  {$op="crelexp";}  |
Colinear  {$op="ccolin";}  |
OutsideOf  {$op="coutsideof";}  |
Pr  {$op="cpro";}  |
OrHom  {$op="corhom";}  |
SubSemiGrp  {$op="csubsmg";}  |
IdlNEW  {$op="cidln";}  |
RVec  {$op="cvr";}  |
PLUS_SIGNm  {$op="cmmat";}  |
DomLOW_LINE  {$op="cdom_";}  |
CodLOW_LINE  {$op="ccod_";}  |
T_r  {$op="ctr";}  |
GraphSetCat  {$op="cgraphcase";}  |
IndCls  {$op="clincl";}  |
HYPHEN_MINUS_FULL_STOPc  {$op="cnots";}  |
Trcng  {$op="ctrcng";}  |
RngHom  {$op="crnghom";}  |
PolyLESS_THAN_SIGN  {$op="cplylt";}  |
PmTrsp  {$op="cpmtr";}  |
Cycles  {$op="ccycl";}  |
Tanh  {$op="ctanh";}  |
OML  {$op="coml";}  |
LLines  {$op="clln";}  |
LHyp  {$op="clh";}  |
OcA  {$op="cocaN";}  |
DIsoC  {$op="cdic";}  |
HVMap  {$op="chvm";}  |
LOW_LINEV  {$op="cvv";}  |
LOW_LINEI  {$op="cid";}  |
PLUS_SIGNo  {$op="coa";}  |
CNF  {$op="ccnf";}  |
Rank  {$op="crnk";}  |
FinDIGIT_THREE  {$op="cfin3";}  |
FULL_STOPP_FULL_STOP  {$op="cmp";}  |
TILDER  {$op="cer";}  |
FULL_STOPR  {$op="cmr";}  |
PLUS_SIGN  {$op="caddc";}  |
DIGIT_FIVE  {$op="c5";}  |
DIGIT_ONE_DIGIT_ZERO  {$op="c10";}  |
LOW_LINEe  {$op="ceu";}  |
FULL_STOPr  {$op="cmulr";}  |
XsLOW_LINE  {$op="cprds";}  |
Gsum  {$op="cgsu";}  |
CodA  {$op="ccoda";}  |
DIGIT_TWOndF  {$op="c2ndf";}  |
DLat  {$op="cdlat";}  |
TILDEQG  {$op="cqg";}  |
ProjDIGIT_ONE  {$op="cpj1";}  |
Field  {$op="cfield";}  |
FULL_STOPsf  {$op="cscaf";}  |
AssAlg  {$op="casa";}  |
ToPolyDIGIT_ONE  {$op="ctp1";}  |
ZRHom  {$op="czrh";}  |
Ocv  {$op="cocv";}  |
Fre  {$op="ct1";}  |
Haus  {$op="cha";}  |
CNrm  {$op="ccnrm";}  |
Fil  {$op="cfil";}  |
OmDIGIT_ONE  {$op="comi";}  |
MblFn  {$op="cmbf";}  |
S_FULL_STOP_DIGIT_ONE  {$op="citg1";}  |
MonicDIGIT_ONEp  {$op="cmn1";}  |
RemDIGIT_ONEp  {$op="cr1p";}  |
CIRCUMFLEX_ACCENTc  {$op="ccxp";}  |
Gamma  {$op="cem";}  |
Psi  {$op="cchp";}  |
DChr  {$op="cdchr";}  |
NrmCVec  {$op="cnv";}  |
NormOpOLD  {$op="cnmoo";}  |
C_LOW_LINEH  {$op="ccm";}  |
Projh  {$op="cpjh";}  |
HrmOp  {$op="cho";}  |
Null  {$op="cnl";}  |
ConFn  {$op="ccnfn";}  |
SOLIDUSe  {$op="cxdiv";}  |
Prob  {$op="cprb";}  |
Cprob  {$op="ccprob";}  |
CplMetSp  {$op="ccpms";}  |
LOW_LINEQp  {$op="cqpa";}  |
LinesEE  {$op="clines2";}  |
OrIso  {$op="coriso";}  |
Mnl  {$op="cmnl";}  |
Xm  {$op="cxmat";}  |
UnifSp  {$op="cunifsp";}  |
Frf  {$op="cfrf";}  |
Intvl  {$op="cintvl";}  |
Pc  {$op="cPc";}  |
Triangle  {$op="ctriangle";}  |
Ss  {$op="csas";}  |
Segc  {$op="csegc";}  |
IdlGen  {$op="cigen";}  |
PellDIGIT_ONE_DIGIT_TWO_DIGIT_THREE_DIGIT_FOURQR  {$op="cpell1234qr";}  |
PellFund  {$op="cpellfund";}  |
PLUS_SIGNr  {$op="cplusr";}  |
USGrph  {$op="cusg";}  |
GREATER_THAN_SIGN_LOW_LINE  {$op="cge-real";}  |
LVols  {$op="clvol";}  |
PLUS_SIGNP  {$op="cpadd";}  |
PSubCl  {$op="cpscN";}  |
LDil  {$op="cldil";}  |
DIsoA  {$op="cdia";}  |
HLHil  {$op="chlh";}  |
DIGIT_ONEo  {$op="c1o";}  |
FinDIGIT_ONEa  {$op="cfin1a";}  |
FinDIGIT_FOUR  {$op="cfin4";}  |
FinDIGIT_FIVE  {$op="cfin5";}  |
InaccW  {$op="cwina";}  |
Inacc  {$op="cina";}  |
WUniCl  {$op="cwunm";}  |
DIGIT_ONEP  {$op="c1p";}  |
DIGIT_FOUR  {$op="c4";}  |
ZZ  {$op="cz";}  |
PLUS_SIGNe  {$op="cxad";}  |
LEFT_PARENTHESIS_COMMA_RIGHT_SQUARE_BRACKET  {$op="cioc";}  |
NUMBER_SIGN  {$op="chash";}  |
Im  {$op="cim";}  |
Abs  {$op="cabs";}  |
Limsup  {$op="clsp";}  |
Smul  {$op="csmu";}  |
Denom  {$op="cdenom";}  |
PolyAP  {$op="cvdwp";}  |
Unif  {$op="cunif";}  |
TopGen  {$op="ctg";}  |
Cat  {$op="ccat";}  |
C_LOW_LINEcat  {$op="cssc";}  |
DIGIT_ONEstF  {$op="c1stf";}  |
EvalF  {$op="cevlf";}  |
DIGIT_ZERO_FULL_STOP  {$op="cp0";}  |
Mnd  {$op="cmnd";}  |
TILDE_EQUALS_SIGNg  {$op="cgic";}  |
Od  {$op="cod";}  |
Irred  {$op="cir";}  |
LMod  {$op="clmod";}  |
LBasis  {$op="clbs";}  |
DIGIT_TWOIdeal  {$op="c2idl";}  |
LPIdeal  {$op="clpidl";}  |
RLReg  {$op="crlreg";}  |
AlgSpan  {$op="casp";}  |
AlgInd  {$op="cai";}  |
VarDIGIT_ONE  {$op="cv1";}  |
Met  {$op="cme";}  |
FULL_STOPif  {$op="cipf";}  |
LimPt  {$op="clp";}  |
Cn  {$op="ccn";}  |
Comp  {$op="ccmp";}  |
NrmMod  {$op="cnlm";}  |
II  {$op="cii";}  |
TILDE_EQUALS_SIGNph  {$op="cphtpc";}  |
CPreHil  {$op="ccph";}  |
LOW_LINED  {$op="cdv";}  |
Mmu  {$op="cmu";}  |
GrpOpIso  {$op="cgiso";}  |
PLUS_SIGNv  {$op="cpv";}  |
FULL_STOPiOLD  {$op="cdip";}  |
CHilOLD  {$op="chlo";}  |
FULL_STOPih  {$op="csp";}  |
DIGIT_ZEROhop  {$op="ch0o";}  |
UniOp  {$op="cuo";}  |
Ketbra  {$op="ck";}  |
SX  {$op="csx";}  |
MblFnM  {$op="cmbfm";}  |
IblFnM  {$op="cibfm";}  |
CovMap  {$op="ccvm";}  |
Sat  {$op="csat";}  |
SOLIDUS_REVERSE_SOLIDUSg  {$op="cgoa";}  |
PolyFld  {$op="cpfl";}  |
TILDEQp  {$op="ceqp";}  |
Cp  {$op="ccp";}  |
Funs  {$op="cfuns";}  |
Cap  {$op="ccap";}  |
Cgr  {$op="ccgr";}  |
BernPoly  {$op="cbp";}  |
Ge  {$op="cge";}  |
HYPHEN_MINUSucv  {$op="cnegcv";}  |
FULL_STOPcv  {$op="csmcv";}  |
H_om  {$op="chomOLD";}  |
Epic  {$op="cepiOLD";}  |
MonoOLD  {$op="cmonOLD";}  |
SubCat  {$op="csubcat";}  |
DomSetCat  {$op="cdomcase";}  |
Ig  {$op="cig";}  |
Seg  {$op="cseg";}  |
Circle  {$op="ccircle";}  |
Fne  {$op="cfne";}  |
Bnd  {$op="cbnd";}  |
CRingOps  {$op="ccring";}  |
PellDIGIT_ONE_DIGIT_FOURQR  {$op="cpell14qr";}  |
RmX  {$op="crmx";}  |
LNoeR  {$op="clnr";}  |
Mat  {$op="cmat";}  |
FULL_STOPv  {$op="ctimesr";}  |
RR_DIGIT_THREE  {$op="crr3c";}  |
UnivVertex  {$op="cuvtx";}  |
FriendGrph  {$op="cfrgra";}  |
OL  {$op="col";}  |
Dil  {$op="cdilN";}  |
JoinH  {$op="cdjh";}  |
DIGIT_ONEst  {$op="c1st";}  |
CIRCUMFLEX_ACCENTm  {$op="cmap";}  |
Fin  {$op="cfn";}  |
FinDIGIT_SEVEN  {$op="cfin7";}  |
GCH  {$op="cgch";}  |
FULL_STOPN  {$op="cmi";}  |
TILDEQ  {$op="ceq";}  |
PLUS_SIGNpR  {$op="cplpr";}  |
RR_ASTERISK  {$op="cxr";}  |
LESS_THAN_SIGN_LOW_LINE  {$op="cle";}  |
NN  {$op="cn";}  |
ZZ_GREATER_THAN_SIGN_EQUALS_SIGN  {$op="cuz";}  |
ASTERISKe  {$op="cxmu";}  |
LOW_LINEC  {$op="cbc";}  |
Shift  {$op="cshi";}  |
ASTERISK  {$op="ccj";}  |
TILDE_TILDE_GREATER_THAN_SIGN  {$op="cli";}  |
Prime  {$op="cprime";}  |
AP  {$op="cvdwa";}  |
Ramsey  {$op="cram";}  |
Base  {$op="cbs";}  |
PLUS_SIGNg  {$op="cplusg";}  |
XtLOW_LINE  {$op="cpt";}  |
SOLIDUSs  {$op="cqus";}  |
Homf  {$op="chomf";}  |
Sect  {$op="csect";}  |
VERTICAL_LINE_GRAVE_ACCENTcat  {$op="cresc";}  |
Full  {$op="cful";}  |
Faith  {$op="cfth";}  |
DomA  {$op="cdoma";}  |
CurryF  {$op="ccurf";}  |
DirRel  {$op="cdir";}  |
Invg  {$op="cminusg";}  |
FULL_STOPg  {$op="cmg";}  |
FreeMnd  {$op="cfrmd";}  |
GrpHom  {$op="cghm";}  |
GEx  {$op="cgex";}  |
TILDEFG  {$op="cefg";}  |
VERTICAL_LINE_VERTICAL_LINEr  {$op="cdsr";}  |
Invr  {$op="cinvr";}  |
RingHom  {$op="crh";}  |
LSubSp  {$op="clss";}  |
NzRing  {$op="cnzr";}  |
IDomn  {$op="cidom";}  |
MVar  {$op="cmvr";}  |
MHomP  {$op="cmhp";}  |
CSubSp  {$op="ccss";}  |
Perf  {$op="cperf";}  |
Reg  {$op="creg";}  |
CIRCUMFLEX_ACCENTko  {$op="cxko";}  |
NrmGrp  {$op="cngp";}  |
NormOp  {$op="cnmo";}  |
NMHom  {$op="cnmhm";}  |
L_CIRCUMFLEX_ACCENT_DIGIT_ONE  {$op="cibl";}  |
Poly  {$op="cply";}  |
Arcsin  {$op="casin";}  |
SubGrpOp  {$op="csubgo";}  |
Adj  {$op="caj";}  |
CPreHilOLD  {$op="ccphlo";}  |
HYPHEN_MINUSh  {$op="cmv";}  |
SH  {$op="csh";}  |
REVERSE_SOLIDUS_SOLIDUSH  {$op="chsup";}  |
PLUS_SIGNfn  {$op="chfs";}  |
LOW_LINEInd  {$op="cind";}  |
Zeta  {$op="czeta";}  |
LOW_LINEG  {$op="cgam";}  |
HomLim  {$op="chlim";}  |
Bday  {$op="cbday";}  |
SSet  {$op="csset";}  |
Range  {$op="crange";}  |
Succ  {$op="csuccf";}  |
Hf  {$op="chf";}  |
Cset  {$op="ccst";}  |
LatAlg  {$op="clatalg";}  |
TopX  {$op="ctopx";}  |
Opfn  {$op="copfn";}  |
SOLIDUScv  {$op="cdivcv";}  |
OLOW_LINE  {$op="co_";}  |
Isofunc  {$op="cifunc";}  |
LimCat  {$op="clmct";}  |
Kleene  {$op="ckln";}  |
Conc  {$op="cconc";}  |
REVERSE_SOLIDUS_SOLIDUSc  {$op="clors";}  |
LESS_THAN_SIGN_EQUALS_SIGN_GREATER_THAN_SIGNc  {$op="cbis";}  |
L_ine  {$op="cline";}  |
Btw  {$op="cbtw";}  |
R_ay  {$op="cray2";}  |
Ref  {$op="cref";}  |
PrIdl  {$op="cpridl";}  |
LIndF  {$op="clindf";}  |
MaAdju  {$op="cmadu";}  |
MEndo  {$op="cmend";}  |
SubDRing  {$op="csdrg";}  |
TopLnd  {$op="ctoplnd";}  |
USLGrph  {$op="cuslg";}  |
Csc  {$op="ccsc";}  |
Sgn  {$op="csgn";}  |
LSAtoms  {$op="clsa";}  |
LESS_THAN_SIGNoL  {$op="clcv";}  |
AtLat  {$op="cal";}  |
TEndo  {$op="ctendo";}  |
DVecA  {$op="cdveca";}  |
OcH  {$op="coch";}  |
HDMapDIGIT_ONE  {$op="chdma1";}  |
HDMap  {$op="chdma";}  |
On  {$op="con0";}  |
FinDIGIT_TWO  {$op="cfin2";}  |
FinDIGIT_SIX  {$op="cfin6";}  |
Tarski  {$op="ctsk";}  |
PLUS_SIGNpQ  {$op="cplpq";}  |
PLUS_SIGNQ  {$op="cplq";}  |
P_FULL_STOP  {$op="cnp";}  |
PLUS_SIGNP_FULL_STOP  {$op="cpp";}  |
LESS_THAN_SIGNR  {$op="cltr";}  |
RR  {$op="cr";}  |
XFULL_STOP  {$op="cmul";}  |
DIGIT_THREE  {$op="c3";}  |
RR_PLUS_SIGN  {$op="crp";}  |
VERTICAL_LINE_LOW_LINE  {$op="cfl";}  |
CIRCUMFLEX_ACCENT  {$op="cexp";}  |
LESS_THAN_SIGN_LOW_LINEO_LEFT_PARENTHESIS_DIGIT_ONE_RIGHT_PARENTHESIS  {$op="clo1";}  |
Phi  {$op="cphi";}  |
S_Set  {$op="csts";}  |
Le  {$op="cple";}  |
CIRCUMFLEX_ACCENTs  {$op="cpws";}  |
Mono  {$op="cmon";}  |
Subcat  {$op="csubc";}  |
OFULL_STOPfunc  {$op="ccofu";}  |
Lt  {$op="cplt";}  |
Join  {$op="cjn";}  |
DIGIT_ONE_FULL_STOP  {$op="cp1";}  |
MndHom  {$op="cmhm";}  |
SubGrp  {$op="csubg";}  |
NrmSGrp  {$op="cnsg";}  |
Cntz  {$op="ccntz";}  |
CMnd  {$op="ccmn";}  |
Abel  {$op="cabel";}  |
LMIso  {$op="clmim";}  |
SubringAlg  {$op="csra";}  |
RSpan  {$op="crsp";}  |
LPIR  {$op="clpir";}  |
Domn  {$op="cdomn";}  |
AlgSc  {$op="cascl";}  |
MPwSer  {$op="cmps";}  |
MPSDer  {$op="cpsd";}  |
LESS_THAN_SIGNbag  {$op="cltb";}  |
CCfld  {$op="ccnfld";}  |
TopOn  {$op="ctopon";}  |
PNrm  {$op="cpnrm";}  |
DIGIT_ONEstc  {$op="c1stc";}  |
Htpy  {$op="chtpy";}  |
CMet  {$op="cms";}  |
S_FULL_STOP_DIGIT_TWO  {$op="citg2";}  |
Dn  {$op="cdvn";}  |
UnicDIGIT_ONEp  {$op="cuc1p";}  |
Xp  {$op="cidp";}  |
Arccos  {$op="cacos";}  |
Ppi  {$op="cppi";}  |
Sigma  {$op="csgm";}  |
GrpOpHom  {$op="cghom";}  |
DIGIT_ZEROvec  {$op="cn0v";}  |
HYPHEN_MINUSv  {$op="cnsb";}  |
FULL_STOPh  {$op="csm";}  |
VH  {$op="chj";}  |
HYPHEN_MINUSop  {$op="chod";}  |
BndLinOp  {$op="cbo";}  |
Eigvec  {$op="cei";}  |
Lambda  {$op="cspc";}  |
LESS_THAN_SIGNoH  {$op="ccv";}  |
HAtoms  {$op="cat";}  |
MH_ASTERISK  {$op="cdmd";}  |
VERTICAL_LINEg  {$op="cgna";}  |
SatE  {$op="csate";}  |
VERTICAL_LINE_EQUALS_SIGN  {$op="cprv";}  |
AxInf  {$op="cgzi";}  |
ZF  {$op="cgzf";}  |
IntgRing  {$op="citr";}  |
SOLIDUSQp  {$op="crqp";}  |
LESS_THAN_SIGNs  {$op="cslt";}  |
Trans  {$op="ctrans";}  |
Singletons  {$op="csingles";}  |
Cart  {$op="ccart";}  |
EE  {$op="cee";}  |
PLUS_SIGNcv  {$op="cplcv";}  |
CatOLD  {$op="ccatOLD";}  |
IsoOLD  {$op="cisoOLD";}  |
Source  {$op="csrce";}  |
Sink  {$op="csnk";}  |
MorphismSetCat  {$op="ccmrcase";}  |
EQUALS_SIGN_GREATER_THAN_SIGNc  {$op="cimps";}  |
Andc  {$op="candc";}  |
Impc  {$op="cimpc";}  |
Convex  {$op="cconvex";}  |
Ibcg  {$op="cibcg";}  |
Angtrg  {$op="cangtrg";}  |
PtFin  {$op="cptfin";}  |
RngIso  {$op="crngiso";}  |
PellDIGIT_ONEQR  {$op="cpell1qr";}  |
LdgIdlSeq  {$op="cldgis";}  |
DegAA  {$op="cdgraa";}  |
LOW_LINEZZ  {$op="cza";}  |
HYPHEN_MINUSr  {$op="cminusr";}  |
TrailOn  {$op="ctrlon";}  |
Sec  {$op="csec";}  |
Cot  {$op="ccot";}  |
OP  {$op="cops";}  |
Points  {$op="cpointsN";}  |
PSubSp  {$op="cpsubsp";}  |
PCl  {$op="cpclN";}  |
WAtoms  {$op="cwpointsN";}  |
PAut  {$op="cpautN";}  |
TrL  {$op="ctrl";}  |
DIsoB  {$op="cdib";}  ;


cC3 locals[String op] : LESS_THAN_SIGN_QUOTATION_MARK c c c QUOTATION_MARK_GREATER_THAN_SIGN  {$op="cs3";}  |
TrPred LEFT_PARENTHESIS c COMMA c COMMA c RIGHT_PARENTHESIS  {$op="ctrpred";}  |
LESS_THAN_SIGN_FULL_STOP c COMMA c COMMA c GREATER_THAN_SIGN_FULL_STOP  {$op="cotp";}  |
S_FULL_STOPm c c LOW_LINEd c  {$op="citgm";}  |
LEFT_PARENTHESIS_LEFT_PARENTHESIS c c c RIGHT_PARENTHESIS_RIGHT_PARENTHESIS  {$op="caov";}  |
LOW_LINEpred LEFT_PARENTHESIS c COMMA c COMMA c RIGHT_PARENTHESIS  {$op="c-bnj14";}  |
Pred LEFT_PARENTHESIS c COMMA c COMMA c RIGHT_PARENTHESIS  {$op="cpred";}  |
LEFT_PARENTHESIS c c c RIGHT_PARENTHESIS  {$op="co";}  |
LOW_LINEtrCl LEFT_PARENTHESIS c COMMA c COMMA c RIGHT_PARENTHESIS  {$op="c-bnj18";}  |
Sup LEFT_PARENTHESIS c COMMA c COMMA c RIGHT_PARENTHESIS  {$op="csup";}  |
Seq c LEFT_PARENTHESIS c COMMA c RIGHT_PARENTHESIS  {$op="cseq";}  |
LEFT_CURLY_BRACKET c COMMA c COMMA c RIGHT_CURLY_BRACKET  {$op="ctp";}  ;


cC7 locals[String op] : LESS_THAN_SIGN_QUOTATION_MARK c c c c c c c QUOTATION_MARK_GREATER_THAN_SIGN  {$op="cs7";}  ;


cSC2 locals[String op] : InfLOW_LINE s EFULL_STOP c c  {$op="clinfp";}  |
X_LOW_LINE s EFULL_STOP c c  {$op="cixp";}  |
LEFT_PARENTHESIS s EFULL_STOP c VERTICAL_LINE_HYPHEN_MINUS_GREATER_THAN_SIGN c RIGHT_PARENTHESIS  {$op="cmpt";}  |
SumASTERISK s EFULL_STOP c c  {$op="cesum";}  |
SupLOW_LINE s EFULL_STOP c c  {$op="clsupp";}  |
VERTICAL_LINE_CIRCUMFLEX_ACCENT_VERTICAL_LINE_LOW_LINE s EFULL_STOP c c  {$op="ciin";}  |
SumLOW_LINE s EFULL_STOP c c  {$op="csu";}  |
U_LOW_LINE s EFULL_STOP c c  {$op="ciun";}  |
ProdLOW_LINE s EFULL_STOP c c  {$op="cprod";}  ;


wW12 locals[String op] : LEFT_PARENTHESIS_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w RIGHT_PARENTHESIS_FULL_STOP  {$op="wvhc12";}  ;


w0 locals[String op] : F_FULL_STOP  {$op="wfal";}  |
CHOICE  {$op="wac";}  |
T_FULL_STOP  {$op="wtru";}  ;


wW3 locals[String op] : Hadd LEFT_PARENTHESIS w COMMA w COMMA w RIGHT_PARENTHESIS  {$op="whad";}  |
LEFT_PARENTHESIS w REVERSE_SOLIDUS_SOLIDUS w REVERSE_SOLIDUS_SOLIDUS w RIGHT_PARENTHESIS  {$op="w3o";}  |
LEFT_PARENTHESIS w SOLIDUS_REVERSE_SOLIDUS w SOLIDUS_REVERSE_SOLIDUS w RIGHT_PARENTHESIS  {$op="w3a";}  |
LEFT_PARENTHESIS_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w RIGHT_PARENTHESIS_FULL_STOP  {$op="wvhc3";}  |
LEFT_PARENTHESIS w HYPHEN_MINUS_SOLIDUS_REVERSE_SOLIDUS w HYPHEN_MINUS_SOLIDUS_REVERSE_SOLIDUS w RIGHT_PARENTHESIS  {$op="w3nand";}  |
Cadd LEFT_PARENTHESIS w COMMA w COMMA w RIGHT_PARENTHESIS  {$op="wcad";}  |
LEFT_PARENTHESIS_FULL_STOP w COMMA_FULL_STOP w HYPHEN_MINUS_GREATER_THAN_SIGN_FULL_STOP w RIGHT_PARENTHESIS_FULL_STOP  {$op="wvd2";}  ;


wS2 locals[String op] : s EFULL_STOP s  {$op="wel";}  |
s EQUALS_SIGN s  {$op="weq";}  ;


cS3W locals[String op] : LEFT_CURLY_BRACKET LESS_THAN_SIGN_FULL_STOP LESS_THAN_SIGN_FULL_STOP s COMMA s GREATER_THAN_SIGN_FULL_STOP COMMA s GREATER_THAN_SIGN_FULL_STOP VERTICAL_LINE w RIGHT_CURLY_BRACKET  {$op="coprab";}  ;


cSCSC2 locals[String op] : LEFT_PARENTHESIS s EFULL_STOP c COMMA s EFULL_STOP c VERTICAL_LINE_HYPHEN_MINUS_GREATER_THAN_SIGN c RIGHT_PARENTHESIS  {$op="cmpt2";}  ;


cC4 locals[String op] : LESS_THAN_SIGN_QUOTATION_MARK c c c c QUOTATION_MARK_GREATER_THAN_SIGN  {$op="cs4";}  ;


cC5 locals[String op] : LESS_THAN_SIGN_QUOTATION_MARK c c c c c QUOTATION_MARK_GREATER_THAN_SIGN  {$op="cs5";}  ;


wW4 locals[String op] : LEFT_PARENTHESIS_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w HYPHEN_MINUS_GREATER_THAN_SIGN_FULL_STOP w RIGHT_PARENTHESIS_FULL_STOP  {$op="wvd3";}  |
LEFT_PARENTHESIS_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w RIGHT_PARENTHESIS_FULL_STOP  {$op="wvhc4";}  |
LEFT_PARENTHESIS w SOLIDUS_REVERSE_SOLIDUS w SOLIDUS_REVERSE_SOLIDUS w SOLIDUS_REVERSE_SOLIDUS w RIGHT_PARENTHESIS  {$op="w-bnj17";}  ;


wW6 locals[String op] : LEFT_PARENTHESIS_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w RIGHT_PARENTHESIS_FULL_STOP  {$op="wvhc6";}  ;


wW9 locals[String op] : LEFT_PARENTHESIS_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w RIGHT_PARENTHESIS_FULL_STOP  {$op="wvhc9";}  ;


wSCW locals[String op] : E_ASTERISK s EFULL_STOP c w  {$op="wrmo";}  |
E_EXCLAMATION_MARK s EFULL_STOP c w  {$op="wreu";}  |
A_FULL_STOP s EFULL_STOP c w  {$op="wral";}  |
E_FULL_STOP s EFULL_STOP c w  {$op="wrex";}  ;


wS2W locals[String op] : LEFT_SQUARE_BRACKET s SOLIDUS s RIGHT_SQUARE_BRACKET w  {$op="wsb";}  |
CondEq LEFT_PARENTHESIS s EQUALS_SIGN s HYPHEN_MINUS_GREATER_THAN_SIGN w RIGHT_PARENTHESIS  {$op="wcdeq";}  ;


cSCW locals[String op] : LEFT_PARENTHESIS IotaLOW_LINE s EFULL_STOP c w RIGHT_PARENTHESIS  {$op="crio";}  |
LEFT_CURLY_BRACKET s EFULL_STOP c VERTICAL_LINE w RIGHT_CURLY_BRACKET  {$op="crab";}  ;


cSC3 locals[String op] : ProdDIGIT_THREE_LOW_LINE s EFULL_STOP c c c  {$op="cgprd3";}  |
ProdDIGIT_TWO_LOW_LINE s EFULL_STOP c c c  {$op="cgprd2";}  |
GprodLOW_LINE s EFULL_STOP c c c  {$op="cgprd";}  ;


wW8 locals[String op] : LEFT_PARENTHESIS_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w RIGHT_PARENTHESIS_FULL_STOP  {$op="wvhc8";}  ;


wSC locals[String op] : F_SOLIDUS_LOW_LINE s c  {$op="wnfc";}  ;


wCSW locals[String op] : LEFT_SQUARE_BRACKET_FULL_STOP c SOLIDUS s RIGHT_SQUARE_BRACKET_FULL_STOP w  {$op="wsbc";}  ;


wC3 locals[String op] : c COLON c HYPHEN_MINUSontoHYPHEN_MINUS_GREATER_THAN_SIGN c  {$op="wfo";}  |
c COLON c HYPHEN_MINUS_DIGIT_ONE_HYPHEN_MINUS_DIGIT_ONE_HYPHEN_MINUS_GREATER_THAN_SIGN c  {$op="wf1";}  |
c COLON c HYPHEN_MINUS_DIGIT_ONE_HYPHEN_MINUS_DIGIT_ONE_HYPHEN_MINUSontoHYPHEN_MINUS_GREATER_THAN_SIGN c  {$op="wf1o";}  |
c COLON c HYPHEN_MINUS_HYPHEN_MINUS_GREATER_THAN_SIGN c  {$op="wf";}  |
c c c  {$op="wbr";}  |
LOW_LINETrFo LEFT_PARENTHESIS c COMMA c COMMA c RIGHT_PARENTHESIS  {$op="w-bnj19";}  ;


wW5 locals[String op] : LEFT_PARENTHESIS_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w RIGHT_PARENTHESIS_FULL_STOP  {$op="wvhc5";}  ;


wW10 locals[String op] : LEFT_PARENTHESIS_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w RIGHT_PARENTHESIS_FULL_STOP  {$op="wvhc10";}  ;


cC8 locals[String op] : LESS_THAN_SIGN_QUOTATION_MARK c c c c c c c c QUOTATION_MARK_GREATER_THAN_SIGN  {$op="cs8";}  ;


wW11 locals[String op] : LEFT_PARENTHESIS_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w RIGHT_PARENTHESIS_FULL_STOP  {$op="wvhc11";}  ;


cCSC locals[String op] : LEFT_SQUARE_BRACKET_LOW_LINE c SOLIDUS s RIGHT_SQUARE_BRACKET_LOW_LINE c  {$op="csb";}  ;


cSW locals[String op] : LEFT_PARENTHESIS Iota s w RIGHT_PARENTHESIS  {$op="cio";}  |
LEFT_CURLY_BRACKET s VERTICAL_LINE w RIGHT_CURLY_BRACKET  {$op="cab";}  ;


cC3S locals[String op] : S_LOW_LINE LEFT_SQUARE_BRACKET c HYPHEN_MINUS_GREATER_THAN_SIGN c RIGHT_SQUARE_BRACKET c LOW_LINEd s  {$op="cdit";}  ;


cWC2 locals[String op] : If LEFT_PARENTHESIS w COMMA c COMMA c RIGHT_PARENTHESIS  {$op="cif";}  ;


cS2W locals[String op] : LEFT_CURLY_BRACKET LESS_THAN_SIGN_FULL_STOP s COMMA s GREATER_THAN_SIGN_FULL_STOP VERTICAL_LINE w RIGHT_CURLY_BRACKET  {$op="copab";}  ;


cC2S locals[String op] : S_FULL_STOP c c LOW_LINEd s  {$op="citg";}  ;


cS locals[String op] : s  {$op="cv";}  ;


wC5 locals[String op] : c Isom c COMMA c LEFT_PARENTHESIS c COMMA c RIGHT_PARENTHESIS  {$op="wiso";}  ;


cC6 locals[String op] : LESS_THAN_SIGN_QUOTATION_MARK c c c c c c QUOTATION_MARK_GREATER_THAN_SIGN  {$op="cs6";}  ;


wW7 locals[String op] : LEFT_PARENTHESIS_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w COMMA_FULL_STOP w RIGHT_PARENTHESIS_FULL_STOP  {$op="wvhc7";}  ;




classHolder : VARIABLE_CLASS;
setHolder:VARIABLE_SET;
wffHolder:VARIABLE_WFF;


 /* Lexer rules */

VARIABLE_SET : '∨' [0-9]+ 's ';
VARIABLE_WFF : '∨' [0-9]+ 'w ' ;
VARIABLE_CLASS : '∨' [0-9]+ 'c ' ;

AA : 'AA ' ;
ACS : 'ACS ' ;
AC_LOW_LINE : 'AC_ ' ;
AP : 'AP ' ;
APOSTROPHE_APOSTROPHE_APOSTROPHE : '\'\'\' ' ;
ASTERISK : '* ' ;
ASTERISKMet : '*Met ' ;
ASTERISKMetSp : '*MetSp ' ;
ASTERISKQ : '*Q ' ;
ASTERISKRing : '*Ring ' ;
ASTERISK_HYPHEN_MINUSFld : '*-Fld ' ;
ASTERISKe : '*e ' ;
ASTERISKp : '*p ' ;
ASTERISKr : '*r ' ;
ASTERISKrf : '*rf ' ;
A_FULL_STOP : 'A. ' ;
A_FULL_STOPg : 'A.g ' ;
Abel : 'Abel ' ;
AbelOp : 'AbelOp ' ;
Abs : 'abs ' ;
AbsVal : 'AbsVal ' ;
Action : 'Action ' ;
Adj : 'adj ' ;
Adjh : 'adjh ' ;
Aleph : 'aleph ' ;
Alg : 'Alg ' ;
AlgInd : 'AlgInd ' ;
AlgSc : 'algSc ' ;
AlgSpan : 'AlgSpan ' ;
Ana : 'Ana ' ;
Andc : 'andc ' ;
Angc : 'angc ' ;
Angle : 'angle ' ;
Angtrg : 'angtrg ' ;
AntiDir : 'AntiDir ' ;
Apply : 'Apply ' ;
Arccos : 'arccos ' ;
Arcsin : 'arcsin ' ;
Arctan : 'arctan ' ;
Area : 'area ' ;
Arrow : 'Arrow ' ;
Ass : 'Ass ' ;
AssAlg : 'AssAlg ' ;
AtLat : 'AtLat ' ;
Atoms : 'Atoms ' ;
AxExt : 'AxExt ' ;
AxInf : 'AxInf ' ;
AxPow : 'AxPow ' ;
AxReg : 'AxReg ' ;
AxRep : 'AxRep ' ;
AxUn : 'AxUn ' ;
BLnOp : 'BLnOp ' ;
Ball : 'ball ' ;
Ban : 'Ban ' ;
Base : 'Base ' ;
BaseSet : 'BaseSet ' ;
Bday : 'bday ' ;
BernPoly : 'BernPoly ' ;
Bic : 'bic ' ;
Bigcup : 'Bigcup ' ;
Bits : 'bits ' ;
Bnd : 'Bnd ' ;
BndLat : 'BndLat ' ;
BndLinOp : 'BndLinOp ' ;
BrSiga : 'BrSiga ' ;
Bra : 'bra ' ;
Btw : 'btw ' ;
Btwn : 'Btwn ' ;
CBan : 'CBan ' ;
CC : 'CC ' ;
CCfld : 'CCfld ' ;
CH : 'CH ' ;
CHOICE : 'CHOICE ' ;
CHStates : 'CHStates ' ;
CHil : 'CHil ' ;
CHilOLD : 'CHilOLD ' ;
CIRCUMFLEX_ACCENT : '^ ' ;
CIRCUMFLEX_ACCENTc : '^c ' ;
CIRCUMFLEX_ACCENTg : '^g ' ;
CIRCUMFLEX_ACCENTko : '^ko ' ;
CIRCUMFLEX_ACCENTm : '^m ' ;
CIRCUMFLEX_ACCENTmd : '^md ' ;
CIRCUMFLEX_ACCENTo : '^o ' ;
CIRCUMFLEX_ACCENTpm : '^pm ' ;
CIRCUMFLEX_ACCENTr : '^r ' ;
CIRCUMFLEX_ACCENTs : '^s ' ;
CLat : 'CLat ' ;
CMet : 'CMet ' ;
CMetSp : 'CMetSp ' ;
CMnd : 'CMnd ' ;
CMod : 'CMod ' ;
CNF : 'CNF ' ;
CNrm : 'CNrm ' ;
COLON : ': ' ;
COMMA : ', ' ;
COMMA_FULL_STOP : ',. ' ;
CPreHil : 'CPreHil ' ;
CPreHilOLD : 'CPreHilOLD ' ;
CRing : 'CRing ' ;
CRingOps : 'CRingOps ' ;
CSubSp : 'CSubSp ' ;
CVecOLD : 'CVecOLD ' ;
C_CIRCUMFLEX_ACCENTn : 'C^n ' ;
C_FULL_STOP : 'C. ' ;
C_LOW_LINE : 'C_ ' ;
C_LOW_LINEH : 'C_H ' ;
C_LOW_LINEcat : 'C_cat ' ;
C_omp : 'comp ' ;
C_on : 'con ' ;
Cadd : 'cadd ' ;
Cap : 'Cap ' ;
Card : 'card ' ;
Cart : 'Cart ' ;
Cat : 'Cat ' ;
CatCat : 'CatCat ' ;
CatOLD : 'CatOLD ' ;
Cau : 'Cau ' ;
CauFil : 'CauFil ' ;
Cauchy : 'Cauchy ' ;
Ceiling : 'ceiling ' ;
Cf : 'cf ' ;
Cgr : 'Cgr ' ;
CgrDIGIT_THREE : 'Cgr3 ' ;
Chr : 'chr ' ;
CinvOLD : 'cinvOLD ' ;
Circle : 'Circle ' ;
Circuits : 'Circuits ' ;
Class : 'class ' ;
Cls : 'cls ' ;
Clsd : 'Clsd ' ;
Cm : 'cm ' ;
Cn : 'Cn ' ;
CnP : 'CnP ' ;
Cntr : 'Cntr ' ;
Cntz : 'Cntz ' ;
CodA : 'codA ' ;
CodLOW_LINE : 'cod_ ' ;
CodSetCat : 'codSetCat ' ;
CoeDIGIT_ONE : 'coe1 ' ;
Coeff : 'coeff ' ;
Colinear : 'Colinear ' ;
Coln : 'coln ' ;
ComDIGIT_ONE : 'Com1 ' ;
ComDIGIT_TWO : 'Com2 ' ;
Comf : 'comf ' ;
Comp : 'Comp ' ;
CompA : 'compA ' ;
ComplUSGrph : 'ComplUSGrph ' ;
Con : 'Con ' ;
ConFn : 'ConFn ' ;
ConOp : 'ConOp ' ;
Conc : 'conc ' ;
Concat : 'concat ' ;
CondEq : 'CondEq ' ;
Convex : 'convex ' ;
Cos : 'cos ' ;
Cosh : 'cosh ' ;
Cot : 'cot ' ;
CovMap : 'CovMap ' ;
Cp : 'Cp ' ;
CplMetSp : 'cplMetSp ' ;
Cprob : 'cprob ' ;
Csc : 'csc ' ;
Cset : 'cset ' ;
Cup : 'Cup ' ;
CurDIGIT_ONE : 'cur1 ' ;
CurDIGIT_TWO : 'cur2 ' ;
Curry : 'curry ' ;
CurryF : 'curryF ' ;
Cut : 'Cut ' ;
CvLat : 'CvLat ' ;
CycGrp : 'CycGrp ' ;
Cycles : 'Cycles ' ;
CytP : 'CytP ' ;
DChr : 'DChr ' ;
DIGIT_EIGHT : '8 ' ;
DIGIT_FIVE : '5 ' ;
DIGIT_FOUR : '4 ' ;
DIGIT_FOURo : '4o ' ;
DIGIT_NINE : '9 ' ;
DIGIT_ONE : '1 ' ;
DIGIT_ONEP : '1P ' ;
DIGIT_ONEQ : '1Q ' ;
DIGIT_ONER : '1R ' ;
DIGIT_ONE_DIGIT_ZERO : '10 ' ;
DIGIT_ONE_FULL_STOP : '1. ' ;
DIGIT_ONEo : '1o ' ;
DIGIT_ONEr : '1r ' ;
DIGIT_ONEst : '1st ' ;
DIGIT_ONEstF : '1stF ' ;
DIGIT_ONEstc : '1stc ' ;
DIGIT_SEVEN : '7 ' ;
DIGIT_SIX : '6 ' ;
DIGIT_THREE : '3 ' ;
DIGIT_THREEo : '3o ' ;
DIGIT_TWO : '2 ' ;
DIGIT_TWOIdeal : '2Ideal ' ;
DIGIT_TWOnd : '2nd ' ;
DIGIT_TWOndF : '2ndF ' ;
DIGIT_TWOndc : '2ndc ' ;
DIGIT_TWOo : '2o ' ;
DIGIT_ZERO : '0 ' ;
DIGIT_ZEROH : '0H ' ;
DIGIT_ZEROR : '0R ' ;
DIGIT_ZERO_FULL_STOP : '0. ' ;
DIGIT_ZEROcv : '0cv ' ;
DIGIT_ZEROg : '0g ' ;
DIGIT_ZEROh : '0h ' ;
DIGIT_ZEROhop : '0hop ' ;
DIGIT_ZEROop : '0op ' ;
DIGIT_ZEROp : '0p ' ;
DIGIT_ZEROvec : '0vec ' ;
DIsoA : 'DIsoA ' ;
DIsoB : 'DIsoB ' ;
DIsoC : 'DIsoC ' ;
DIsoH : 'DIsoH ' ;
DLat : 'DLat ' ;
DProd : 'DProd ' ;
DProj : 'dProj ' ;
DVecA : 'DVecA ' ;
DVecH : 'DVecH ' ;
DWords : 'dWords ' ;
Ded : 'Ded ' ;
DefAt : 'defAt ' ;
Deg : 'deg ' ;
DegAA : 'degAA ' ;
DegDIGIT_ONE : 'deg1 ' ;
Denom : 'denom ' ;
Der : 'der ' ;
Derv : 'derv ' ;
Dgra : 'Dgra ' ;
DiagFunc : 'DiagFunc ' ;
Dil : 'Dil ' ;
Dioph : 'Dioph ' ;
DirRel : 'DirRel ' ;
Dirset : 'Dirset ' ;
DisjLOW_LINE : 'Disj_ ' ;
Dist : 'dist ' ;
DivRing : 'DivRing ' ;
DivRingOps : 'DivRingOps ' ;
Dmn : 'Dmn ' ;
Dn : 'Dn ' ;
Dom : 'dom ' ;
DomA : 'domA ' ;
DomLOW_LINE : 'dom_ ' ;
DomSetCat : 'domSetCat ' ;
Domain : 'Domain ' ;
Domn : 'Domn ' ;
EDRing : 'EDRing ' ;
EDRingR : 'EDRingR ' ;
EE : 'EE ' ;
EFULL_STOP : 'e. ' ;
EFULL_STOPg : 'e.g ' ;
EQUALS_SIGN : '= ' ;
EQUALS_SIGN_EQUALS_SIGN : '== ' ;
EQUALS_SIGN_GREATER_THAN_SIGNc : '=>c ' ;
EQUALS_SIGN_SOLIDUS_EQUALS_SIGN : '=/= ' ;
EQUALS_SIGNg : '=g ' ;
ESOLIDUS : 'e/ ' ;
EXCLAMATION_MARK : '! ' ;
E_ASTERISK : 'E* ' ;
E_EXCLAMATION_MARK : 'E! ' ;
E_FULL_STOP : 'E. ' ;
E_FULL_STOPg : 'E.g ' ;
Eigval : 'eigval ' ;
Eigvec : 'eigvec ' ;
Epi : 'Epi ' ;
Epic : 'Epic ' ;
Er : 'Er ' ;
EulPaths : 'EulPaths ' ;
Eval : 'eval ' ;
EvalDIGIT_ONE : 'eval1 ' ;
EvalF : 'evalF ' ;
EvalSub : 'evalSub ' ;
EvalSubDIGIT_ONE : 'evalSub1 ' ;
ExId : 'ExId ' ;
Exp : 'exp ' ;
FBas : 'fBas ' ;
FClus : 'fClus ' ;
FClusf : 'fClusf ' ;
FLim : 'fLim ' ;
FLimf : 'fLimf ' ;
FLimfrs : 'fLimfrs ' ;
FULL_STOP : '. ' ;
FULL_STOPN : '.N ' ;
FULL_STOPP_FULL_STOP : '.P. ' ;
FULL_STOPQ : '.Q ' ;
FULL_STOPR : '.R ' ;
FULL_STOP_FULL_STOP_CIRCUMFLEX_ACCENT : '..^ ' ;
FULL_STOP_FULL_STOP_FULL_STOP : '... ' ;
FULL_STOPcv : '.cv ' ;
FULL_STOPfn : '.fn ' ;
FULL_STOPg : '.g ' ;
FULL_STOPh : '.h ' ;
FULL_STOPi : '.i ' ;
FULL_STOPiOLD : '.iOLD ' ;
FULL_STOPif : '.if ' ;
FULL_STOPih : '.ih ' ;
FULL_STOPm : '.m ' ;
FULL_STOPo : '.o ' ;
FULL_STOPop : '.op ' ;
FULL_STOPpQ : '.pQ ' ;
FULL_STOPpR : '.pR ' ;
FULL_STOPr : '.r ' ;
FULL_STOPs : '.s ' ;
FULL_STOPsOLD : '.sOLD ' ;
FULL_STOPsf : '.sf ' ;
FULL_STOPv : '.v ' ;
F_FULL_STOP : 'F. ' ;
F_SOLIDUS : 'F/ ' ;
F_SOLIDUS_LOW_LINE : 'F/_ ' ;
Faith : 'Faith ' ;
Fi : 'fi ' ;
Field : 'Field ' ;
Fil : 'Fil ' ;
FilGen : 'filGen ' ;
FilMap : 'FilMap ' ;
Fin : 'Fin ' ;
FinDIGIT_FIVE : 'Fin5 ' ;
FinDIGIT_FOUR : 'Fin4 ' ;
FinDIGIT_ONEa : 'Fin1a ' ;
FinDIGIT_SEVEN : 'Fin7 ' ;
FinDIGIT_SIX : 'Fin6 ' ;
FinDIGIT_THREE : 'Fin3 ' ;
FinDIGIT_TWO : 'Fin2 ' ;
FiveSeg : 'FiveSeg ' ;
Fix : 'Fix ' ;
Fld : 'Fld ' ;
Fmla : 'Fmla ' ;
Fn : 'Fn ' ;
Fne : 'Fne ' ;
Fr : 'Fr ' ;
Fre : 'Fre ' ;
FreeGrp : 'freeGrp ' ;
FreeLMod : 'freeLMod ' ;
FreeMnd : 'freeMnd ' ;
FreeSemiGrp : 'FreeSemiGrp ' ;
Frf : 'Frf ' ;
FriendGrph : 'FriendGrph ' ;
Full : 'Full ' ;
FullFun : 'FullFun ' ;
Fun : 'Fun ' ;
Func : 'Func ' ;
FuncCat : 'FuncCat ' ;
FuncOLD : 'FuncOLD ' ;
Funpart : 'Funpart ' ;
Funs : 'Funs ' ;
GCH : 'GCH ' ;
GEx : 'gEx ' ;
GF : 'GF ' ;
GF_LOW_LINEoo : 'GF_oo ' ;
GId : 'GId ' ;
GRAVE_ACCENT : '` ' ;
GRAVE_ACCENT_APOSTROPHE : '`\' ' ;
GREATER_THAN_SIGN : '> ' ;
GREATER_THAN_SIGN_FULL_STOP : '>. ' ;
GREATER_THAN_SIGN_GREATER_THAN_SIGN : '>> ' ;
GREATER_THAN_SIGN_LOW_LINE : '>_ ' ;
Gamma : 'gamma ' ;
Gcd : 'gcd ' ;
GcdOLD : 'gcdOLD ' ;
Ge : 'ge ' ;
Glb : 'glb ' ;
GprodLOW_LINE : 'gprod_ ' ;
Grammar : 'Grammar ' ;
GraphSetCat : 'graphSetCat ' ;
Grp : 'Grp ' ;
GrpAct : 'GrpAct ' ;
GrpHom : 'GrpHom ' ;
GrpIso : 'GrpIso ' ;
GrpOp : 'GrpOp ' ;
GrpOpHom : 'GrpOpHom ' ;
GrpOpIso : 'GrpOpIso ' ;
Gsum : 'gsum ' ;
HAtoms : 'HAtoms ' ;
HDMap : 'HDMap ' ;
HDMapDIGIT_ONE : 'HDMap1 ' ;
HGMap : 'HGMap ' ;
HL : 'HL ' ;
HLHil : 'HLHil ' ;
HVMap : 'HVMap ' ;
HYPHEN_MINUS : '- ' ;
HYPHEN_MINUS_DIGIT_ONER : '-1R ' ;
HYPHEN_MINUS_DIGIT_ONE_HYPHEN_MINUS_DIGIT_ONE_HYPHEN_MINUS_GREATER_THAN_SIGN : '-1-1-> ' ;
HYPHEN_MINUS_DIGIT_ONE_HYPHEN_MINUS_DIGIT_ONE_HYPHEN_MINUSontoHYPHEN_MINUS_GREATER_THAN_SIGN : '-1-1-onto-> ' ;
HYPHEN_MINUS_FULL_STOP : '-. ' ;
HYPHEN_MINUS_FULL_STOPc : '-.c ' ;
HYPHEN_MINUS_FULL_STOPg : '-.g ' ;
HYPHEN_MINUS_GREATER_THAN_SIGN : '-> ' ;
HYPHEN_MINUS_GREATER_THAN_SIGN_FULL_STOP : '->. ' ;
HYPHEN_MINUS_GREATER_THAN_SIGN_FULL_STOP_FULL_STOP : '->.. ' ;
HYPHEN_MINUS_GREATER_THAN_SIGNg : '->g ' ;
HYPHEN_MINUS_HYPHEN_MINUS_GREATER_THAN_SIGN : '--> ' ;
HYPHEN_MINUS_SOLIDUS_REVERSE_SOLIDUS : '-/\\ ' ;
HYPHEN_MINUScnHYPHEN_MINUS_GREATER_THAN_SIGN : '-cn-> ' ;
HYPHEN_MINUScv : '-cv ' ;
HYPHEN_MINUSe : '-e ' ;
HYPHEN_MINUSg : '-g ' ;
HYPHEN_MINUSh : '-h ' ;
HYPHEN_MINUSontoHYPHEN_MINUS_GREATER_THAN_SIGN : '-onto-> ' ;
HYPHEN_MINUSoo : '-oo ' ;
HYPHEN_MINUSop : '-op ' ;
HYPHEN_MINUSr : '-r ' ;
HYPHEN_MINUSu : '-u ' ;
HYPHEN_MINUSucv : '-ucv ' ;
HYPHEN_MINUSv : '-v ' ;
H_om : 'hom ' ;
Hadd : 'hadd ' ;
Halfplane : 'Halfplane ' ;
Har : 'har ' ;
Haus : 'Haus ' ;
Hf : 'Hf ' ;
Hil : 'Hil ' ;
HmOp : 'HmOp ' ;
Hom : 'Hom ' ;
HomA : 'HomA ' ;
HomF : 'HomF ' ;
HomLim : 'HomLim ' ;
HomLimB : 'HomLimB ' ;
Homeo : 'Homeo ' ;
Homf : 'Homf ' ;
HrmOp : 'HrmOp ' ;
Htpy : 'Htpy ' ;
ICIRCUMFLEX_ACCENTi : 'i^i ' ;
IDomn : 'IDomn ' ;
II : 'II ' ;
I_nv : 'inv ' ;
Ibcg : 'Ibcg ' ;
Ibg : 'Ibg ' ;
IblFnM : 'IblFnM ' ;
Id : 'Id ' ;
IdA : 'IdA ' ;
IdFunc : 'idFunc ' ;
IdLOW_LINE : 'id_ ' ;
IdSetCat : 'IdSetCat ' ;
Idl : 'Idl ' ;
IdlGen : 'IdlGen ' ;
IdlGenDIGIT_ONEp : 'idlGen1p ' ;
IdlNEW : 'IdlNEW ' ;
If : 'if ' ;
Ig : 'Ig ' ;
Im : 'Im ' ;
Image : 'Image ' ;
Img : 'Img ' ;
Impc : 'impc ' ;
Inacc : 'Inacc ' ;
InaccW : 'InaccW ' ;
IndCls : 'IndCls ' ;
IndMet : 'IndMet ' ;
InfLOW_LINE : 'inf_ ' ;
Infw : 'infw ' ;
InitObj : 'InitObj ' ;
InnerFiveSeg : 'InnerFiveSeg ' ;
Int : 'int ' ;
IntgOver : 'IntgOver ' ;
IntgRing : 'IntgRing ' ;
Intvl : 'Intvl ' ;
Inv : 'Inv ' ;
Invg : 'invg ' ;
Invr : 'invr ' ;
Iop : 'Iop ' ;
Iota : 'iota ' ;
IotaLOW_LINE : 'iota_ ' ;
Irred : 'Irred ' ;
Ismty : 'Ismty ' ;
Iso : 'Iso ' ;
IsoOLD : 'IsoOLD ' ;
Isofunc : 'Isofunc ' ;
IsolatedPt : 'IsolatedPt ' ;
Isom : 'Isom ' ;
Join : 'join ' ;
JoinH : 'joinH ' ;
KGen : 'kGen ' ;
KQ : 'KQ ' ;
Ketbra : 'ketbra ' ;
Kleene : 'Kleene ' ;
KolDIGIT_TWO : 'Kol2 ' ;
LAut : 'LAut ' ;
LBasis : 'LBasis ' ;
LCDual : 'LCDual ' ;
LDil : 'LDil ' ;
LDual : 'LDual ' ;
LEFT_CURLY_BRACKET : '{ ' ;
LEFT_PARENTHESIS : '( ' ;
LEFT_PARENTHESIS_COMMA_RIGHT_PARENTHESIS : '(,) ' ;
LEFT_PARENTHESIS_COMMA_RIGHT_SQUARE_BRACKET : '(,] ' ;
LEFT_PARENTHESIS_FULL_STOP : '(. ' ;
LEFT_PARENTHESIS_LEFT_PARENTHESIS : '(( ' ;
LEFT_PARENTHESIS_PLUS_SIGN_PLUS_SIGN_RIGHT_PARENTHESIS : '(++) ' ;
LEFT_PARENTHESIS_PLUS_SIGN_RIGHT_PARENTHESISm : '(+)m ' ;
LEFT_PARENTHESIS_RIGHT_PARENTHESIS : '() ' ;
LEFT_PARENTHESIS_SOLIDUS_RIGHT_PARENTHESIS : '(/) ' ;
LEFT_PARENTHESISxRIGHT_PARENTHESIS : '(x) ' ;
LEFT_SQUARE_BRACKET : '[ ' ;
LEFT_SQUARE_BRACKETC_FULL_STOP_RIGHT_SQUARE_BRACKET : '[C.] ' ;
LEFT_SQUARE_BRACKET_COMMA_RIGHT_PARENTHESIS : '[,) ' ;
LEFT_SQUARE_BRACKET_COMMA_RIGHT_SQUARE_BRACKET : '[,] ' ;
LEFT_SQUARE_BRACKET_FULL_STOP : '[. ' ;
LEFT_SQUARE_BRACKET_FULL_STOP_RIGHT_SQUARE_BRACKET : '[.] ' ;
LEFT_SQUARE_BRACKET_LOW_LINE : '[_ ' ;
LEFT_SQUARE_BRACKET_RIGHT_SQUARE_BRACKETNN : '[]NN ' ;
LESS_THAN_SIGN : '< ' ;
LESS_THAN_SIGNN : '<N ' ;
LESS_THAN_SIGNP : '<P ' ;
LESS_THAN_SIGNQ : '<Q ' ;
LESS_THAN_SIGNR : '<R ' ;
LESS_THAN_SIGNRR : '<RR ' ;
LESS_THAN_SIGN_EQUALS_SIGN_GREATER_THAN_SIGNc : '<=>c ' ;
LESS_THAN_SIGN_FULL_STOP : '<. ' ;
LESS_THAN_SIGN_GREATER_THAN_SIGN : '<> ' ;
LESS_THAN_SIGN_HYPHEN_MINUS_GREATER_THAN_SIGN : '<-> ' ;
LESS_THAN_SIGN_HYPHEN_MINUS_GREATER_THAN_SIGNg : '<->g ' ;
LESS_THAN_SIGN_LESS_THAN_SIGN : '<< ' ;
LESS_THAN_SIGN_LOW_LINE : '<_ ' ;
LESS_THAN_SIGN_LOW_LINEO_LEFT_PARENTHESIS_DIGIT_ONE_RIGHT_PARENTHESIS : '<_O(1) ' ;
LESS_THAN_SIGN_LOW_LINEop : '<_op ' ;
LESS_THAN_SIGN_QUOTATION_MARK : '<" ' ;
LESS_THAN_SIGNbag : '<bag ' ;
LESS_THAN_SIGNo : '<o ' ;
LESS_THAN_SIGNoH : '<oH ' ;
LESS_THAN_SIGNoL : '<oL ' ;
LESS_THAN_SIGNpQ : '<pQ ' ;
LESS_THAN_SIGNs : '<s ' ;
LFinGen : 'LFinGen ' ;
LFnl : 'LFnl ' ;
LHyp : 'LHyp ' ;
LIdeal : 'LIdeal ' ;
LIndF : 'LIndF ' ;
LIndS : 'LIndS ' ;
LKer : 'LKer ' ;
LLines : 'LLines ' ;
LMHom : 'LMHom ' ;
LMIso : 'LMIso ' ;
LMod : 'LMod ' ;
LNoeM : 'LNoeM ' ;
LNoeR : 'LNoeR ' ;
LOW_LINE : '_ ' ;
LOW_LINEC : '_C ' ;
LOW_LINED : '_D ' ;
LOW_LINEE : '_E ' ;
LOW_LINEFrSe : '_FrSe ' ;
LOW_LINEG : '_G ' ;
LOW_LINEI : '_I ' ;
LOW_LINEInd : '_Ind ' ;
LOW_LINEQp : '_Qp ' ;
LOW_LINESe : '_Se ' ;
LOW_LINETrFo : '_TrFo ' ;
LOW_LINEV : '_V ' ;
LOW_LINEZZ : '_ZZ ' ;
LOW_LINE_VERTICAL_LINE_LOW_LINE : '_|_ ' ;
LOW_LINE_VERTICAL_LINE_LOW_LINEP : '_|_P ' ;
LOW_LINE_VERTICAL_LINE_LOW_LINEc : '_|_c ' ;
LOW_LINEd : '_d ' ;
LOW_LINEe : '_e ' ;
LOW_LINEi : '_i ' ;
LOW_LINEpred : '_pred ' ;
LOW_LINEtrCl : '_trCl ' ;
LPIR : 'LPIR ' ;
LPIdeal : 'LPIdeal ' ;
LPlanes : 'LPlanes ' ;
LPol : 'LPol ' ;
LSAtoms : 'LSAtoms ' ;
LSHyp : 'LSHyp ' ;
LSSum : 'LSSum ' ;
LSpan : 'LSpan ' ;
LSubSp : 'LSubSp ' ;
LTrn : 'LTrn ' ;
LVec : 'LVec ' ;
LVols : 'LVols ' ;
L_CIRCUMFLEX_ACCENT_DIGIT_ONE : 'L^1 ' ;
L_ine : 'line ' ;
Lam : 'Lam ' ;
Lambda : 'Lambda ' ;
Lat : 'Lat ' ;
LatAlg : 'LatAlg ' ;
LatRel : 'LatRel ' ;
Lb : 'lb ' ;
LdgIdlSeq : 'ldgIdlSeq ' ;
Le : 'le ' ;
LeR : 'leR ' ;
Lim : 'Lim ' ;
LimCC : 'limCC ' ;
LimCat : 'LimCat ' ;
LimPt : 'limPt ' ;
Limits : 'Limits ' ;
Limsup : 'limsup ' ;
LinFn : 'LinFn ' ;
LinOp : 'LinOp ' ;
Line : 'Line ' ;
LineDIGIT_THREE : 'line3 ' ;
Lines : 'Lines ' ;
LinesEE : 'LinesEE ' ;
LnOp : 'LnOp ' ;
LocFin : 'LocFin ' ;
Locally : 'Locally ' ;
Log : 'log ' ;
LogLOW_LINE : 'log_ ' ;
LogLOW_LINEG : 'log_G ' ;
Logb : 'logb ' ;
Lt : 'lt ' ;
Lub : 'lub ' ;
MDeg : 'mDeg ' ;
MEndo : 'MEndo ' ;
MH : 'MH ' ;
MH_ASTERISK : 'MH* ' ;
MHomP : 'mHomP ' ;
MPSDer : 'mPSDer ' ;
MPoly : 'mPoly ' ;
MPwSer : 'mPwSer ' ;
MVar : 'mVar ' ;
MaAdju : 'maAdju ' ;
MaDet : 'maDet ' ;
MaMul : 'maMul ' ;
Magma : 'Magma ' ;
Mapd : 'mapd ' ;
Mat : 'Mat ' ;
MaxIdl : 'MaxIdl ' ;
MblFn : 'MblFn ' ;
MblFnM : 'MblFnM ' ;
Measures : 'measures ' ;
Meet : 'meet ' ;
Met : 'Met ' ;
MetOpen : 'MetOpen ' ;
MetSp : 'MetSp ' ;
MinPolyAA : 'minPolyAA ' ;
Mmu : 'mmu ' ;
Mnd : 'Mnd ' ;
MndHom : 'MndHom ' ;
MndOp : 'MndOp ' ;
Mnl : 'mnl ' ;
Mod : 'mod ' ;
Monic : 'Monic ' ;
MonicDIGIT_ONEp : 'Monic1p ' ;
Mono : 'Mono ' ;
MonoAP : 'MonoAP ' ;
MonoOLD : 'MonoOLD ' ;
Moore : 'Moore ' ;
MorphismSetCat : 'MorphismSetCat ' ;
MrCls : 'mrCls ' ;
MrInd : 'mrInd ' ;
MulGrp : 'mulGrp ' ;
Mxl : 'mxl ' ;
MzPoly : 'mzPoly ' ;
MzPolyCld : 'mzPolyCld ' ;
NGHom : 'NGHom ' ;
NMHom : 'NMHom ' ;
NN : 'NN ' ;
NN_DIGIT_ZERO : 'NN0 ' ;
NUMBER_SIGN : '# ' ;
N_FULL_STOP : 'N. ' ;
N_HYPHEN_MINUSLocally : 'N-Locally ' ;
Nat : 'Nat ' ;
Natural : 'Natural ' ;
Ndx : 'ndx ' ;
Nei : 'nei ' ;
Neighbors : 'Neighbors ' ;
Neug : 'Neug ' ;
No : 'No ' ;
NoeACS : 'NoeACS ' ;
Norm : 'norm ' ;
NormCV : 'normCV ' ;
NormOp : 'normOp ' ;
NormOpOLD : 'normOpOLD ' ;
Normfn : 'normfn ' ;
Normh : 'normh ' ;
Normop : 'normop ' ;
Notc : 'notc ' ;
Nrm : 'Nrm ' ;
NrmCVec : 'NrmCVec ' ;
NrmGrp : 'NrmGrp ' ;
NrmMod : 'NrmMod ' ;
NrmRing : 'NrmRing ' ;
NrmSGrp : 'NrmSGrp ' ;
NrmVec : 'NrmVec ' ;
Null : 'null ' ;
Numer : 'numer ' ;
NzRing : 'NzRing ' ;
OBasis : 'OBasis ' ;
ODual : 'ODual ' ;
OF : 'oF ' ;
OFC : 'oFC ' ;
OFULL_STOP : 'o. ' ;
OFULL_STOPfunc : 'o.func ' ;
OL : 'OL ' ;
OLOW_LINE : 'o_ ' ;
OML : 'OML ' ;
OP : 'OP ' ;
OR : 'oR ' ;
ORVC : 'oRVC ' ;
O_LEFT_PARENTHESIS_DIGIT_ONE_RIGHT_PARENTHESIS : 'O(1) ' ;
Oc : 'oc ' ;
OcA : 'ocA ' ;
OcH : 'ocH ' ;
Ocv : 'ocv ' ;
Od : 'od ' ;
OdZ : 'odZ ' ;
Om : 'om ' ;
OmDIGIT_ONE : 'Om1 ' ;
OmN : 'OmN ' ;
On : 'On ' ;
Opfn : 'opfn ' ;
OppCat : 'oppCat ' ;
OppG : 'oppG ' ;
OppR : 'oppR ' ;
Or : 'Or ' ;
OrHom : 'OrHom ' ;
OrIso : 'OrIso ' ;
Ord : 'Ord ' ;
OrdIso : 'OrdIso ' ;
OrdPwSer : 'ordPwSer ' ;
OrdTop : 'ordTop ' ;
Ors : 'ors ' ;
OuterFiveSeg : 'OuterFiveSeg ' ;
OutsideOf : 'OutsideOf ' ;
PAut : 'PAut ' ;
PCl : 'PCl ' ;
PCnt : 'pCnt ' ;
PCon : 'PCon ' ;
PGrp : 'pGrp ' ;
PHtpy : 'PHtpy ' ;
PID : 'PID ' ;
PLUS_SIGN : '+ ' ;
PLUS_SIGNH : '+H ' ;
PLUS_SIGNN : '+N ' ;
PLUS_SIGNP : '+P ' ;
PLUS_SIGNP_FULL_STOP : '+P. ' ;
PLUS_SIGNQ : '+Q ' ;
PLUS_SIGNR : '+R ' ;
PLUS_SIGN_HYPHEN_MINUS : '+- ' ;
PLUS_SIGNc : '+c ' ;
PLUS_SIGNcv : '+cv ' ;
PLUS_SIGNe : '+e ' ;
PLUS_SIGNf : '+f ' ;
PLUS_SIGNfn : '+fn ' ;
PLUS_SIGNg : '+g ' ;
PLUS_SIGNh : '+h ' ;
PLUS_SIGNm : '+m ' ;
PLUS_SIGNo : '+o ' ;
PLUS_SIGNoo : '+oo ' ;
PLUS_SIGNop : '+op ' ;
PLUS_SIGNpQ : '+pQ ' ;
PLUS_SIGNpR : '+pR ' ;
PLUS_SIGNr : '+r ' ;
PLUS_SIGNv : '+v ' ;
PLines : 'PLines ' ;
PNrm : 'PNrm ' ;
PPoints : 'PPoints ' ;
PSubCl : 'PSubCl ' ;
PSubSp : 'PSubSp ' ;
PSyl : 'pSyl ' ;
P_FULL_STOP : 'P. ' ;
PairF : 'pairF ' ;
PathOn : 'PathOn ' ;
Paths : 'Paths ' ;
Pc : 'Pc ' ;
PellDIGIT_ONEQR : 'Pell1QR ' ;
PellDIGIT_ONE_DIGIT_FOURQR : 'Pell14QR ' ;
PellDIGIT_ONE_DIGIT_TWO_DIGIT_THREE_DIGIT_FOURQR : 'Pell1234QR ' ;
PellFund : 'PellFund ' ;
Perf : 'Perf ' ;
Phc : 'phc ' ;
Phi : 'phi ' ;
Pi : 'pi ' ;
PiDIGIT_ONE : 'pi1 ' ;
PiN : 'piN ' ;
Plig : 'Plig ' ;
PmSgn : 'pmSgn ' ;
PmTrsp : 'pmTrsp ' ;
Pmap : 'pmap ' ;
Po : 'Po ' ;
Points : 'Points ' ;
Poly : 'Poly ' ;
PolyAP : 'PolyAP ' ;
PolyDIGIT_ONE : 'Poly1 ' ;
PolyFld : 'polyFld ' ;
PolyLESS_THAN_SIGN : 'Poly< ' ;
PolySplitLim : 'polySplitLim ' ;
Poset : 'Poset ' ;
PosetRel : 'PosetRel ' ;
Ppi : 'ppi ' ;
Pprod : 'pprod ' ;
Pr : 'pr ' ;
PrIdl : 'PrIdl ' ;
PrRing : 'PrRing ' ;
Prdct : 'prdct ' ;
PreHil : 'PreHil ' ;
Pred : 'Pred ' ;
Preset : 'Preset ' ;
PresetRel : 'PresetRel ' ;
Prime : 'Prime ' ;
Prj : 'prj ' ;
Prob : 'Prob ' ;
ProdDIGIT_THREE_LOW_LINE : 'prod3_ ' ;
ProdDIGIT_TWO_LOW_LINE : 'prod2_ ' ;
ProdLOW_LINE : 'prod_ ' ;
ProdObj : 'ProdObj ' ;
Proj : 'proj ' ;
ProjDIGIT_ONE : 'proj1 ' ;
Projh : 'projh ' ;
Prop : 'Prop ' ;
Provable : '|- ' ;
Prt : 'Prt ' ;
Psc : 'psc ' ;
Psi : 'psi ' ;
PtDf : 'PtDf ' ;
PtFin : 'PtFin ' ;
PwSerDIGIT_ONE : 'PwSer1 ' ;
QQ : 'QQ ' ;
QTop : 'qTop ' ;
QUOTATION_MARK : '" ' ;
QUOTATION_MARK_GREATER_THAN_SIGN : '"> ' ;
QUOTATION_MARKs : '"s ' ;
Q_FULL_STOP : 'Q. ' ;
Qp : 'Qp ' ;
Quot : 'quot ' ;
QuotDIGIT_ONEp : 'quot1p ' ;
RAffSp : 'RAffSp ' ;
REVERSE_SOLIDUS : '\\ ' ;
REVERSE_SOLIDUS_SOLIDUS : '\\/ ' ;
REVERSE_SOLIDUS_SOLIDUSH : '\\/H ' ;
REVERSE_SOLIDUS_SOLIDUS_LOW_LINE : '\\/_ ' ;
REVERSE_SOLIDUS_SOLIDUSc : '\\/c ' ;
REVERSE_SOLIDUS_SOLIDUSg : '\\/g ' ;
RIGHT_CURLY_BRACKET : '} ' ;
RIGHT_PARENTHESIS : ') ' ;
RIGHT_PARENTHESIS_FULL_STOP : '). ' ;
RIGHT_PARENTHESIS_RIGHT_PARENTHESIS : ')) ' ;
RIGHT_SQUARE_BRACKET : '] ' ;
RIGHT_SQUARE_BRACKET_FULL_STOP : ']. ' ;
RIGHT_SQUARE_BRACKET_LOW_LINE : ']_ ' ;
RLReg : 'RLReg ' ;
RPrime : 'RPrime ' ;
RR : 'RR ' ;
RR_ASTERISK : 'RR* ' ;
RR_ASTERISKs : 'RR*s ' ;
RR_DIGIT_THREE : 'RR3 ' ;
RR_PLUS_SIGN : 'RR+ ' ;
RRndVar : 'rRndVar ' ;
RSpan : 'RSpan ' ;
RVec : 'RVec ' ;
R_DIGIT_ONE : 'R1 ' ;
R_FULL_STOP : 'R. ' ;
R_ay : 'ray ' ;
Ramsey : 'Ramsey ' ;
Ran : 'ran ' ;
Range : 'Range ' ;
Rank : 'rank ' ;
Ray : 'Ray ' ;
Re : 'Re ' ;
Rec : 'rec ' ;
Recs : 'recs ' ;
Ref : 'Ref ' ;
Reg : 'Reg ' ;
Rel : 'Rel ' ;
RemDIGIT_ONEp : 'rem1p ' ;
Restrict : 'Restrict ' ;
Retr : 'Retr ' ;
Reverse : 'reverse ' ;
Ring : 'Ring ' ;
RingHom : 'RingHom ' ;
RingIso : 'RingIso ' ;
RingLMod : 'ringLMod ' ;
RingOps : 'RingOps ' ;
RingSpan : 'RingSpan ' ;
RmX : 'rmX ' ;
RmY : 'rmY ' ;
Rn : 'Rn ' ;
RngHom : 'RngHom ' ;
RngIso : 'RngIso ' ;
RoSetCat : 'roSetCat ' ;
SCon : 'SCon ' ;
SEMICOLON : '; ' ;
SH : 'SH ' ;
SOLIDUS : '/ ' ;
SOLIDUSL : '/L ' ;
SOLIDUSQ : '/Q ' ;
SOLIDUSQp : '/Qp ' ;
SOLIDUS_FULL_STOP : '/. ' ;
SOLIDUS_REVERSE_SOLIDUS : '/\\ ' ;
SOLIDUS_REVERSE_SOLIDUSc : '/\\c ' ;
SOLIDUS_REVERSE_SOLIDUSg : '/\\g ' ;
SOLIDUScv : '/cv ' ;
SOLIDUSe : '/e ' ;
SOLIDUSg : '/g ' ;
SOLIDUSr : '/r ' ;
SOLIDUSs : '/s ' ;
SPaths : 'SPaths ' ;
SSet : 'SSet ' ;
SX : 'sX ' ;
S_FULL_STOP : 'S. ' ;
S_FULL_STOP_DIGIT_ONE : 'S.1 ' ;
S_FULL_STOP_DIGIT_TWO : 'S.2 ' ;
S_FULL_STOPm : 'S.m ' ;
S_LOW_LINE : 'S_ ' ;
S_Set : 'sSet ' ;
Sadd : 'sadd ' ;
Sat : 'Sat ' ;
SatE : 'SatE ' ;
Scalar : 'Scalar ' ;
Se : 'Se ' ;
Sec : 'sec ' ;
Sect : 'Sect ' ;
Seg : 'seg ' ;
SegLESS_THAN_SIGN_LOW_LINE : 'Seg<_ ' ;
Segc : 'segc ' ;
Segments : 'Segments ' ;
Segtrg : 'segtrg ' ;
SelectVars : 'selectVars ' ;
SemiGrp : 'SemiGrp ' ;
SemiGrpHom : 'SemiGrpHom ' ;
Seq : 'seq ' ;
Seqom : 'seqom ' ;
Set : 'set ' ;
SetCat : 'SetCat ' ;
SetCatOLD : 'SetCatOLD ' ;
Sgn : 'sgn ' ;
Shift : 'shift ' ;
SigAlgebra : 'sigAlgebra ' ;
SigaGen : 'sigaGen ' ;
Sigma : 'sigma ' ;
Sin : 'sin ' ;
Singleton : 'Singleton ' ;
Singletons : 'Singletons ' ;
Sinh : 'sinh ' ;
Sink : 'Sink ' ;
Slices : 'slices ' ;
Slot : 'Slot ' ;
Smo : 'Smo ' ;
Smul : 'smul ' ;
Source : 'Source ' ;
Span : 'span ' ;
Splice : 'splice ' ;
SplitFld : 'splitFld ' ;
SplitFldDIGIT_ONE : 'splitFld1 ' ;
Sqr : 'sqr ' ;
Ss : 'ss ' ;
States : 'States ' ;
Struct : 'Struct ' ;
SubCat : 'SubCat ' ;
SubDRing : 'SubDRing ' ;
SubGrp : 'SubGrp ' ;
SubGrpOp : 'SubGrpOp ' ;
SubMnd : 'SubMnd ' ;
SubRing : 'SubRing ' ;
SubSemiGrp : 'SubSemiGrp ' ;
SubSemiGrpGen : 'subSemiGrpGen ' ;
SubSp : 'SubSp ' ;
SubVec : 'SubVec ' ;
Subcat : 'Subcat ' ;
SubringAlg : 'subringAlg ' ;
Substr : 'substr ' ;
Suc : 'suc ' ;
Succ : 'Succ ' ;
SumASTERISK : 'sum* ' ;
SumLOW_LINE : 'sum_ ' ;
SumObj : 'SumObj ' ;
Sup : 'sup ' ;
SupLOW_LINE : 'sup_ ' ;
Supw : 'supw ' ;
Sym : 'sym ' ;
SymGrp : 'SymGrp ' ;
TASTERISK : 't* ' ;
TASTERISKrec : 't*rec ' ;
TC : 'TC ' ;
TEndo : 'TEndo ' ;
TGrp : 'TGrp ' ;
TILDEFG : '~FG ' ;
TILDEH : '~H ' ;
TILDEP : '~P ' ;
TILDEQ : '~Q ' ;
TILDEQG : '~QG ' ;
TILDEQp : '~Qp ' ;
TILDER : '~R ' ;
TILDE_EQUALS_SIGN : '~= ' ;
TILDE_EQUALS_SIGNg : '~=g ' ;
TILDE_EQUALS_SIGNm : '~=m ' ;
TILDE_EQUALS_SIGNph : '~=ph ' ;
TILDE_EQUALS_SIGNr : '~=r ' ;
TILDE_LESS_THAN_SIGN : '~< ' ;
TILDE_LESS_THAN_SIGN_LOW_LINE : '~<_ ' ;
TILDE_LESS_THAN_SIGN_LOW_LINE_ASTERISK : '~<_* ' ;
TILDE_TILDE : '~~ ' ;
TILDE_TILDE_GREATER_THAN_SIGN : '~~> ' ;
TILDE_TILDE_GREATER_THAN_SIGNr : '~~>r ' ;
TILDE_TILDE_GREATER_THAN_SIGNt : '~~>t ' ;
TILDE_TILDE_GREATER_THAN_SIGNu : '~~>u ' ;
TILDE_TILDE_GREATER_THAN_SIGNv : '~~>v ' ;
TPLUS_SIGN : 't+ ' ;
TX : 'tX ' ;
T_FULL_STOP : 'T. ' ;
T_r : 'tr ' ;
Tail : 'tail ' ;
Tan : 'tan ' ;
Tanh : 'tanh ' ;
Tar : 'tar ' ;
Tarski : 'Tarski ' ;
TarskiMap : 'tarskiMap ' ;
Tayl : 'Tayl ' ;
TermObj : 'TermObj ' ;
Theta : 'theta ' ;
ToCHil : 'toCHil ' ;
ToHL : 'toHL ' ;
ToInc : 'toInc ' ;
ToMetSp : 'toMetSp ' ;
ToNrmGrp : 'toNrmGrp ' ;
ToPolyDIGIT_ONE : 'toPoly1 ' ;
Tofld : 'Tofld ' ;
Top : 'Top ' ;
TopBases : 'TopBases ' ;
TopDRing : 'TopDRing ' ;
TopFld : 'TopFld ' ;
TopGen : 'topGen ' ;
TopGrp : 'TopGrp ' ;
TopLnd : 'TopLnd ' ;
TopMnd : 'TopMnd ' ;
TopMod : 'TopMod ' ;
TopOn : 'TopOn ' ;
TopOpen : 'TopOpen ' ;
TopRing : 'TopRing ' ;
TopSep : 'TopSep ' ;
TopSet : 'TopSet ' ;
TopSp : 'TopSp ' ;
TopSpOLD : 'TopSpOLD ' ;
TopVec : 'TopVec ' ;
TopX : 'topX ' ;
Toset : 'Toset ' ;
TosetRel : 'TosetRel ' ;
TotBnd : 'TotBnd ' ;
Tpos : 'tpos ' ;
Tr : 'Tr ' ;
TrL : 'trL ' ;
TrPred : 'TrPred ' ;
TrailOn : 'TrailOn ' ;
Trails : 'Trails ' ;
Trans : 'Trans ' ;
TransportTo : 'TransportTo ' ;
Trcng : 'trcng ' ;
Triangle : 'triangle ' ;
Trn : 'Trn ' ;
Tsums : 'tsums ' ;
UFL : 'UFL ' ;
UFULL_STOP : 'u. ' ;
UFil : 'UFil ' ;
UMGrph : 'UMGrph ' ;
USGrph : 'USGrph ' ;
USLGrph : 'USLGrph ' ;
U_FULL_STOP : 'U. ' ;
U_LOW_LINE : 'U_ ' ;
Ub : 'ub ' ;
Uncurry : 'uncurry ' ;
UncurryF : 'uncurryF ' ;
Undef : 'Undef ' ;
UniOp : 'UniOp ' ;
UnicDIGIT_ONEp : 'Unic1p ' ;
Unif : 'Unif ' ;
UnifSp : 'UnifSp ' ;
Unit : 'Unit ' ;
UnitVec : 'unitVec ' ;
Univ : 'Univ ' ;
UnivVertex : 'UnivVertex ' ;
Until : 'until ' ;
VA : 'vA ' ;
VDeg : 'VDeg ' ;
VERTICAL_LINE : '| ' ;
VERTICAL_LINE_CIRCUMFLEX_ACCENT_VERTICAL_LINE : '|^| ' ;
VERTICAL_LINE_CIRCUMFLEX_ACCENT_VERTICAL_LINE_LOW_LINE : '|^|_ ' ;
VERTICAL_LINE_EQUALS_SIGN : '|= ' ;
VERTICAL_LINE_GRAVE_ACCENT : '|` ' ;
VERTICAL_LINE_GRAVE_ACCENTcat : '|`cat ' ;
VERTICAL_LINE_GRAVE_ACCENTf : '|`f ' ;
VERTICAL_LINE_GRAVE_ACCENTs : '|`s ' ;
VERTICAL_LINE_GRAVE_ACCENTt : '|`t ' ;
VERTICAL_LINE_HYPHEN_MINUS_GREATER_THAN_SIGN : '|-> ' ;
VERTICAL_LINE_LOW_LINE : '|_ ' ;
VERTICAL_LINE_VERTICAL_LINE : '|| ' ;
VERTICAL_LINE_VERTICAL_LINEr : '||r ' ;
VERTICAL_LINEg : '|g ' ;
VH : 'vH ' ;
VarDIGIT_ONE : 'var1 ' ;
VarFGrp : 'varFGrp ' ;
VarFMnd : 'varFMnd ' ;
Vec : 'Vec ' ;
Vol : 'vol ' ;
VolASTERISK : 'vol* ' ;
WAtoms : 'WAtoms ' ;
WUni : 'WUni ' ;
WUniCl : 'wUniCl ' ;
WalkOn : 'WalkOn ' ;
Walks : 'Walks ' ;
We : 'We ' ;
Wff : 'wff ' ;
Word : 'Word ' ;
Words : 'Words ' ;
XFULL_STOP : 'x. ' ;
XX_FULL_STOP : 'XX. ' ;
X_FULL_STOP : 'X. ' ;
X_LOW_LINE : 'X_ ' ;
XcFULL_STOP : 'Xc. ' ;
Xm : 'xm ' ;
Xp : 'Xp ' ;
XsFULL_STOP : 'Xs. ' ;
XsLOW_LINE : 'Xs_ ' ;
XtLOW_LINE : 'Xt_ ' ;
Yon : 'Yon ' ;
ZF : 'ZF ' ;
ZMod : 'ZMod ' ;
ZRHom : 'ZRHom ' ;
ZRing : 'ZRing ' ;
ZZ : 'ZZ ' ;
ZZ_GREATER_THAN_SIGN_EQUALS_SIGN : 'ZZ>= ' ;
Z_LEFT_SQUARE_BRACKETiRIGHT_SQUARE_BRACKET : 'Z[i] ' ;
Z_SOLIDUSnZ : 'Z/nZ ' ;
ZeroDiv : 'zeroDiv ' ;
Zeta : 'zeta ' ;
Zp : 'Zp ' ;

Giovanni Mascellani

unread,
Feb 21, 2019, 1:00:10 PM2/21/19
to meta...@googlegroups.com
Hi,

Il 20/02/19 12:32, Mario Carneiro ha scritto:
> I'm experimenting with a new language, Metamath Zero, an amalgamation
> of Metamath, Ghilbert and Lean syntax. The goal is to balance human
> readability and ease of writing a verifier, while also making some
> attempts to patch the bugs in metamath soundness (not that it's
> wrong, but there is a lot of soundness justification currently
> happening outside the metamath verifier kernel).

Seems to be a very interesting experiment, and I personally agree with
your objectives!

I will mark some of my comments as "[cosmetic]", meaning that I do not
personally give a lot of importance to those specific issues.

My email program seems to have partially spoiled your example file,
removing some commas and spaces. Hopefully this is still clear enough so
that my comments make sense.

> Without giving too much explanation of what the syntax means (it's
> all fictional syntax at the moment, there is no parser yet), I would
> like to use some of you as guinea pigs for the "readability" aspect.
> How obvious is it what things mean? I will put some more explanatory
> comments at the end.
>
> nonempty sort wff;

If I am not wrong, all sorts in your example are defined to be nonempty.
So why isn't the keyword "sort" set to directly mean a "non-empty sort",
instead of having to specify it? Or else, what is the use of a sort which
is not declared nonempty? [cosmetic]

> var ph ps ch: wff*;

I sort of understand what the * means. But why not introduce
another keyword to accompany "var" (maybe "open"?), instead of adding
the star, which is inhomogeneous with the rest of the syntax? [cosmetic]

> term wi (ph ps: wff): wff; infixr wi: -> 25;

I like this way to specify the syntax for "human-readable" expressions
(and I also like very much that idea of introducing a
more-human-readable expression syntax over Metamath altogether). I like
even more the proposal that was made to only specify a partial priority
order, basically for two reasons: first, because it removes the problem
of deciding how much "priority space" one has to leave between two
different operators; second, because it makes operators coming from two
different sources automatically not comparable, instead of automatically
having a random priority order depending on the specific numbers chosen
by the two sources.

I also understand that in this setting the use of parentheses "(" and
")" to force priority is hardcoded in the language and cannot be
changed, say, to "[" and "]" by a specific file. It is not necessarily
bad, it is just different from Metamath, where the choice of "(" and ")"
is completely arbitrary. [cosmetic]

Also, "infix" without the "r" is meant to be infix left? Or is there
also an "infixl"? In that case, what does "infix" mean? If "infix" is
already infix left, I would suggest to use "infixl" and leave out
"infix", for better symmetry. [cosmetic]

> term wn (ph: wff): wff; prefix wn: ~ 40;
>
> axiom ax-1: << ph -> ps -> ph >>
>
> (wi ph (wi ps ph));

I am among those that do not like these double expressions. I do not
like in general when the same file contains some data that is meant to be
hand-written and some data that is meant to be auto-generated. I would
definitely prefer that the verifier is required to parse mathematical
expressions and check them, so that the file is entirely hand-written.
The proposal to include a proof of the grammar unambiguity in the proof
file sounds reasonable to me.

The above comment calls for eliminating the need for chevrons, but if
they are to stay, then I would suggest replacing them with some other
syntax, in order to make less confusion with arrows and leave them
available for the math syntax (where they can mean "much greater than").
I would propose "$$", which seems to be less likely to be used in maths.
[cosmetic]

> axiom ax-2: << (ph -> ps -> ch) -> (ph -> ps) -> ph -> ch >>
>
> (wi (wi ph (wi ps ch)) (wi (wi ph ps) (wi ph ch)));
>
> axiom ax-3: << (~ph -> ~ps) -> ps -> ph >>
>
> (wi (wi (wn ph) (wn ps)) (wi ps ph));
>
> axiom ax-mp:
>
> << ph >> ph ->
>
> << ph -> ps >> (wi ph ps) ->
>
> << ps >> ps;
>
>
> theorem a1i: << ph >> ph -> << ps -> ph >> (wi ps ph);

You do not specify what is the proof format. It is ok for me to store it
in another file (actually, I find it positive, in that it goes in the
direction of separating hand-written and generated content) and I do not
need the details of the encoding, but at least I would like to know what
is the abstract format. In Metamath the abstract format is the
substitution, but I believe something has to change in Metamath Zero,
since the abstract expression representation has changed too (Metamath
deals partially with symbol strings and partially with their parsing
trees, although without guaranteeing that they are unique, while
Metamath Zero seems to be dealing with
parsing-trees-plus-bindings-information; is this correct?).

> def wb (ph ps: wff): wff :=
>
> << ~((ph -> ps) -> ~(ps -> ph)) >>
>
> (wn (wi (wi ph ps) (wn (wi ps ph)))) {
>
> infix wb: <-> 20;
>
>
> theorem bi1: << (ph <-> ps) -> ph -> ps >>
>
> (wi (wb ph ps) (wi ph ps));
>
> theorem bi2: << (ph <-> ps) -> ps -> ph >>
>
> (wi (wb ph ps) (wi ps ph));
>
> theorem bi3: << (ph -> ps) -> (ps -> ph) -> (ph <-> ps) >>
>
> (wi (wi ph ps) (wi (wi ps ph) (wb ph ps)));
>
> }

I like very much this syntax for definitions. I like the fact that the
soundness check is built into the language, and I like the idea that the
definition is "valid" only for a limited scope, and has to be used via
theorems afterwards. This, if correctly used, enables to separate the
axioms (or "interface") of a theory from its foundational construction
("implementation"), which is what in Metamath is done manually (and,
again, requiring a lot of external verification) for number sets.
Ideally this enables users to "rebase" theories on different
foundations, as long as the definition block does not let foundational
details escape.

I believe that the definition is somehow materialized as an additional
step available to the proof file for theorems appearing in the
definition block. Can you give details on this? Also, what is required
from the proof file to guarantee that the definition is sound?

> def wa (ph ps: wff): wff := << ~(ph -> ~ps) >> (wn (wi ph (wn ps))) {
>
> infix wa: /\ 20;
>
> theorem df-an: << (ph /\ ps) <-> ~(ph -> ~ps) >>
>
> (wi (wa ph ps) (wn (wi ph (wn ps))));
>
> }
>
>
> def wtru (bound p: wff): wff := << p <-> p >> (wb p p) {
>
> notation wtru := << T. >>;

I guess "notation" is another syntax constructor for the
"human-readable" expressions, isn't it? So it is on the same level as
"prefix", "infix" and "infixr", except that the latter are somewhat
"smarter".

> theorem df-tru: << T. <-> (ph <-> ph) >> (wb wtru (wb ph ph));
>
> }
>
>
> pure nonempty sort set;

What does "pure" mean?

> bound x y z w: set;

What prevents the user from declaring bound variables for sorts where
this should not happen, like wff or class? Is this what "pure" is for? I
have the feeling I could have seen something related to this question in
the thread, but I cannot find it any more, sorry...

> term wal (x: set) (ph: wff x): wff; prefix wal: A. 30;
>
>
> def wex (x: set) (ph: wff x): wff :=
>
> << ~A. x ~ph >> (wn (wal x (wn ph))) {
>
> prefix wex: E. 30;
>
>
> theorem df-ex: << E. x ph <-> ~A. x ~ph >>
>
> (wb (wex x ph) (wn (wal x (wn ph))));
>
> }

[...]

> def cv (a: set): class := << {x | wel x a} >>
>
> (cab x (wel x a)) {

I do not like very much the fact that you can also use "direct" term
constructors in "human-readable" expressions, instead of going through some
construct defined with "notation", "infix" or whatever. Could not this
be avoided by extending a little bit the scope of the local notations
introduced a few lines above, and changing some of them so they do not
clash? It would also simplify the parser, probably, because there would
be fewer rules to consider and fewer interactions to check when proving
unambiguity. [semi-cosmetic]

> coercion cv: set -> class;

This basically means that a set can automatically be converted to a
class by introducing a cv constructor, without it being explicit in the
expression, right?

> I'm curious to hear what you all think of a project such as this.
> I'm especially interested in ways to simplify the grammar if we can
> retain readability.

Thank you for coming up with this proposal, which again I find very
interesting. I hope my comments can help to make it even better.

Giovanni.
--
Giovanni Mascellani <g.masc...@gmail.com>
Postdoc researcher - Université Libre de Bruxelles


David A. Wheeler

unread,
Feb 21, 2019, 2:55:12 PM2/21/19
to metamath, metamath
On Thu, 21 Feb 2019 16:04:55 +0100, OlivierBinda <olus....@gmail.com> wrote:
> I can relate to "make the metamath parser simple, unambiguous and efficient"
>
> I have just written an Antlr4 grammar for an old version of metamath.
> But the performance was so bad (it took minutes to parse 4000 set.mm
> statements, with some statements like

ANTLR4 is really neat.
However, unlike ANTLR3, they've changed to focus on a parse algorithm
that is easy to use, even if it can have terrible worst case performance.
Some discussion of their ALL(*) algorithm is here:
https://www.antlr.org/papers/allstar-techreport.pdf

It may be that a more traditional parsing approach would be
fine for "metamath zero". Many parsing systems build up a parse table.
It shouldn't be too hard to experiment with building parse tables as you go.
I'd start with LALR, simply because it's well-known and relatively general,
but that's just my starting guess.
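
To make the parse-table idea concrete, here is a toy sketch (Python, with hand-written tables for the tiny grammar E -> E '+' T | T ; T -> id, nothing to do with the real metamath grammar) of the standard table-driven shift/reduce loop; a real tool would generate ACTION/GOTO rather than writing them by hand:

RULES = [('E', 3), ('E', 1), ('T', 1)]        # E -> E + T,  E -> T,  T -> id
ACTION = {
    (0, 'id'): ('s', 3), (1, '+'): ('s', 4), (1, '$'): ('acc', 0),
    (2, '+'): ('r', 1),  (2, '$'): ('r', 1),
    (3, '+'): ('r', 2),  (3, '$'): ('r', 2),
    (4, 'id'): ('s', 3),
    (5, '+'): ('r', 0),  (5, '$'): ('r', 0),
}
GOTO = {(0, 'E'): 1, (0, 'T'): 2, (4, 'T'): 5}

def lr_parse(tokens):
    stack, toks = [0], list(tokens) + ['$']
    while True:
        act, arg = ACTION[(stack[-1], toks[0])]
        if act == 's':                        # shift: push state, consume token
            stack.append(arg); toks.pop(0)
        elif act == 'r':                      # reduce by rule number `arg`
            lhs, n = RULES[arg]
            del stack[len(stack) - n:]        # pop one state per RHS symbol
            stack.append(GOTO[(stack[-1], lhs)])
        else:                                 # accept
            return True

assert lr_parse(['id', '+', 'id'])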

--- David A. Wheeler

Mario Carneiro

unread,
Feb 22, 2019, 3:51:13 AM2/22/19
to metamath
On Thu, Feb 21, 2019 at 9:43 AM André L F S Bacci <andrebac...@gmail.com> wrote:
On Thu, Feb 21, 2019 at 12:48 AM Mario Carneiro <di....@gmail.com> wrote:
Indeed, dealing with modularization seems pretty important and apparently caused problems for Raph, so perhaps I should give it more thought.

In a specification file such as this, we don't have to worry about proof dependencies, but we do have to be able to import axiom systems and syntax from another file. But now I'm worried about the possible security vulnerabilities this causes. A specification file is supposed to be self-contained in the sense that if you know the language then it says what it means. If a specification file imports another, then the meaning of the first file depends on the meaning of that other file as well. It's even worse if the path is ambiguous in some way, and we don't see exactly what it's referring to. If the includes are relative to a path provided at the command line, then it's not even definite what file is being referred to.

One way to make sure the import is right is to reproduce the axioms and theorems that are expected to be imported. This way the verifier can double check that all the files are stitched together correctly. But that may well be the whole file or nearly so (theorems don't need to be imported, but axioms and definitions do). And that's a hell of a lot of boilerplate, and also obscures the actual content of the file if it starts with a lengthy recap section.

The specification file / proof file dichotomy is supposed to represent the distinction between *what* is being proven and *how* it should be proven. The proof file is a fine place for putting in instructions for how to find other files, but the specification files should be more focused on content and cross checks.

I will return to this topic in a bit with a more concrete syntax proposal.

I was thinking more in terms of development than modularization / meaning. The "correct" file to attest the meaning and validity of proofs is the solid, self-contained file, which in turn could be hashed / signed (and so proof files need a syntax that permits their coexistence in a self-contained file).

Part of the purpose of this design is to make it so that the content of the proof file *doesn't matter at all*. So you don't need to hash it or anything like that to ensure correctness of anything. But I can see use in hashing the specification files themselves for linking purposes.
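
For that linking use, a plain content hash of the specification file would already do; a minimal sketch (Python, file name made up):

import hashlib

def spec_hash(path):
    # Hash the raw bytes of a specification file, e.g. to pin an import
    # to the exact interface it was checked against.
    with open(path, 'rb') as f:
        return hashlib.sha256(f.read()).hexdigest()

# spec_hash('peano.mm0')   # hypothetical file name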

The `file` and `file.import` statements are only for ease of development -- think the mega files of Metamath versus GitHub, and source control in general. In this context, `file` directives will be treated as comments, only used when splitting and joining files.

Modularization comes last, as it is better treated in the syntax / language itself. Maybe a Python-like `import` statement.

I'm not that worried about this aspect. As I've suggested, this file will probably be the compiler output of another file, that is more suitable for proper code maintenance primitives.

So maybe the best solution is just to not have any file splitting primitives in this language at all. They get generated from some other language that has file splitting capabilities, and the result file is all stuck together.



On Thu, Feb 21, 2019 at 10:05 AM OlivierBinda <olus....@gmail.com> wrote:
b) if it is easy to make things computer-understandable (build a tree), it might be easy (or not :/) to turn those things into human-friendly stuff (display, write and interact with it nicely) with another software layer that isn't in the language

c) writing things twice should be a big no no for any programmer and mathematician

d) even if you write things twice in your database, you'll still have to build a parser to handle human input anyway :)

I think that the double writing thing has been met with a resounding "no", even if it is machine generated and machine checked. So I will have to find another solution.

As for needing to build a parser anyway, that's true but it's not part of the verifier. The core of this design is a separation between the things that need to be checked by hand in order to know what a theorem says or why it will be checked correctly, and everything else. I'm assuming for the purpose here that "proofs are cheap"; that is, we have an oracle that can provide proofs for whatever we need, we just need to ensure that the specification part goes well.

As for parsing solutions, I am down to two possibilities at the moment.

1. Specify grammar rules as a CFG, and check for unambiguity via a deterministic pushdown automaton in the proof file.
2. Restrict the kinds of notations to a small collection of possibilities, i.e. infix operators, binders, and precedence, in such a way that nondeterminism or ambiguity isn't possible.

The advantage of (1) is that it's super general. The class of grammars accepted by this method are DCFGs (https://en.wikipedia.org/wiki/Deterministic_context-free_grammar), which includes all LR(k), LALR(k), LL(k), SLR and KLR grammars, including set.mm's grammar. Pretty much any grammar that is actually unambiguous can be expressed this way. However, there is a somewhat complicated proof condition, which will need to be encoded in the proof file. Luckily, this certificate can be checked in linear time; unluckily, the certificate is potentially exponential size in the grammar. However the edge cases where the certificate gets really large (it's proportional to the size of the finite state machine times the number of productions in the grammar) only happen for complicated and nearly-ambiguous grammars. Last I checked, the state machines generated for set.mm are reasonably sized, there are just a few extra states for disambiguating stuff like { <. <. x , y >. , z >. | ph } and { <. <. x , y >. , z >. }.

The downside of (1) is that it doesn't easily handle things like precedence and infix notations and so on. You have to have a whole bunch of nonterminals to encode it well, which bloats the other stuff. Unlike metamath, though, we don't need to have a one-to-one relationship between the CFG productions and syntax axioms, which makes it easier to support things like superfluous parentheses. I should investigate if there are good ways to combine direct CFG descriptions like BNF or a yacc file with simple additions like "add an infix operator now".
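
As a sketch of that bloat, here is roughly what a plain CFG has to look like for just the two notations in the example file ("infixr wi: -> 25", "prefix wn: ~ 40") once precedence is compiled into level-indexed nonterminals -- encoded as Python data, with made-up nonterminal names:

PRODUCTIONS = {
    'wff':   [['wff25']],
    'wff25': [['wff40', '->', 'wff25'],   # infixr ->, precedence 25
              ['wff40']],
    'wff40': [['~', 'wff40'],             # prefix ~, precedence 40
              ['atom']],
    'atom':  [['(', 'wff', ')'],          # explicit grouping
              ['VAR']],
}

Every additional precedence level adds another nonterminal and another chain of unit productions, which is exactly the clutter that a yacc-style precedence declaration or option (2) below would hide.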

With (2), the range of possible syntax is much reduced, and probably many set.mm notations won't survive. On the other hand, there is no need for an unambiguity proof, and it might be that a basic support for infix operators and some precedence is really all we need for getting over that "readable math" hurdle. (It doesn't have to be perfectly general, as long as it is good enough that people get the idea without a significant decoding effort.)
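
To give a feel for how little machinery option (2) needs, here is a rough precedence-climbing sketch (Python, with an operator table that just mirrors the example declarations: infixr -> at 25, prefix ~ at 40, and <-> treated as non-associative at 20 -- an illustration, not a committed design):

PREFIX = {'~': ('wn', 40)}
INFIX = {'->': ('wi', 25, 'right'), '<->': ('wb', 20, 'none')}

def tokenize(s):
    for sym in ('(', ')', '~'):
        s = s.replace(sym, ' %s ' % sym)
    return s.split()

def parse(tokens, min_prec=0):
    tok = tokens.pop(0)
    if tok == '(':
        left = parse(tokens, 0)
        assert tokens.pop(0) == ')'
    elif tok in PREFIX:
        name, prec = PREFIX[tok]
        left = (name, parse(tokens, prec))
    else:
        left = tok                            # a wff variable like ph
    while tokens and tokens[0] in INFIX:
        name, prec, assoc = INFIX[tokens[0]]
        if prec < min_prec:
            break
        tokens.pop(0)
        right = parse(tokens, prec if assoc == 'right' else prec + 1)
        if (assoc == 'none' and tokens and tokens[0] in INFIX
                and INFIX[tokens[0]][1] == prec):
            raise SyntaxError('non-associative operator chained without parentheses')
        left = (name, left, right)
    return left

assert parse(tokenize('(ph -> ps -> ch) -> ph -> ch')) == \
    ('wi', ('wi', 'ph', ('wi', 'ps', 'ch')), ('wi', 'ph', 'ch'))

Binders like "A. x ph" would need one more case (read a variable token, then recurse at the binder's precedence), but still nothing that calls for an unambiguity certificate.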


On Thu, Feb 21, 2019 at 1:00 PM Giovanni Mascellani <g.masc...@gmail.com> wrote:
> nonempty sort wff;


If I am not wrong, all sorts in your example are defined to be nonempty.
So why isn't the keyword "sort" set to directly mean a "non-empty sort",
instead of having to specify it? Or else, what is the use of a sort which
is not declared nonempty? [cosmetic]
 
All sorts defined in metamath are nonempty, so since this file is basically a translation from a metamath file all the sorts are going to be nonempty here. But it's a good question. One of the things baked into metamath as part of the foundation, which you don't even realize is important until you try to formalize the semantics, is the ability to forget about dummy variables. If I have a theorem |- P(x, y) involving free variables x and y, then in the course of the proof I am allowed to introduce a new variable z, disjoint from x and y, and use it in the proof. For bound variables this makes sense, we need a way to pick fresh variables. But because metamath natively doesn't distinguish any of the sorts, you can also do this with wff and class variables, and this is more strange. The definition of wtru is a prime example of this. In metamath it says

df-tru $a |- ( T. <-> ( ph <-> ph ) ) $.

but to present this as a definition doesn't make any sense without some additional assumptions: consider that before this axiom, there are no closed wff terms, so it might well be the case that there are no wffs at all - just knowing we can build ( ph -> ps ) and -. ph for any wffs ph and ps doesn't prove that there are any wffs. But after the "definition" above we can construct wffs like "T." and "-. T." and "( T. -> T. )" that have no variables, so now the collection of all wffs is provably nonempty.
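
The "no closed wff terms" point can even be checked mechanically; a small sketch (Python, with the constructor arities from the example) that just iterates the term-formation rules:

def closed_terms(constructors, rounds=4):
    # Fixed-point style iteration: repeatedly apply each constructor to
    # the closed terms built so far.
    terms = set()
    for _ in range(rounds):
        new = set(terms)
        for name, arity in constructors:
            if arity == 0:
                new.add(name)
            elif arity == 1:
                new |= {(name, t) for t in terms}
            else:
                new |= {(name, a, b) for a in terms for b in terms}
        terms = new
    return terms

# With only wi and wn there is nothing to start from:
assert closed_terms([('wi', 2), ('wn', 1)]) == set()
# Adding the nullary wtru immediately yields "T.", "-. T.", "( T. -> T. )", ...
assert ('wn', 'wtru') in closed_terms([('wi', 2), ('wn', 1), ('wtru', 0)])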

What really happens is that metamath treats all these types as being inhabited by the variables, in addition to all the regular values constructed by the syntax constructors. This is most obvious for the sort "set", which has no constructors at all and yet is perfectly well usable with elements like "x" and "y". From a FOL perspective, this means that we need the sort to be nonempty for this move to make sense.

This leads to the question: What does a sort that's not nonempty look like? In short, it's a sort where "bound x" declarations are not legal. In such a sort you can't prove a theorem by introducing a dummy value of this sort and forgetting about it; you have to actually pick a value from the sort, constructed with the syntax constructors, or use an input variable. Another example is the following proof of id, the most general unifier for id1:

50::ax-1            |- ( ph -> ( ps -> ph ) )
51::ax-1             |- ( ph -> ( ( ps -> ph ) -> ph ) )
52::ax-2             |- ( ( ph -> ( ( ps -> ph ) -> ph ) ) -> ( ( ph -> ( ps -> ph ) ) -> ( ph -> ph ) ) )
53:51,52:ax-mp      |- ( ( ph -> ( ps -> ph ) ) -> ( ph -> ph ) )
qed:50,53:ax-mp    |- ( ph -> ph )


Notice that "ps" is a free variable in this proof, which does not appear in the final result. Metamath will actually accept this proof, although we prefer not to have unnecessary variables like this because it doesn't compress as well, and so the actual proof of id1 substitutes "ph" for "ps". If "wff" was not declared as nonempty, we would be forced to do this or some other substitution to "ps" for this to count as a valid proof.

> var ph ps ch: wff*;

I sort of understand what the * means. But why not introduce
another keyword to accompany "var" (maybe "open"?), instead of adding
the star, which is inhomogeneous with the rest of the syntax? [cosmetic]

This is true, and I was tempted by this suggestion; it would also be reasonable to write

open var ph ps ch: wff;

But actually I think it's not totally inhomogeneous. Specifically, the free variables of a wff variable appear after the sort in theorems, i.e. (ph: wff x y), and so I think having some kind of keyword at the end of "wff" is the right place to put this information. It's like declaring (ph: wff x y z w ...) with all the variables in the world there.

(Side note: this usage of "open" clashes with the lean keyword on which much of the syntax is based; there "open foo" means "open the namespace foo so that any definitions like 'foo.bar' can be referred to as just 'bar'". But I don't think a namespacing system is worth it for a pure specification language - it's more of a code maintenance primitive, and not worth the complexity it would bring.)
 
> term wi (ph ps: wff): wff; infixr wi: -> 25;

I like this way to specify the syntax for "human-readable" expressions
(and I also like very much that idea of introducing a
more-human-readable expression syntax over Metamath altogether). I like
even more the proposal that was made to only specify a partial priority
order, basically for two reasons: first, because it removes the problem
of deciding how much "priority space" one has to leave between two
different operators; second, because it makes operators coming from two
different sources automatically not comparable, instead of automatically
having a random priority order depending on the specific numbers chosen
by the two sources.

Right, the problem with precedence numbers is that you are always faced with the question "what number should I give this operator?" and the answer is always "who knows". It's a local specification as part of a global problem, and people have difficulty thinking in these terms. On the other hand, a precedence parser is one of the easier parsers to write, and it provides a lot of flexibility.

I also understand that in this setting the use of parentheses "(" and
")" to force priority is hardcoded in the language and cannot be
changed, say, to "[" and "]" by a specific file. It is not necessarily
bad, it is just different from Metamath, where the choice of "(" and ")"
is completely arbitrary. [cosmetic]

That's right. Personally I'm comfortable with that, and I expect most people coming from other languages will expect that as well.

Also, "infix" without the "r" is meant to be infix left? Or is there
also an "infixl"? In that case, what does "infix" mean? If "infix" is
already infix left, I would suggest to use "infixl" and leave out
"infix", for better symmetry. [cosmetic]

Actually "infix" means non-associative, so you have to bracket both associations. For things like "wel: set -> set -> wff" there isn't an associativity problem since the return type is different from the input type, but I guess "div: class -> class -> class" is an example of a nonassociative operator: you should never present a/b/c to a user because then they have to guess what association is being used. Maybe "wb: wff -> wff -> wff" is another good example: ph <-> ps <-> ch is just plain confusing, even though by a quirk of truth tables <-> is actually an associative operator.
 
> term wn (ph: wff): wff; prefix wn: ~ 40;
>
> axiom ax-1: << ph -> ps -> ph >>
>
> (wi ph (wi ps ph));

I am among those that do not like these double expressions. I do not
like in general when the same file contains some data that is mean to be
hand-written and some data that is meant to be auto-generated. I would
definitely prefer that the verifier is required to parse mathematical
expressions and check them, so that the file is entirely hand-written.
The proposal to include a proof of the grammar unambiguity in the proof
file sounds reasonable to me.

The above comment calls for eliminating the need for chevrons, but if
they are to stay, then I would suggest replacing them with some other
syntax, in order to make less confusion with arrows and leave them
available for the math syntax (where they can mean "much greater than").
I would propose "$$", which seems to be less likely to be used in maths.
[cosmetic]

Actually, I think that $ ph -> ps -> ph $ would work quite well, now that I come to think about it, because mathematicians are already used to it - it's the latex inline math separator. "$$" works too, but that's for display math, which is not what we are looking at here.

But if a parser is required as part of the operation, then perhaps we don't need sentinels at all, although in this case other tokens that are currently being used as meta operators like "->" and ";" can't be part of inline math.

> theorem a1i: << ph >> ph -> << ps -> ph >> (wi ps ph);

You do not specify what is the proof format. It is ok for me to store it
in another file (actually, I find it positive, in that it goes in the
direction of separating hand-written and generated content) and I do not
need the details of the encoding, but at least I would like to know what
is the abstract format. In Metamath the abstract format is the
substitution, but I believe something has to change in Metamath Zero,
since the abstract expression representation has changed too (Metamath
deals partially with symbol strings and partially with their
parse trees, although without guaranteeing that the parse is unique, while
Metamath Zero seems to be dealing with
parsing-trees-plus-bindings-information; is this correct?).

In the spec, it will officially be "implementation dependent but here's a reasonable option". The design of the language is such that you can still use an essentially metamath-like proof checking process. You can use either trees or strings to represent the formulas (where the strings are just the trees laid out in prefix polish notation), and you perform direct substitution to apply a theorem. If we didn't have the variable declarations, we could write a1i in standalone form as:

theorem a1i (ph ps: wff): $ ph $ -> $ ps -> ph $;
 
So you can view this as a function that takes three arguments, ph, ps, and a proof of ph, and produces a proof of ps -> ph. So for example the s-expression (a1i wtru ph tru) represents a proof of $ ph -> T. $.
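To make that concrete, here is a minimal sketch in Python of the direct-substitution step (the encoding and helper names like "subst" and "apply_theorem" are made up for illustration; this is not the actual proof-file format):

# Formulas as s-expressions: term constructors are tuples like ('wi', x, y),
# variables are bare strings. Applying a theorem is direct substitution into
# its hypotheses and conclusion.

def subst(tree, sigma):
    """Substitute trees for variable leaves."""
    if isinstance(tree, str):                 # a variable like 'ph'
        return sigma.get(tree, tree)
    return (tree[0],) + tuple(subst(t, sigma) for t in tree[1:])

def apply_theorem(var_names, hyps, concl, args, proved):
    """Instantiate a theorem: check that the substituted hypotheses are
    already proved, then return the substituted conclusion."""
    sigma = dict(zip(var_names, args))
    for h in hyps:
        assert subst(h, sigma) in proved, "missing hypothesis"
    return subst(concl, sigma)

# theorem a1i (ph ps: wff): $ ph $ -> $ ps -> ph $;
a1i = (['ph', 'ps'], ['ph'], ('wi', 'ps', 'ph'))

proved = {('wtru',)}                          # suppose tru : $ T. $ is proved
print(apply_theorem(*a1i, [('wtru',), 'ph'], proved))
# ('wi', 'ph', ('wtru',))   i.e.  $ ph -> T. $

The string representation mentioned above would just be this tree laid out in prefix polish order.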

Theorems that bind variables in the proof don't have these variables appear when applying them, but they are available for use in the theorem's proof. The theorem statement in the specification file need not declare these bound variables, since they depend on the particular way in which the theorem is proven.

DV conditions are tracked just like in metamath; they are just declared in a different way. If you have

theorem ax-5 (x: set) (ph: wff): $ ph -> A. x ph $;

then ph is not depending on x because it's not (ph: wff x), so when the theorem is applied we check that the substitution for ph does not depend on x. Unlike metamath, we only permit DV conditions resulting from bound variables and their interaction with other variables, so you will only get the equivalent of $d x ph $. and $d x y $. Of course that's the only kind of DV condition that appears in set.mm anyway.
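As a sketch of that disjointness check (same made-up s-expression encoding as above; "occurs" and "check_dv" are hypothetical helper names):

# When applying a theorem with bound x and (ph: wff), the tree substituted
# for ph must not mention the variable substituted for x.

def occurs(var, tree):
    if isinstance(tree, str):
        return tree == var
    return any(occurs(var, t) for t in tree[1:])

def check_dv(sigma, dv_pairs):
    """dv_pairs are (bound_var, metavar) pairs such as ('x', 'ph')."""
    for x, ph in dv_pairs:
        if occurs(sigma[x], sigma[ph]):
            raise ValueError("substitution for %s may not depend on %s" % (ph, x))

sigma = {'x': 'y', 'ph': ('wal', 'y', 'ps')}  # ph's substitution mentions y
check_dv(sigma, [('x', 'ph')])                # raises: the DV condition fails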

The reason for this halfway point with bound variables kind of tracked but not really is because I want a smooth translation to HOL as well. A variable (ph: wff) is just a truth value, while (ph: wff x) is an open term depending on x, which we might view as (ph: set -> wff) where ph is replaced by "ph x" everywhere in the theorem. A binder like "A. x ph", declared as "wal (x: set) (ph: wff x): wff", is a binary operator from metamath's perspective, and a binding operator, of type (set -> wff) -> wff, from the HOL perspective. So "wal x ph" gets translated first to "wal x (ph x)" and then to "wal (\x -> ph x)", and now we have a proper HOL term.
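A rough sketch of that two-step translation on the same hypothetical encoding, just to make the shape of the output concrete:

# deps records the declared dependencies, e.g. {'ph': ['x']} for (ph: wff x).

def to_hol(tree, deps):
    if isinstance(tree, str):
        # step 1: an open metavariable ph becomes the application (ph x)
        return ('app', tree, *deps[tree]) if deps.get(tree) else tree
    if tree[0] == 'wal':
        # step 2: the bound argument of the binder becomes an explicit lambda
        _, x, body = tree
        return ('wal', ('lam', x, to_hol(body, deps)))
    return (tree[0],) + tuple(to_hol(t, deps) for t in tree[1:])

print(to_hol(('wal', 'x', 'ph'), {'ph': ['x']}))
# ('wal', ('lam', 'x', ('app', 'ph', 'x')))   ~   wal (\x -> ph x)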

I like very much this syntax for definitions. I like the fact that the
soundness check is built into the language, and I like the idea that the
definition is "valid" only for a limited scope, and has to be used via
theorems afterwards. This, if correctly used, makes it possible to separate the
axioms (or "interface") of a theory from its foundational construction
("implementation"), which is what in Metamath is done manually (and,
again, requiring a lot of external verification) for number sets.
Ideally this enables users to "rebase" theories on different
foundations, as long as the definition block does not let foundational
details escape.

I believe that the definition is somehow materialized as an additional
step available to the proof file for theorems appearing in the
definition block. Can you give details on this? Also, what is required
from the proof file to guarantee that the definition is sound?

In the proof file, the definition appears already pre-substituted into the theorem. So if the specification appears as:

  def wb (ph ps: wff): wff := $ ~((ph -> ps) -> ~(ps -> ph)) $ {

    infix wb: <-> 20;


    theorem bi1: $ (ph <-> ps) -> ph -> ps $;

    theorem bi2: $ (ph <-> ps) -> ps -> ph $;

    theorem bi3: $ (ph -> ps) -> (ps -> ph) -> (ph <-> ps) $;

}


then the proof file will see proof obligations:

proof bi1: $ ~((ph -> ps) -> ~(ps -> ph)) -> ph -> ps $ := ...;

proof bi2: $ ~((ph -> ps) -> ~(ps -> ph)) -> ps -> ph $ := ...;

proof bi3: $ (ph -> ps) -> (ps -> ph) -> ~((ph -> ps) -> ~(ps -> ph)) $ := ...;


If the definition includes dummy variables (i.e. "bound" quantifiers), then those dummy variables will be renamed in the substitution. (Note that this doesn't require any smart substitution - it just replaces the variables with new names and leaves everything else alone. Dummies have to be disjoint from everything else anyway, so this doesn't cause a problem.) Example:

  def wtru (bound p: wff): wff := $ p <-> p $ {

    notation wtru := $ T. $;

    theorem df-tru: $ T. <-> (ph <-> ph) $;

  }

  def weu (x: set) (ph: wff x) (bound y: set): wff := $ E. y A. x (ph <-> x = y) $ {

    prefix weu: E! 30;

    theorem df-eu (x y: set) (ph: wff x): $ E! x ph <-> E. y A. x (ph <-> x = y) $;

  }


proof file:

proof df-tru (ph ps: wff): $ (ps <-> ps) <-> (ph <-> ph) $ := ...;

proof df-eu (x y z: set) (ph: wff x):

$ E. z A. x (ph <-> x = z) <-> E. y A. x (ph <-> x = y) $ := ...;


> def wa (ph ps: wff): wff := << ~(ph -> ~ps) >> (wn (wi ph (wn ps))) {
>
> infix wa: /\ 20;
>
> theorem df-an: << (ph /\ ps) <-> ~(ph -> ~ps) >>
>
> (wi (wa ph ps) (wn (wi ph (wn ps))));
>
> }
>
>
> def wtru (bound p: wff): wff := << p <-> p >> (wb p p) {
>
> notation wtru := << T. >>;

I guess "notation" is another syntax constructor for the
"human-readable" expressions, isn't it? So it is on the same level with
"prefix", "infix" and "infixr", except that these latter are somewhat
more "smart".

Yes, that's right. They are all in the same logical grouping, and all of them take a local modifier and so on. "notation" is just the catch all version, useful for zero-ary constructors like this one, and also more complicated notations, if we go the "arbitrary CFG" route.

> pure nonempty sort set;

What does "pure" mean?

I'm not sure if it's necessary to add this in, but it's useful for translation to metamath. "pure" means that there will be no syntax constructors for this type, so it is not necessary to translate this sort into two metamath sorts (one for the variables and one for the expressions) because there are no expressions. If the verifier sees this the only thing it needs to do is disallow "term" constructors targeting this sort. I think it will be useful to explicitly declare this intent, since it is not completely obvious that if you add a term constructor to the "set" sort in metamath you can derive a contradiction.
 
> bound x y z w: set;

What does prevent the user to declare bound variables on sorts for which
this should not happen, like wff or class? Is this what "pure" is for? I
have the feeling I could have seen something related to this question in
the thread, but I cannot find it any more, sorry...

The thing that prevents the user from declaring bound variables is (the lack of) the "nonempty" modifier on the sort. So as I've defined it you actually can declare bound variables that are wffs and classes. However, this on its own only has the effect that you can forget dummies of this sort, it doesn't give you the ability to quantify internally over them. For that, you need a syntax constructor which binds a wff or class variable, and there aren't any of those in the file. Dual to the "pure" modifier, we could have a "strict" modifier on sorts to say that there should be no variable sort generated for this sort in metamath, and this comes with an attendant restriction that you can't declare a binding operator like

term all_class (A: class) (ph: wff A): wff;

that would make the theory more like MK set theory.
 
> def cv (a: set): class := << {x | wel x a} >>
>   (cab x (wel x a)) {

I do not like very much the fact that you can also use "direct" term
constructors in "human-readable" expressions, instead of passing by some
construct defined with "notation", "infix" or whatever. Could not this
be avoided by extending a little bit the scope of the local notations
introduced a few lines above, and changing some of them so they do not
clash? It would also simplify the parser, probably, because there would
be less rules to consider and less interactions to check when proving
unambiguity. [semi-cosmetic]

The idea is that if there is no notation for a term constructor, then this prefix form is still available. In particular, that means that you can always write s-expressions if you really want to, and for complicated term constructors (say, with 5+ arguments) this may be the cleanest way to actually write them. I don't think a fancy infix notation should be required for things where a simple prefix notation works fine.

I'm not exactly sure what to do in the case of overlapping notations, but my instinct is to disallow it entirely. In this case we have one binary operator "e." that is being repurposed to mean three different things throughout the file. You don't want the computer guessing which one you mean, and especially for theorems where you have to talk about two or more of them at the same time, like df-cv:

theorem df-cv: $ a e. b <-> wel a b $;
 
you should be forced to write the expressions in an unambiguous way.

It is up to the person writing the specification file whether they want to have a bunch of nonoverlapping global notations, metamath style, or scope things more locally. The prefix notation is a backup when you don't have a notation for the constructor at the moment (or don't want to be bothered defining one).

> coercion cv: set -> class;

This basically means that a set can automatically be converted to a
class by introducing a cv constructor, without it being explicit in the
expression, right?

This is another "notation family" command. It's just setting up the production "class -> set" in the CFG, or in metamath lingo, it's the notation associated to the term constructor $a class x $.

On Thu, Feb 21, 2019 at 2:55 PM David A. Wheeler <dwhe...@dwheeler.com> wrote:
It may be that a more traditional parsing approach would be
fine for "metamath zero".  Many parsing systems build up a parse table.
It shouldn't be too hard to experiment with building parse tables as you go.
I'd start with LALR, simply because that's well-known and relatively general,
but that'd be my starting guess.

The neat thing about the parsing option (1) I laid out is that the class of all DCFGs is way larger than LALR and all the others, and you still get linear time parsing. It's exploiting the oracular nature of the proof file to have the whole parser already built and ready for use, and it can even help with verifying the correctness of the parser. But this check will be somewhat separate from the rest of the verification process - it's not a regular proof and can't be expressed as a simple proof obligation like all the theorems - which makes the checker larger and makes correctness more involved.

Mario

David A. Wheeler

unread,
Feb 22, 2019, 10:39:23 PM2/22/19
to metamath, metamath
On Fri, 22 Feb 2019 03:50:57 -0500, Mario Carneiro <di....@gmail.com> wrote:
> As for parsing solutions, I am down to two possibilities at the moment.
>
> 1. Specify grammar rules as a context-free grammar (CFG), and check for unambiguity via a
> deterministic pushdown automaton in the proof file.
> 2. Restrict the kinds of notations to a small collection of possibilities,
> i.e. infix operators, binders, and precedence, in such a way that
> nondeterminism or ambiguity isn't possible.
>
> The advantage of (1) is that it's super general...
> However, there is a somewhat complicated proof
> condition, which will need to be encoded in the proof file. Luckily, this
> certificate can be checked in linear time; unluckily, the certificate is
> potentially of exponential size in the grammar.

I think you should seriously think about "1.5" (something between).

Supporting general deterministic context-free grammars (DCFGs)
is certainly powerful, as noted in:
https://en.wikipedia.org/wiki/Deterministic_context-free_grammar

However, I don't see any reason you need the full generality of
an arbitrary DCFG. Instead, seriously think about picking a more restrictive
subset of DCFGs such as LALR(k) or LL(1). I suspect you could work quite
happily within LALR(1) at least; most programming languages are
easily parsed as LALR(1), and you can *choose* the syntax.

If you pick a common restrictive subset like LALR(1), not only is the algorithm
much simpler, but in many programming languages there is
a trivially-available library or tool that does all the work anyway.
Subsets like LALR(1) or LL(1) have been *extensively* studied,
and anyone who's had a compiler course has already used them,
so you also get to build on well-understood work.
Handling precedence in these subsets, for example, is trivial & well-understood.

You can also borrow a trick from ANTLR to make the tool more user-friendly:
directly convert left (or right recursion) if it's expressed within the rule itself.
First, let's review the problem. (Mario, I know you understand the
left/right recursion issue; I'm explaining some details so everyone else can follow along.)
As noted in many places like https://lambda.uta.edu/cse5317/notes/node21.html
In LALR(1) the opposite of LL(1) is true: left recursion is good, right recursion is bad.
E.g., this is typically a bad rule for LALR(1) parsers:

L ::= id , L
    | id

"which captures a list of ids separated by commas. If the FOLLOW of L (or to be more precise, the expected lookaheads for L ::= id) contains the comma token, we are in big trouble. We can use instead:"

L ::= L , id
    | id
LL(1) parsers have the opposite problem: You need to rewrite the other way, to use the
first rule form and not the second rule form.

What the ANTLR folks realized was that the common case (shown above) is really
easy to automatically rewrite just like a human would do it. If you do that, the tool
is MUCH easier to use correctly, yet the implementation code to do it is trivial.

You could still have special shorthand to add rules like your "possibility 2",
but they could just be shorthand for rules you *could* spell out directly.

The unambiguity proof is then trivial: Generate the parse table (as normal)
as you go, and if there's a conflict (e.g., shift/reduce or reduce/reduce),
then the rule that created the ambiguity is in error.
In this case precedence rules might be a good thing - they'll eliminate many kinds
of structures that would otherwise be an ambiguity.

--- David A. Wheeler

Mario Carneiro

unread,
Feb 22, 2019, 11:00:59 PM2/22/19
to metamath
I think you misunderstand. If the name of the game is that the oracle provides a parser and I check that it works, then the class of accepted grammars is all DCFGs. I don't have to do anything special for this to happen. Restricting to LALR(k) or LL(1) doesn't make this process any easier, it just makes the oracle's job easier, and I don't care about that. (From the oracle's point of view, i.e. the one coming up with the parser and its proof, it may well be worth it to restrict its attention to LL(1) grammars or some other more manageable subset, but that question doesn't affect the code of the checker or the specification language at all.) Essentially, it's the difference in power between P and NP problems - the class of problems checkable by a verifier in polynomial time is the class NP, which is much larger than P, which is what the checker would be limited to if it had to invent the proof itself.
 
You can also borrow a trick from ANTLR to make the tool more user-friendly:
directly convert left (or right recursion) if it's expressed within the rule itself.
First, let's review the problem.  (Mario, I know you understand the
left/right recursion issue; I'm explaining some details so everyone else can follow along.)
As noted in many places like https://lambda.uta.edu/cse5317/notes/node21.html
In LALR(1) the opposite of LL(1) is true: left recursion is good, right recursion is bad.
E.g., this is typically a bad rule for LALR(1) parsers:

L ::= id , L
    | id

"which captures a list of ids separated by commas. If the FOLLOW of L (or to be more precise, the expected lookaheads for L ::= id) contains the comma token, we are in big trouble. We can use instead:"

L ::= L , id
    | id
LL(1) parsers have the opposite problem: You need to rewrite the other way, to use the
first rule form and not the second rule form.

What the ANTLR folks realized was that the common case (shown above) is really
easy to automatically rewrite just like a human would do it.  If you do that, the tool
is MUCH easier to use correctly, yet the implementation code to do it is trivial.

You could still have special shorthand to add rules like your "possibility 2",
but they could just be shorthand for rules you *could* spell out directly.

The unambiguity proof is then trivial: Generate the parse table (as normal)
as you go, and if there's a conflict (e.g., shift/reduce or reduce/reduce),
then the rule that created the ambiguity is in error.
In this case precedence rules might be a good thing - they'll eliminate many kinds
of structures that would otherwise be an ambiguity.

The part of the process I want to avoid in the checker is the generation of the parse table. You have to discover the finite state automaton as you go, and it's easier to just be given an automaton to start with.

I have a more concrete proposal for option 2 as well. The code for a precedence parser (https://en.wikipedia.org/wiki/Operator-precedence_parser) is simple, linear time, requires no parse table generation, and supports infix operators, precedence ordering, and associativity. It can also be adapted for including prefix operators like s-expressions, as well as a partial precedence ordering, although it looks like constructing a partial order data structure is O(n^3) in the worst case (where n is the number of infix operators in the grammar), probably better for sparse graphs (which I expect the precedence ordering will be).
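For concreteness, here is a small precedence-climbing sketch in Python of that kind of parser. The operator table is made up (precedences borrowed from earlier in this thread, associativities chosen arbitrarily), and it handles only prefix/infix operators with numeric precedences, not partial orders or general notations:

# Tokens are assumed to be already lexed. Each infix operator has a binding
# power and an associativity; prefix operators have a binding power.

INFIX = {'->': (25, 'right'), '<->': (20, 'left'), '/\\': (20, 'left')}
PREFIX = {'~': 40}

def parse(tokens, min_prec=0):
    head = tokens.pop(0)
    if head == '(':
        lhs = parse(tokens, 0)
        assert tokens.pop(0) == ')'
    elif head in PREFIX:
        lhs = (head, parse(tokens, PREFIX[head]))
    else:
        lhs = head                                # a variable or constant
    while tokens and tokens[0] in INFIX:
        prec, assoc = INFIX[tokens[0]]
        if prec < min_prec:
            break
        op = tokens.pop(0)
        rhs = parse(tokens, prec + 1 if assoc == 'left' else prec)
        lhs = (op, lhs, rhs)
    return lhs

print(parse('ph -> ps -> ph'.split()))
# ('->', 'ph', ('->', 'ps', 'ph'))    right-associative, as declared

Each token is consumed exactly once, so the parse runs in linear time with no table generation.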

Thierry Arnoux

unread,
Feb 23, 2019, 3:41:45 AM2/23/19
to meta...@googlegroups.com
On 22/02/2019 00:50, Mario Carneiro wrote:

> I think that the double writing thing has been met with a resounding
> "no", even if it is machine generated and machine checked. So I will
> have to find another solution.
>
Why don't you make this double writing part of the file containing the
proofs? That's the one that has to be easily verifiable, right?

So you'd have:

- one human readable file listing the syntax, definitions, axioms and
theorems, not easily verifiable/parse-able by itself but fully
human-readable.

- one computer generated file providing the s-expressions and the
proofs. This file is to be easily verifiable, for both proofs and syntax
statements.

One could still use the second file (with the s-expressions) to verify
that the first one (with just the syntax) is valid. You anyway already
need it to verify that the theorems have valid proofs, right?

The human readable file may actually be re-generated from the second
file, as a way to prove it.

But maybe I did not fully understand what you try to achieve here?

_
Thierry

Mario Carneiro

unread,
Feb 23, 2019, 4:47:25 AM2/23/19
to metamath
The important requirement is that the proof file cannot affect the meaning of the statements in any way. If I used a metamath approach, where the math statements are human readable (ish) but have no given parse, and the proofs provide the parses for the math statements, then what I am actually verifying is that *there exists* a parse for the expression such that the statement is provable. If the grammar is ambiguous, this is very bad, because it means that an "antagonistic oracle" could provide whichever parse for the statement is the unexpected one or the trivial one.

So the intent of the s-expressions in the specification file is so that the specification file itself pins down the parses completely, so that this kind of trickery isn't possible. But as David has pointed out, since the s-expressions are quite hard to read, the security benefits they provide, showing the user exactly what is being proved, are mostly negated if the expression is so hard to read that users just ignore it in favor of the possibly ambiguous pretty printed version.

As far as which parts are easily verifiable, I haven't been focusing on that too much but I think all of it should be easily verifiable. The proof file should provide enough information that verifying it is as simple as following the instructions given directly in the file. So it should be possible to achieve roughly linear time verification.

Mario

Giovanni Mascellani

unread,
Feb 23, 2019, 12:48:36 PM2/23/19
to meta...@googlegroups.com
Dear Mario,

thank you very much for your answers and the interesting discussion. I
have a few more comments, in the hope they will be useful.

On 22/02/19 09:50, Mario Carneiro wrote:
> I'm not that worried about this aspect. As I've suggested, this file
> will probably be the compiler output of another file, that is more
> suitable for proper code maintenance primitives.
>
> So maybe the best solution is just to not have any file splitting
> primitives in this language at all. They get generated from some other
> language that has file splitting capabilities, and the result file is
> all stuck together.

I am not sure I have understood the discussion on modularization: I
believe that properly supporting modularization is important and that it
should be something more clever than C's #include. Something like
Python's import would be much better (I have myself been thinking for a
while about a different format proposal for Metamath; not seriously enough
to actually have a real thing, but Python-styled modularization was one
of the points I had in mind; also OpenTheory is built along the same
lines, if I remember correctly).

However, I would not be able to comment more in depth so far, so I will
wait for a more complete proposal by Mario. Except for one point: I
would like it to be the case that, in order to verify the proof of a specific module,
there is no need to know the proofs of the modules that module
depends on. But I believe this is already one of the firm points
by Mario.

> As for parsing solutions, I am down to two possibilities at the moment.
>
> 1. Specify grammar rules as a CFG, and check for unambiguity via a
> deterministic pushdown automaton in the proof file.
> 2. Restrict the kinds of notations to a small collection of
> possibilities, i.e. infix operators, binders, and precedence, in such a
> way that nondeterminism or ambiguity isn't possible.

I have already cast a tentative vote for number 1, but I am not as
sure any more. A problem I now see with 1 is that the automaton that
verifies a CFG depends globally on that grammar. This makes
modularization more difficult: introducing a new syntax in a
dependencies requires recomputing the automata for all the modules that
use the dependency, even if they do not use the new syntax. Each time
you include a new module you need to recompute the automaton. This seems
to make practical usage of modules quite complicated. Proposal 2 does
not have this problem, it seems.

Regarding proposal number 2, it seems pretty likely that it wouldn't be
able to handle the grammar currently described in set.mm. However, I
would like to understand if we can support enough features so that a few
notations that are usual in mathematics do not get lost. For example, a
reasonable notation for "limit-type" operators (i.e., limits, integrals,
summations, ...) and for class abstractions. So for example we could add
the provision that derivations of the form "{" SOMETHING_INSIDE "}" are
allowed, provided that tokens "{" and "}" do not appear anywhere else in
the grammar. Or derivations like "integral_sign" EXPRESSION are allowed
with the additional rule that EXPRESSION is as short as possible. I am
not sure these rules actually preserve unambiguity, I am not really an
expert in parsing. It is just a little brainstorming.

> The thing that prevents the user from declaring bound variables is (the
> lack of) the "nonempty" modifier on the sort. So as I've defined it you
> actually can declare bound variables that are wffs and classes. However,
> this on its own only has the effect that you can forget dummies of this
> sort, it doesn't give you the ability to quantify internally over them.
> For that, you need a syntax constructor which binds a wff or class
> variable, and there aren't any of those in the file. Dual to the "pure"
> modifier, we could have a "strict" modifier on sorts to say that there
> should be no variable sort generated for this sort in metamath, and this
> comes with an attendant restriction that you can't declare a binding
> operator like
>
> term all_class (A: class) (ph: wff A): wff;
>
> that would make the theory more like MK set theory.

I tend to think that I would like to have both "pure" and "strict", and
to define set as pure and wff and class as strict. It seems to me that
the principle of restricting everything that does not have a reason to be
allowed finds a good application here.

> The idea is that if there is no notation for a term constructor, then
> this prefix form is still available. In particular, that means that you
> can always write s-expressions if you really want to, and for
> complicated term constructors (say, with 5+ arguments) this may be the
> cleanest way to actually write them. I don't think a fancy infix
> notation should be required for things where a simple prefix notation
> works fine.

I am not completely convinced. In a sense I would like to separate
completely the human-readable notation and the names used in the parsing
tree. However, I can perfectly live with it.

> I'm not exactly sure what to do in the case of overlapping notations,
> but my instinct is to disallow it entirely. In this case we have one
> binary operator "e." that is being repurposed to mean three different
> things throughout the file. You don't want the computer guessing which
> one you mean, and especially for theorems where you have to talk about
> two or more of them at the same time, like df-cv:
>
> theorem df-cv: $ a e. b <-> wel a b $;
>  
> you should be forced to write the expressions in an unambiguous way.
>
> It is up to the person writing the specification file whether they want
> to have a bunch of nonoverlapping global notations, metamath style, or
> scope things more locally. The prefix notation is a backup when you
> don't have a notation for the constructor at the moment (or don't want
> to be bothered defining one).

As I understand it, for both signs "=" and "e." only the class-class
version is used except in the foundation part of the file, isn't it? So
what I would do is reserve the names "=" and "e." for that version, and
invent some other variations like "=_" or "e._" for the set versions,
and of course declare them locally to a context that exists only as long
as the foundations have to be set up. And I would only use those
syntaxes, not the explicit s-expression one.

However, in the end it is a matter of tastes, and it doesn't really
bother me if you prefer something else.

Thank you again, also for the comments I have not replied to (meaning
that I am happy with the explanations you already gave).

Mario Carneiro

unread,
Feb 24, 2019, 4:19:17 AM2/24/19
to metamath

To help you see where I am going with this, consider the idea of formalizing all of this - the specification language and the verifier. This project is actually borne of a more abstract problem, to define what is a compiler-verifier and what is needed to ensure correctness of a program in a way that admits "bootstrapping". I've attached a PDF which describes some of my attempts to wrap my head around all this. It's at a more abstract level (I wrote it before I started working on metamath zero), but the idea of this project is to define a simple language that can be the lowest level, a language that can prove itself. It's unpublished and unfinished but it will probably help with seeing why I'm so focused on the separation of concerns between the proof file and the specification file.

To put this project in the context of that paper, here's the first line:

> Fix an input language L and output set M, and let R C_ L X. M . A compiler-verifier for R is a program P such that given inputs s, x, if s e. L, then either P(s,x) does not terminate or indicates failure, or (s, P(s,x)) e. R .

In this case, L is the specification of Metamath Zero. It describes what is a well formed specification file. M is a syntax for executable files of some sort (I haven't talked about how the compiler part would work yet, let's ignore it for now, and assume M = {""}, i.e. the compiler produces no output), and R is the interpretation function. With M trivialized, this is basically just a subset of L, and it describes the "correct" specifications. This is the really important bit. For metamath zero, (s, "") e. R means that all the theorems in the file are provable from the axioms given in the file.

The program P is the metamath zero verifier. It is given inputs s, the specification file, and x, the proof file, and is required to either report failure, or return an executable file P(s,x) (in this case just "") such that (s, P(s,x)) e. R, i.e. the theorems in the specification file are true.

Now, to the questions:

On Sat, Feb 23, 2019 at 12:48 PM Giovanni Mascellani <g.masc...@gmail.com> wrote:
On 22/02/19 09:50, Mario Carneiro wrote:
> So maybe the best solution is just to not have any file splitting
> primitives in this language at all. They get generated from some other
> language that has file splitting capabilities, and the result file is
> all stuck together.

I am not sure I have understood the discussion on modularization: I
believe that properly supporting modularization is important and that it
should be something more clever than C's #include. Something like
Python's import would be much better (I have myself been thinking for a
while on a different format proposal for Metamath; not seriously enough
to actually have a real thing, but Python-styled modularization was one
of the points I had in mind; also OpenTheory is built along the same
lines, if I remember correctly).

With the language above, it's easy to see that there is a partial ordering on specification files, where s1 <= s2 if (s1, m) e. R -> (s2, m) e. R for all m. (This is read "s1 is stronger than s2".) So for instance if s2 is obtained from s1 by deleting theorems, and then deleting any unused definitions, then s1 <= s2.

We can thus minimize a specification file by deleting theorems and unused definitions; this can allow you to craft a specification file that is "tailored" to the particular big theorem you want to get to. It can be thought of as a self-contained presentation format for a theorem that you want to convince someone of. You will always have to include all axioms and primitive syntax constructors, but these should usually be O(1) (they are basically the contents of the file I posted, in the case of set.mm style ZFC).

What does this have to do with modularization? Well, I think that these specification files won't actually need to be that big - a specification file for all of set.mm will of course be quite large to accommodate all the theorems, but most of them won't be needed for stating a theorem. For example if I wanted to show off the prime number theorem to someone, I would need to include the ZFC axioms, all the definitions in the hierarchy of definitions leading to the statement of the theorem, and the statement of pnt. I would not need to include any theorems other than pnt itself. This is exactly what you need in order to explain the meaning of the statement, and no more.

With this in mind, I think that having file splitting primitives would make the definition of the relation R much harder, as it would need to refer not just to s but to all files referenced by s. Even figuring out what the statement of the theorem is might lead to a nonterminating computation - imagine if there was a naughty fileserver that decides to feed me a sequence of files like so:

include "file0";
theorem bad: $ T0 $;

file(n).mh:
include "file(n+1)";
def T_n: wff := $ T_n+1 $ {
  theorem T_n_def: $ T_n <-> T_n+1 $;
}

In order for me to know what the interpretation of theorem bad is, I have to consult "file0", which refers to "file1" and so on, as long as the server is willing to field my requests. (I'm assuming the server is generating these files parametrically, they aren't actually stored anywhere.) I'll never finish, and so (spec.mh, "") e. R is undefined.

Of course, at this extreme there are similar problems even with a single file. How do you know that the file that you viewed is the same as the file that is fed to the program? What if the file that the program reads is neverending by some OS filesystem tricks? In this case it's not clear how to view this program as an idealized turing machine, when it's being fed input data in a complicated way that causes interactions with concurrent untrusted computations like the OS piping system. I realize this all sounds super paranoid, but I'm trying to figure out here *exactly* what we are assuming when we say we have verified a theorem, and to what degree we can control the existing risks.

Long story short, file splitting is hard to model without some strong assumptions, and so if I can avoid it I will. This specification file can be generated from another similar language that is more suited to practical usage, and file splitting can go in that language.

However, I would not be able to comment more in depth so far, so I will
wait for a more complete proposal by Mario. Except for one point: I
would like that for the sake of verifying the proof of a specific module
there would be no need to know the proofs for the modules the first
module depends on. But I believe this is already one of the firm points
by Mario.

One place where file splitting may be necessary for practical reasons is in splitting proofs when verification becomes complex enough to require multiple runs of the verifier. As I've suggested above, this is a rather difficult problem, but it can still be expressed with simple (not split) specification files. You can express a cut as a pair of specification files where one has some theorems and the other has axioms with matching statements. In this case you can verify each part individually, and then you need a "split verifier" that checks that the axioms of one indeed match the theorems of the other, and can spit out the composite file (or checks the composite against a third input file). Let's put that idea under "future work" for now.
 
> As for parsing solutions, I am down to two possibilities at the moment.
>
> 1. Specify grammar rules as a CFG, and check for unambiguity via a
> deterministic pushdown automaton in the proof file.
> 2. Restrict the kinds of notations to a small collection of
> possibilities, i.e. infix operators, binders, and precedence, in such a
> way that nondeterminism or ambiguity isn't possible.

I have already casted a tentative vote for number 1, but I am not as
sure any more. A problem I now see with 1 is that the automaton that
verifies a CFG depends globally on that grammar. This makes
modularization more difficult: introducing a new syntax in a
dependencies requires recomputing the automata for all the modules that
use the dependency, even if they do not use the new syntax. Each time
you include a new module you need to recompute the automaton. This seems
to make practical usage of modules quite complicated. Proposal 2 does
not have this problem, it seems.

This is true. For option 1 I have waffled between having a big "notation { }" section at the start that declares all the notation needed for the whole file, and would be matched by an automaton proof in the proof file, and allowing the notation declarations to be scattered throughout the file, where they logically belong, and then the file has to be read in two passes, one to accumulate the notations and check them, and another to read the statements using the parser. As for modularization, you would be forced to make a new parser, with a new automaton from scratch, for each module that introduces new notation or combines previous modules that are incomparable in dependency order.
 
Regarding proposal number 2, it seems pretty likely that it wouldn't be
able to handle the grammar currently described in set.mm. However, I
would like to understand if we can support enough features so that a few
notations that are usual in mathematics do not get lost. For example, a
reasonable notation for "limit-type" operators (i.e., limits, integrals,
summations, ...) and for class abstractions. So for example we could add
the provision that derivations of the form "{" SOMETHING_INSIDE "}" are
allowed, provided that tokens "{" and "}" do not appear anywhere else in
the grammar. Or derivations like "integral_sign" EXPRESSION are allowed
with the additional rule that EXPRESSION is as short as possible. I am
not sure these rules actually preserve unambiguity, I am not really an
expert in parsing. It is just a little brainstorming.

One relatively easy class of unambiguous expressions is prefix expressions. These have the form

my_expr v1 c1 c2 v2 c3 (v3 : prec)

where "my_expr" is unique and you can use any sequence of constants and variables thereafter. The variables will have sorts appropriate to the syntax, and the each variable is associated with a precedence. Here v1 and v2 get the precedences declared for c1 and c3 respectively (these must be "infixy constants" which have declared precedences).

The operator precedence parser I mentioned needs some partially ordered set of precedence levels, which are usually taken to be numbers, but these levels need not correspond one to one to sorts, syntax constructors, or operator symbols. They may be declared on their own. Here's a possible syntax based on that breakdown:

precedence plus mul: left;
precedence div: none;
precedence imp: right;
precedence and or: left;
precedence not bi: none;
precedence_order (imp bi) < (and or) < not < plus < (mul div);

infix $+$: plus := \(A B: class), co cadd A B;
infix $*$: mul := \(A B: class), co cmul A B;
infix $/$: div := \(A B: class), co cdiv A B;
infix $->$: imp := wi;
infix $<->$: bi := wb;
infix $/\$: and := wa;
infix $\/$: or := wo;
prefix $-.$: not := wn;
notation (x: set) (ph: wff x): $A.$ (x: max) (ph: 0) := wal x ph;

This declares a bunch of infix operators such that + and * are left associative, / is non-associative, * and / are not comparable (so a*b/c and a/b/c don't work), arithmetic has higher precedence than logic, and/or are not comparable but they are both individually left assoc, -> is right assoc, <-> is nonassoc and not comparable with ->, etc. The binder A. is interesting because it is defined using the extended prefix definition I described above; worth noting is that x is a variable followed by a variable so it must have max precedence, and ph can have any precedence but I've given it precedence bottom (aka 0) which means that it will extend until the next close parenthesis. (This is actually a bit different from metamath precedence, where "( A. x ph -> A. x ps )" means wi (wal x ph) (wal x ps) instead of wal x (wi ph (wal x ps)), which would be written A. x ( ph -> A. x ps ). I think metamath's precedence is the less usual one, but I could be wrong.)

I'm trying to be consistent with the declaration of variables, but it's not really necessary to put the types in the variables here, since they are implied by the RHS. But probably these expressions should be typechecked here, at the point of declaration, rather than after they have been put together into a syntax tree.

Unfortunately, it looks like the rules and restrictions are getting a bit complicated. Of course they need to be spelled out in detail in a spec, but I also want to make sure that this doesn't get so complicated that people can't follow how it works, because then we're back to square one with a system that can sneak things past you because it's too complex for you to understand.

> Dual to the "pure"
> modifier, we could have a "strict" modifier on sorts to say that there
> should be no variable sort generated for this sort in metamath, and this
> comes with an attendant restriction that you can't declare a binding
> operator like
>
> term all_class (A: class) (ph: wff A): wff;
>
> that would make the theory more like MK set theory.

I tend to think that I would like to have both "pure" and "strict", and
to defined set as pure and wff and class as strict. It seems to me that
the principle of restricting all that does not have a reason to be
allowed finds good application here.

I think you are right. There is another sort modifier I think I forgot, which has to do with the "|-" metamath typecode. Currently, there is nothing restricting the sort of hypotheses and theorem statements, but in set.mm of course these all have to be valid wffs (this isn't checked, but it's true). This is a metatheorem that follows from the fact that every declared axiom has a type which is a valid wff. So probably we need another modifier "provable" to express this, so that with

  strict provable nonempty sort wff;
  strict nonempty sort class;
  pure nonempty sort set;

the declaration

  axiom nonempty_intro (A B: class): $ A e. B $ -> $ B $;

is rejected.
 
> The idea is that if there is no notation for a term constructor, then
> this prefix form is still available. In particular, that means that you
> can always write s-expressions if you really want to, and for
> complicated term constructors (say, with 5+ arguments) this may be the
> cleanest way to actually write them. I don't think a fancy infix
> notation should be required for things where a simple prefix notation
> works fine.

I am not completely convinced. In a sense I would like to separate
completely the human-readable notation and the names used in the parsing
tree. However, I can perfectly live with it.

With reference to the above discussion of prefix notations, it is possible to have a default declaration like

notation: $foo$ (A: max) (B: max) (ph: max) := foo A B ph

for any syntax axiom that does not have a notation explicitly declared for it. So it's possible to support in conjunction with everything else, although that doesn't address the question of whether this is a good idea to begin with, as you point out.
 
> I'm not exactly sure what to do in the case of overlapping notations,
> but my instinct is to disallow it entirely. In this case we have one
> binary operator "e." that is being repurposed to mean three different
> things throughout the file. You don't want the computer guessing which
> one you mean, and especially for theorems where you have to talk about
> two or more of them at the same time, like df-cv:
>
> theorem df-cv: $ a e. b <-> wel a b $;
>  
> you should be forced to write the expressions in an unambiguous way.
>
> It is up to the person writing the specification file whether they want
> to have a bunch of nonoverlapping global notations, metamath style, or
> scope things more locally. The prefix notation is a backup when you
> don't have a notation for the constructor at the moment (or don't want
> to be bothered defining one).

As I understand it, for both signs "=" and "e." only the class-class
version is used except in the foundation part of the file, isn't it?

Yes, although the foundation part of the file is not insignificant - there are at least a few hundred theorems in there. If we front load all the axioms, we can hurry up and get to the version of "e." and "=" that we want, but possibly there are some theorems for which we don't want to incur a dependence on set theory.
 
So what I would do is reserve the names "=" and "e." for that version, and
invent some other variations like "=_" or "e._" for the set versions,
and of course declare them locally to a context that exists only as long
as the foundations have to be set up. And I would only use those
syntaxes, not the explicit s-expression one.

The main issue I have with this is that it doesn't really allow for locally scoped notations. It seems like you still have to be globally unambiguous with this approach. I think that it's a good idea to declare notations for almost everything, but I don't want to require people to come up with funny operators just because they are forced to by the system, and I think that the possibility of un-nameable syntax is a big problem.

Mario
Bootstrapping_Compiler_Verifiers.pdf

Mario Carneiro

unread,
Feb 25, 2019, 2:43:21 AM2/25/19
to metamath
Thank you all again for the helpful discussion. I've put up the basic syntax of the language in a github repository:

https://github.com/digama0/mm0/blob/master/mm0.md

I plan to add more about the meaning of the files next. Hopefully it shouldn't be much further until we can have a well-formedness checker for the language (i.e. determining that the file has a well-defined proposition stating the provability of some statements in some axiom system).

Mario

Giovanni Mascellani

unread,
Feb 25, 2019, 5:54:49 AM2/25/19
to meta...@googlegroups.com
Hi,

On 25/02/19 08:43, Mario Carneiro wrote:
> Thank you all again for the helpful discussion. I've put up the basic
> syntax of the language in a github repository:
>
> https://github.com/digama0/mm0/blob/master/mm0.md

Thanks for all your patience and answers, which I still have to finish
reading. Reading you grammar, I feel the need for another couple of
clarifications.

* It seems that nothing is said about lexing: while for the "outer"
language this is probably not very controversial (I imagine the rules
for most programming languages are in force: tokens are either sequence
of alphanumeric characters, or sequence of symbols; whitespace is
irrelevant except that it always divides tokens; symbol tokens are
divided as soon as they are not a prefix of a valid symbol token), I
believe something is to be said about how to tokenize the "inner"
language, i.e., the one whose grammar is described by the "infix",
"notation", etc rules. My current understanding is that when the outer
lexer finds a dollar character, it keeps reading until it finds another
dollar character that is not immediately followed by another dollar
character (counting parity: if three dollars in a row are found, but not
four, then the first two collapse to a dollar in the formula and the
third one closes the formula, I suppose). Everything in between the two
dollars is the content of the formula, which is treated as a string for
the time being (and runs of two consecutive dollars are collapsed to a
single one, again counting parity). The outer lexer then resumes after
the second dollar token having produced a "formula" expression. At some
point there is another lexer that has to tokenize the string. What does
it do?

In Metamath this is fixed easily by saying that tokens are always
separated by spaces. Very easy to implement, but makes formulae not as
readable as possible. Looking at your example file, it seems that you
treat parentheses as always a single token, i.e., they do not need
whitespace around, which is very reasonable. It also seems that they are
the only token that gets such a special treatment. This is also
reasonable, because the lexer remains rather simple and there are fewer
surprises for the user. With C one can skip a lot of spaces, because the
list of possible tokens is much more well-known (i.e., tokens are either
alphanumeric or symbols, and they cannot mix; there is no way to introduce
new symbol tokens; both of these properties completely fail with
Metamath). It seems that in MM0 the only special treatment is for
parentheses, which have to be treated specially anyway. Again, all of
this is very reasonable to me, I just would like to be sure that I
understood it properly.

* The (non-associative) "infix" directive has disappeared and wb has
become infixl. Is this intentional? Your argument for not considering wb
associative seemed very convincing, in my opinion.

* I see you decided for numeric precedences, in the end. Is this due to
the additional complexity it would bring? I can live with it anyway, I
am just curious.

Thanks again for this work. I'll try to write a parser for the language
when I have some time, so that I can get a more direct feeling for what
I still do not completely grasp.

Mario Carneiro

unread,
Feb 25, 2019, 8:05:28 AM2/25/19
to metamath


On Mon, Feb 25, 2019 at 4:14 AM j4n bur53 <burs...@gmail.com> wrote:
Or some type inclusion and/or some automatic boxing/unboxing.

I only saw that on GitHub https://github.com/digama0/mm0/blob/master/mm0.md
there is a mention of "coercion (nameless notation)", with the description:

- A coercion between distinct sorts means that the
  given syntax axiom will be silently inserted to convert from one sort to another.

Is this something new? Does this already exist in metamath somehow?
I don't know metamath enough, so excuse my naive question.

There is exactly one coercion in set.mm, and that is the "cv" constructor:

cv $a class x $.

This means that any set variable can be put where a class expression is expected, and "cv" does the translation.

From a parser standpoint coercions are a bit special, since they are triggered without any explicit syntax. So they are given a special command, in the notation category.
 


On Mon, Feb 25, 2019 at 4:05 AM j4n bur53 <burs...@gmail.com> wrote:
Some random thoughts: How about a compressed meta math,
that makes heavy use of an automated transfer principle, if
we can prove for a class C:

    |- phi(C)

We should be also able to prove for a set x:

    |- phi(x)
 
Doesn't modern computer science and proof assistant
technology provide something like union types? Couldn't this be used to
then have the following:

    |- phi(t,s)

Where t can be either a class or a set, and s is a sort parameter.
Some trickery with dependent types.

This is effectively already available because of the coercion. If you prove phi[C], then you can just substitute "x" (really "cv x") for C and get a proof of phi[x]. However, you can't state |- phi(t, s) where s is a sort variable in metamath or metamath zero, because there are no variables of type "sort" (which would be a "kind" but this concept does not exist); this would require higher order logic and the syntax of metamath zero is designed to look as much like higher order logic as possible without actually being higher order.

On Mon, Feb 25, 2019 at 5:54 AM Giovanni Mascellani <g.masc...@gmail.com> wrote:
 * It seems that nothing is said about lexing: while for the "outer"
language this is probably not very controversial (I imagine the rules
for most programming languages are in force: tokens are either sequence
of alphanumeric characters, or sequence of symbols; whitespace is
irrelevant except that it always divides tokens; symbol tokens are
divided as soon as they are not a prefix of a valid symbol token), I
believe something is to be said about how to tokenize the "inner"
language, i.e., the one whose grammar is described by the "infix",
"notation", etc rules. My current understanding is that when the outer
lexer finds a dollar character, it keeps reading until it finds another
dollar character that is not immediately followed by another dollar
character (counting parity: if three dollars in a row are found, but not
four, then the first two collapse to a dollar in the formula and the
third one closes the formula, I suppose). Everything in between the two
dollars is the content of the formula, which is treated as a string for
the time being (and runs of two consecutive dollars are collapsed to a
single one, again counting parity). The outer lexer then resumes after
the second dollar token having produced a "formula" expression.

This is correct. If you see $ ... $$$ inside a math string then that's a double dollar followed by an end of string.
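A sketch of that outer scan, just as described above (hypothetical function; "src" is the text starting right after the opening dollar):

def read_math_string(src, i=0):
    out = []
    while i < len(src):
        if src[i] == '$':
            if i + 1 < len(src) and src[i + 1] == '$':
                out.append('$')               # '$$' collapses to a literal '$'
                i += 2
                continue
            return ''.join(out), i + 1        # a lone '$' ends the formula
        out.append(src[i])
        i += 1
    raise SyntaxError("unterminated math string")

print(read_math_string("ph -> ps $ rest"))    # ('ph -> ps ', 10)
print(read_math_string("a $$ b $ rest"))      # ('a $ b ', 8)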
 
At some point there is another lexer that has to tokenize the string. What does
it do?

In Metamath this is fixed easily by saying that tokens are always
separated by spaces. Very easy to implement, but makes formulae not as
readable as possible. Looking at your example file, it seems that you
treat parentheses as always a single token, i.e., they do not need
whitespace around, which is very reasonable. It also seems that they are
the only token that gets such a special treatment.

Actually, it's an interesting point - I put spaces around everything other than "(" ")" because I think it looks better, but it does have this consequence. But I also don't put spaces around other bracketing characters like "{" "}" for the same reasons...

My first pass at the lexer was to say "maximal munch". There are a bunch of notation commands in the file; they define a bunch of constant symbols and we take the longest matching one repeatedly. I think it is important to be able to span the letter/punctuation boundary at least somewhat because many ascii math tokens from metamath use letters as symbols, i.e. "e." or "A." or "C_" or "=s". Stuff like that last one may actually cause unexpected collapsing of tokens when you don't use enough space, like "t=s" getting parsed as "t" "=s". This is a bit of a violation of the principle of least surprise but I don't really have a good solution here. In a way this is ignoring (arbitrarily disambiguating) rather than erroring on an ambiguous lex, but I can't see lexing being completely unambiguous if you use anything like metamath tokens without spaces.

For tokens that include a parenthesis like character, I'm not sure what to do. I can imagine people wanting to lex stuff like "f(" x ")", but maybe it is sufficient to just produce "f" "(" x ")" and let that be two back to back constants. The planned restrictions on "notation" commands will eliminate the many varieties of set comprehension in set.mm, and I was thinking about having some "s{" token or similar to disambiguate the prefix. So this may indeed amount to special casing "(".

Maximal munch worked for C, so maybe it will work for us. (Modulo that C++ template vs. right shift clash...) It is always guidable to the right solution if you put enough spaces in, and it only asks for extra spaces when they are actually needed due to an ambiguity. One way to special case "(" so that it works with maximal munch is to say that "(", if it appears in a token, must be the last character. This way "(x" won't be processed as a token, which would probably break many things. Also, a token ending in an alphanumeric, like "_V", cannot be followed immediately by an identifier, because the combined string "_Vph" is itself a valid token (an identifier), so maximal munch takes the whole thing. Similar issues happen when the token doesn't start with an identifier character, like "x+gy" lexing as "x" "+g" "y" by maximal munch rules, where "x" "+" "gy" is also a possible lex. As usual, spaces solve this problem, although it's an uncomfortable solution and we need to expect some level of care on the part of the spec file writer.
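To make the maximal munch rule with a dynamic token set precise, here is a toy sketch (made-up names, not real code, and with no parenthesis special-casing at all, so it exhibits exactly the "t" "=s" behavior above):

  import Data.Char (isAlphaNum, isSpace)
  import Data.List (isPrefixOf, maximumBy)
  import Data.Ord (comparing)

  -- 'consts' is the set of constant tokens declared by notation commands.
  -- At each position: skip whitespace, then take the longest match among
  -- the declared constants and the identifier starting here.
  tokenize :: [String] -> String -> [String]
  tokenize consts = go
    where
      go [] = []
      go s@(c:cs)
        | isSpace c = go cs
        | otherwise =
            let ident      = takeWhile (\x -> isAlphaNum x || x == '_') s
                candidates = [t | t <- consts, t `isPrefixOf` s]
                             ++ [ident | not (null ident)]
            in case candidates of
                 [] -> [c] : go cs          -- unknown symbol character
                 _  -> let t = maximumBy (comparing length) candidates
                       in t : go (drop (length t) s)

With "=s" among the constants, tokenize gives ["t","=s"] on "t=s", and with "+g" declared it gives ["x","+g","y"] on "x+gy".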
 
The nice thing about maximal munch is that it's easy, but it feels a lot like the metamath-style solution: it works well in the intended cases and is simple, so we ignore the times when it doesn't work so well because the user isn't doing the right thing. If there is an easy way to reject expressions that use dangerously little whitespace, which doesn't reject common spacing habits, I would like to hear it.

This is also
reasonable, because the lexer remains rather simple and there are fewer
surprises for the user. With C one can skip a lot of spaces, because the
list of possible tokens is much better known (i.e., tokens are either
alphanumeric or symbols, they cannot mix; there is no way to introduce
new symbol tokens; both of these properties completely fail with
Metamath). It seems that in MM0 the only special treatment is for
parentheses, which have to be treated specially anyway. Again, all of
this is very reasonable to me, I just would like to be sure that I
understood it properly.

This solution is a bit different from mine, and I'm honestly torn on whether an approach like this is better. This is basically metamath parsing with a special case for (), which indeed fixes most of the visible issues with metamath expressions. There are still some unconventional spaces here, though, things like a space before "," and ":", which I don't particularly like with this setup.

 * The (non-associative) "infix" directive has disappeared and wb has
become infixl. Is this intentional? Your argument for not considering wb
associative seemed very convincing, in my opinion.

 * I see you decided for numeric precedences, in the end. Is this due to
the additional complexity it would bring? I can live with it anyway, I
am just curious.

Sigh. I know, I convinced myself too. I was going all in for this partial ordering and nonassociativity stuff, but then I had to stop myself because this isn't the point of the language. It's really, really tempting to add more sugar to the language, but it is supposed to be bare bones. Non-associativity and partial precedence are two expressions of the same idea: a certain way of writing (or omitting) parentheses is understood by the parser but rejected anyway. The parser needs special support for this rejection, and I think that it deviates too much from a traditional parser to be easy to write and verify. Let's say Metamath One is Metamath Zero including all these dropped features (local notations also got dropped, by the way). There will be a transpiler from mm1 to mm0, and mm1 will be more appropriate for actually writing specifications. (As I said, mm0 is supposed to be readable, not writable. mm1 can be the writable one.) Includes and file markings and modularization would also find a home in mm1.
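To be concrete about what a "traditional parser" means here, a toy precedence-climbing sketch (made up for illustration; it ignores parentheses, prefix operators and general notations) shows that numeric precedences plus a left/right flag are exactly the information the standard algorithm consumes, with no rejection machinery:

  data Assoc = LeftA | RightA

  data Expr = Var String | App String Expr Expr
    deriving Show

  type OpTable = [(String, (Int, Assoc))]   -- infix token -> (precedence, assoc)

  -- Precedence climbing: 'climb p' parses an expression all of whose
  -- top-level operators have precedence >= p.
  parseExpr :: OpTable -> [String] -> Maybe (Expr, [String])
  parseExpr ops = climb 0
    where
      climb minPrec toks = do
          (lhs, rest) <- atom toks
          loop lhs rest
        where
          loop lhs (t:rest)
            | Just (p, a) <- lookup t ops, p >= minPrec = do
                -- a left-associative operator requires its right operand
                -- to bind strictly tighter, hence p + 1
                let sub = case a of LeftA -> p + 1; RightA -> p
                (rhs, rest') <- climb sub rest
                loop (App t lhs rhs) rest'
          loop lhs ts = Just (lhs, ts)
      atom (t:rest) | t `notElem` map fst ops = Just (Var t, rest)
      atom _ = Nothing

A nonassociative operator, or a pair of operators whose precedences are incomparable, would need an extra rejection check bolted onto this loop, which is the kind of deviation from a traditional parser I want to avoid in mm0.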

Thanks again for this work. I'll try to write a parser for the language
when I have some time, so that I can grab a more direct feeling of what
I still do not completely grasp of it.

I'll be writing a Haskell parser in the relatively short term; that will be the next part of the project, along with (and possibly concurrent with) the specification of the interpretation function. Once I have a nice clear parser and interpreter, I will reimplement it in Rust or C++, and if it's still looking good I'll start trying to write it in really basic languages like lisp, something with a nice semantics...

That said, I certainly welcome any additional parsers or verifiers you write for this language; feel free to make a PR to the repo. Like Metamath, I hope this will be a polyglot, to increase confidence in the specification and to practice for bootstrapping.

Mario

David A. Wheeler

unread,
Feb 25, 2019, 11:55:39 PM2/25/19
to metamath, metamath
On Sun, 24 Feb 2019 04:19:00 -0500, Mario Carneiro <di....@gmail.com> wrote:
> To help you see where I am going with this, consider the idea of
> formalizing all of this - the specification language and the verifier. This
> project is actually borne of a more abstract problem, to define what is a
> compiler-verifier and what is needed to ensure correctness of a program in
> a way that admits "bootstrapping".

If you haven't seen them already,
you might find various work involving Milawa and ACL2 of interest,
as there's been work "bootstrapping" to prove the proof system. E.g.:
http://www.cs.utexas.edu/users/moore/acl2/manuals/current/manual/index-seo.php/ACL2____MILAWA

--- David A. Wheeler

Mario Carneiro

unread,
Feb 26, 2019, 1:46:17 AM2/26/19
to metamath
Thanks for that reference, David. (A PDF of the dissertation is https://www.kookamara.com/jared/dissertation.pdf but it's quite long.) I haven't read that before, but it is definitely relevant. Also related are the CakeML project and HOL in HOL re: bootstrapping proof systems. But actually it reinforces my belief that we don't have the best solution yet. That prover is very complex, and as a result the author had to check a huge collection of things, enough to fill 500 pages. CakeML is also a very complex system - it embeds the entire ML language.

The issue with bootstrapping a proof language is that it doesn't actually close the loop as far as trustworthiness is concerned. If a theorem prover has a bug it may be exploited to prove that it doesn't have a bug. So the entire dependency loop has to be trusted, and so it is important to make that loop as tight as possible. I think there is still space for a minimal bootstrapping language, and these projects don't seem to be aiming for it. Or possibly they don't have the advantage of knowing that a thing like metamath is possible and blazingly fast.

Giovanni Mascellani

unread,
Feb 26, 2019, 2:59:55 AM2/26/19
to meta...@googlegroups.com
Hi,

Il 26/02/19 07:46, Mario Carneiro ha scritto:
> The issue with bootstrapping a proof language is that it doesn't
> actually close the loop as far as trustworthiness is concerned. If a
> theorem prover has a bug it may be exploited to prove that it doesn't
> have a bug. So the entire dependency loop has to be trusted, and so it
> is important to make that loop as tight as possible. I think there is
> still space for a minimal bootstrapping language, and these projects
> don't seem to be aiming for it. Or possibly they don't have the
> advantage of knowing that a thing like metamath is possible and
> blazingly fast.

I wonder if my asmc project[1] can be useful here: it is basically an
experiment in compiler and operating system bootstrapping, but it might
be that its foundations can be helpful for bootstrapping verifier
trustworthiness too.

[1] https://gitlab.com/giomasce/asmc

The bulk of the project is an extremely minimal "operating system"
(written in x86 Assembly; more or less 15 KiB once assembled, although
with some code golfing and disabling debugging pieces it could be made
smaller), whose basically only feature is a compiler for a language I
invented and called G. G is my personal take on how to design a language
which is as similar as possible to C, but such that it is very easy to
parse and compile. Two big things change from C: expressions are in
reverse Polish form and the type system is collapsed to a single type,
equivalent to C's int. I think that writing G is not difficult at all
for those who are used to C: you just think "in C" and apply a few
simple changes to the code you are writing down. There is an example of
G code in the repository's README (and, of course, in the repository
itself).

If I understood correctly what you are looking for, this might be an ideal
environment for your project: you eliminate entirely the dependencies
on "grown-up" compilers and on the underlying operating system; all that
remains is a few kilobytes of very simple x86 code implementing a simple
(non-optimizing, which is probably a good thing in the context of easy
verifiability) compiler from a simple language to a simple subset of x86.
The runtime environment is as simple as it can be: 32 bit x86 protected
mode with flat addressing, no paging and interrupts disabled. In
particular no multitasking, which is known to complicate code
verification a lot.

Of course the dependency on the CPU and on the BIOS remains, and it is
no little thing, but I believe that for the time being that is
basically unavoidable. There are projects around to build CPUs from
discrete components, which might be a further stage, but I believe
that it is very difficult to get enough computing power to verify the
Metamath database.

Also, you still would have to verify self-modifying code, but again I
guess that if you want to work with compilers there is not much to be
done about that. Unless you want to directly implement the Metamath
verifier in x86 (instead of in G), and then verify the whole x86
implementation. I don't think that would be better.

What do you think of this? My idea was to write a Python implementation
of MM0, but maybe I can switch to G if the above sounds useful. Then a
proof of the G compiler and MM0 verifier should be written in MM0,
assuming that the CPU and BIOS behave as specified (as I said, I fortunately
depend on a relatively small interface to both the CPU and the BIOS).

For context, in the asmc repository there is already a C compiler
written in G, which is less than perfect, but still able to compile a
patched version of tinycc. The idea is to use tinycc in turn to compile
a patched and stripped down version of Linux and eventually of GCC and
the rest of a GNU operating system, so that the whole system is
bootstrapped and compiled from the initial 15 KiB binary blob (and, as
usual, the BIOS).

Hope this helps, Giovanni.

Mario Carneiro

unread,
Feb 26, 2019, 5:35:53 AM2/26/19
to metamath
On Tue, Feb 26, 2019 at 2:59 AM Giovanni Mascellani <g.masc...@gmail.com> wrote:
I wonder if my asmc project[1] can be useful here: it is basically an
experiment in compiler and operating system bootstrapping, but it might
be that its foundations can be helpful for bootstrapping verifier
trustworthiness too.

 [1] https://gitlab.com/giomasce/asmc

This sounds amazing! Indeed, minimizing dependencies on OS magic to the maximum degree possible is part of the goal here. I started looking into the literature and got bogged down a bit, so if you've already looked into it then that makes it a lot easier. I will need to study this language more carefully. Do you have a full specification, or is it just the description in the readme?
 
The bulk of the project is a extremely minimal "operating system"
(written in x86 Assembly; more or less 15 KiB once assembled, although
with some code golfing and disabling debugging pieces it could be made
smaller), whose basically only feature is a compiler for a language I
invented and called G. G is my personal take on how to design a language
which as similar as possible to C, but such that it is very easy to
parse and compile. Two big things change from C: expressions are in
reverse Polish form and the type system is collapsed to a single type,
equivalent to C's int. I think that writing G is not difficult at all
for those who are used to C: you just think "in C" and apply a few
simple changes to the code you are writing down. There is an example of
G code in the repository's README (and, of course, in the repository
itself).

I definitely like the idea of translating via G. Like you, I've been annoyed by the fact that while C is a good language for relatively low level programming without being completely tied to assembly details, it also has quite a lot of fancy features and gotchas, that serve no purpose from a formalization standpoint. G looks a lot more conducive to analysis.
 
If I got correctly what you are looking for, this might be an ideal
environment for your project: you eliminate as a whole the dependencies
on "grown-up" compilers and on the underlying operating system; all that
remains is a few kilobytes of very simple x86 code implementing a simple
(non optimizing, which is probably a good thing in the context of easy
verifiability) compiler to a simple language to a simple subset of x86.
The runtime environment is as simple as it can be: 32 bit x86 protected
mode with flat addressing, no paging and disable interrupts. In
particular no multitasking, which is known to complicate a lot code
verification.

Can you handle 64 bit processors? Do 32 bit pointers work properly on an x86-64 processor?
 
Of course the dependency on the CPU and on the BIOS remains, and it is
not little thing, but I believe that for the time being that is
basically unavoidable. There are projects around to build CPUs from
discrete components, which might be the further stage, but I believe
that it is very difficult to have enough computing power to verify the
Metamath database.

One thing you (by which I mean someone else) can do is formally model the CPU circuitry to the point where you can prove that the CPU, if constructed to the hardware specification, will execute the instruction set correctly. You won't get end-to-end linkage with this approach - the spec in each case is from a different proof instance, and possibly a different language altogether, although in principle you could communicate to share a spec here. In practice this can only really be done from within the company that's building the hardware, but bits of this are actually done in some cases. Needless to say, this is beyond the scope of this project, though.
 
Also, you still would have to verify self-modifying code, but again I
guess that if you want to work with compilers there is not much to do.
Unless you want to directly implement the Metamath verifier in x86
(instead of in G), and then verify the whole x86 implementation. I don't
think it would be better.

I don't think this is a problem a priori. You have to model changing state anyway, so if the state includes the code then you have the possibility of self modifying code and you don't really have to do anything special.

It could be a problem if the specification of G doesn't allow self modification and you are doing some trickery such that it does self modification anyway. In that case there is a hole in the spec. If you can support self modification in a principled way where it's easy to see what code is getting executed next, then that just becomes part of the model.
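A toy model (entirely made up here, and much simpler than the x86 or G semantics under discussion) makes the point concrete: if instructions are fetched from the same memory the program writes to, self-modification is already covered by the ordinary step function.

  import qualified Data.Map as M

  type Addr = Int

  data State = State (M.Map Addr Int) Addr   -- (memory, program counter)
    deriving Show

  -- Toy instruction set: 0 a b = "mem[a] := b", 1 a = "jump a", else halt.
  step :: State -> Maybe State
  step (State m p) =
    case (M.lookup p m, M.lookup (p + 1) m, M.lookup (p + 2) m) of
      (Just 0, Just a, Just b) -> Just (State (M.insert a b m) (p + 3))  -- store; may overwrite code
      (Just 1, Just a, _)      -> Just (State m a)                       -- jump
      _                        -> Nothing                                -- halt

Nothing in this semantics distinguishes a store into data from a store into code; the next instruction is simply decoded from whatever the current memory contains.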
 
What do you think of this? My idea was to write a Python implementation
of MM0, but maybe I can switch to G if the above sounds useful. Then a
proof of the G compiler and MM0 verifier should be written in MM0,
assuming that CPU and BIOS behave as specified (as I said, I fortunately
depend on a relatively little interface of both the CPU and the BIOS).

This sounds like a great idea. If you're interested I'll help however I can.

Mario Carneiro

Giovanni Mascellani

unread,
Feb 26, 2019, 6:16:22 AM2/26/19
to meta...@googlegroups.com
Hi,

Il 26/02/19 11:35, Mario Carneiro ha scritto:
> This sounds amazing! Indeed, minimizing dependencies on OS magic to
> the maximum degree possible is part of the goal here. I started
> looking into the literature and got bogged down a bit, so if you've
> already looked into it then that makes it a lot easier. I will need
> to study this language more carefully. Do you have a full
> specification, or is it just the description in the readme?

Good, I am very happy about that! By the way, I did not really perform any
extensive literature research before starting to work on asmc.
Admittedly, my original purpose was mostly learning low level
programming and the basics of operating systems. At some point I discovered
and joined the Bootstrappable community[1], which is also engaged in
projects of this kind, although for the moment I am not aware of anyone
working on things as low level as mine. Ideally asmc should also connect
with other Bootstrappable projects, as soon as I have managed to go high
level enough (or they have managed to go low level enough).

[1] http://bootstrappable.org/

For the same reason, I do not have a full specification for G at the
moment. Of course feel free to ask questions, and at some point it will
be a good idea to refine the specification (and possibly fix bugs in the
compiler, although I am not currently aware of any).

> Can you handle 64 bit processors? Do 32 bit pointers work properly on
> a x86-64 processor?

Asmc is currently 32 bit only (this is hard-baked into G at the
moment; little-endianness of the processor is also assumed).
However, Intel 64 bit processors fully support protected mode (i.e., 32
bit mode), so that's what I use at the moment. Also, a BIOS interface is
assumed in the boot loader (but not in the main program), mainly to read
things from the disk. I do not know if all modern UEFI firmware exposes a
BIOS interface, so that might be a problem with modern machines.

Asmc is usually developed inside QEMU for practicality, but I also run
it every now and then on real hardware (spare old computers I have
around) to feel the satisfaction of seeing it working (and check if it
actually works). Until it is scrutinized more carefully (or proven to be
correct!), I do not advise running it on computers where you cannot
afford to lose what is currently on the disk!

> I don't think this is a problem a priori. You have to model changing
> state anyway, so if the state includes the code then you have the
> possibility of self modifying code and you don't really have to do
> anything special.
>
> It could be a problem if the specification of G doesn't allow self
> modification and you are doing some trickery such that it does self
> modification anyway. In that case there is a hole in the spec. If
> you can support self modification in a principled way where it's easy
> to see what code is getting executed next, then that just becomes
> part of the model.

G is very low level, so it perfectly admits self-modifying code.
Basically, one of the APIs exposed by the G compiler to the G code is a
call to compile further G code in turn and store it in memory. Then you
can just jump into it.

> This sounds like a great idea. If you're interested I'll help however
> I can.

Good. I think I can begin coding an MM0 parser in G and, as soon as we
have a proof format suggestion, a proof verifier. As soon as you gain
some confidence with G, which again I do not think will be difficult if
you already know C, you can definitely help. Even though I managed to write
a C parser in G, I do not feel really confident with all the details of
the dynamic parser we need for MM0, so your collaboration will be more
than welcome. The G compiler was not written with verification in mind,
so it might require some modifications. Hopefully not too many, and they
can arrive later. Also, at some point we will have to pinpoint which parts of
the x86 and PC/BIOS architecture are to be assumed, and check that
everything conforms to that.

Maybe as an immediate starting point I can make some boilerplate files
for hosting all of this in the asmc repository, so at least we know
where to start from.

Wow, I am very happy to be able to join asmc and Metamath, two things I
have been spending time on in the last months/years!

Do you mind if I post a link to this conversation in the Bootstrappable
community? I think they might be interested.

Jens-Wolfhard Schicke-Uffmann

unread,
Feb 26, 2019, 5:35:51 PM2/26/19
to meta...@googlegroups.com
On Tue, Feb 26, 2019 at 08:59:52AM +0100, Giovanni Mascellani wrote:
> If I got correctly what you are looking for, this might be an ideal
> environment for your project: you eliminate as a whole the dependencies
> on "grown-up" compilers and on the underlying operating system; all that
> remains is a few kilobytes of very simple x86 code implementing a simple
> (non optimizing, which is probably a good thing in the context of easy
> verifiability) compiler to a simple language to a simple subset of x86.
> The runtime environment is as simple as it can be: 32 bit x86 protected
> mode with flat addressing, no paging and disable interrupts. In
> particular no multitasking, which is known to complicate a lot code
> verification.
I have a (currently paused) project based upon a different take: Instead
of verifying a compiler + assembler, I aim for a compiler which will
co-compile high-level proofs about C to low-level proofs about the
resulting binary.

> Of course the dependency on the CPU and on the BIOS remains, and it is
> not little thing, but I believe that for the time being that is
> basically unavoidable.
My additional axioms are formalizations for things like "RAM cells keep
their values unless written by the CPU", "instruction pointer increases"
and specific semantics for about 12 ARM instructions.

> There are projects around to build CPUs from
> discrete components, which might be the further stage, but I believe
> that it is very difficult to have enough computing power to verify the
> Metamath database.
I think computing power is not really a problem; just verifying (on the
compressed proof representation) can be quite fast.

> What do you think of this? My idea was to write a Python implementation
> of MM0, but maybe I can switch to G if the above sounds useful. Then a
> proof of the G compiler and MM0 verifier should be written in MM0,
> assuming that CPU and BIOS behave as specified (as I said, I fortunately
> depend on a relatively little interface of both the CPU and the BIOS).
My main worry is the theorem which states that the verifier will only
verify "true" statements, because this obviously depends on the axiom
system going into it. And naturally, any bugs already present in the
axioms can be carried forward into the "child" verifier.

> For the context, in the asmc repository there is already a C compiler
> written in G, which is less than perfect, but still able to compile a
> patched version of tinycc. The idea is to use tinycc in turn to compile
> a patched and stripped down version of Linux and eventually of GCC and
> the rest of a GNU operating system, so that the whole system is
> bootstrapped and compiled from the initial 15 KiB binary blob (and, as
> usual, the BIOS).
This brings in a whole lot of unchecked code, no?


Regards,
Drahflow

Mario Carneiro

unread,
Feb 26, 2019, 6:07:30 PM2/26/19
to metamath
On Tue, Feb 26, 2019 at 6:16 AM Giovanni Mascellani <g.masc...@gmail.com> wrote:
Hi,

Il 26/02/19 11:35, Mario Carneiro ha scritto:
> This sounds amazing! Indeed, minimizing dependencies on OS magic to
> the maximum degree possible is part of the goal here. I started
> looking into the literature and got bogged down a bit, so if you've
> already looked into it then that makes it a lot easier. I will need
> to study this language more carefully. Do you have a full
> specification, or is it just the description in the readme?

Good, I am very happy of that! By the way, I did not really perform any
extensive literature research before starting to work on asmc.
Admittedly, my original purpose was mostly learning low level
programming and basics of operating systems. At some point I discovered
and joined the Bootstrappable community[1], which is also engaged in
projects of this kind, although for the moment I am not aware on anyone
working on things as low level as mine. Ideally asmc should also connect
with other Boostrappable projects, as soon as I have managed to go high
level enough (or they managed to go low level enough).

 [1] http://bootstrappable.org/

For the same reason, I do not have a full specification for G at the
moment. Of course feel free to ask questions, and at some point it will
be a good idea to refine the specification (and possibly fix bugs in the
compiler, although I am not currently aware of any).

* How do the library functions work? If you call "+" does it put an add instruction in, or does it use an x86 CALL instruction? Is there a listing of them somewhere (even if it's just the source code)? Where can I find the list of available functions?
* Where is the parser (for G) found?

 
> This sounds like a great idea. If you're interested I'll help however
> I can.

Good. I think I can begin coding a MM0 parser in G and, as soon as we
have a proof format suggestion, a proof verifier.

I added a bit of information to the spec about what needs to be in the proof format, although I haven't got down to the concrete binary format yet.
 
As soon as you gain
some confidence with G, which again I do not think to be difficult if
you already know C, you can definitely help. Even if I managed to write
a C parser in G, I do not feel really confident with all the details of
the dynamical parser we need for MM0, so your collaboration will be more
than welcome. The G compiler was not written with verification in mind,
so it might require some modifications. Hopefully not too much, and they
can arrive later. Also, at some point we will have to pinpoint what of
the x86 and PC/BIOS architecture is to be assumed, and check that
everything conforms to that.

I guess you have a better idea on what is needed from the BIOS than me. Did you work from a source document or something I could read?
 
Maybe as an immediate starting point I can make some boilerplate files
for hosting all of this in the asmc repository, so at least we know
where to start from.

Wow, I am very happy to be able to join asmc and Metamath, two things I
have been spending some time onto in the last months/years!

Do you mind if I post a link of this conversation in the Boostrappable
community? I think they might be interested.

Sure.

On Tue, Feb 26, 2019 at 5:35 PM Jens-Wolfhard Schicke-Uffmann <drah...@gmx.de> wrote:
I have a (currenly paused) project based upon a different take: Instead
of verifying a compiler + assembler, I aim for a compiler which will
co-compile high-level proofs about C to low-level proofs about the
resulting binary.

How is that different? How do the program points get translated to stepping through the binary? It seems like if you have enough to keep track of that, then you should already have enough to prove the theorem generally.
 
> What do you think of this? My idea was to write a Python implementation
> of MM0, but maybe I can switch to G if the above sounds useful. Then a
> proof of the G compiler and MM0 verifier should be written in MM0,
> assuming that CPU and BIOS behave as specified (as I said, I fortunately
> depend on a relatively little interface of both the CPU and the BIOS).

My main worry is the theorem which states that the verifier will only
verify "true" statements, because this obviously depends on the axiom
system going into it. And naturally, any bugs already present in the
axioms can be carried forward into the "child" verifier.

The way I am envisioning it, the verifier does not actually prove that some theorem is true; it proves that a theorem is derivable from some axioms. This is a necessary component for being able to properly bootstrap - if you try to say "verifier A proves that any theorem checked by verifier B is true (in verifier A's axiom system)", then when verifier B = verifier A, the theorems checked by verifier B are actually all provable theorems in verifier A's axiom system, so this runs up against Gödel. It is up to the user to input good axioms and check that they are correctly stated, and this has to be done at each stage too, in order to prevent loss of consistency strength.
 
> For the context, in the asmc repository there is already a C compiler
> written in G, which is less than perfect, but still able to compile a
> patched version of tinycc. The idea is to use tinycc in turn to compile
> a patched and stripped down version of Linux and eventually of GCC and
> the rest of a GNU operating system, so that the whole system is
> bootstrapped and compiled from the initial 15 KiB binary blob (and, as
> usual, the BIOS).
This brings in a whole lot of unchecked code, no?

I gather that this is the Bootstrappable approach, which doesn't focus on verification but relies only on simplicity and concision for trustworthiness.

Mario

Mario Carneiro

unread,
Mar 13, 2019, 3:53:34 PM3/13/19
to metamath


On Wed, Mar 13, 2019 at 3:29 PM j4n bur53 <burs...@gmail.com> wrote:
So Metamath somehow combines syntax issues with
logic. Quite unusual that the syntax is not abstracted

away beforehand. So it can extend its own syntax,
and not only the logic.

Right, this is exactly what I'm trying to solve with this language. Notation shouldn't require changes to the axioms.

As for what mm0 will do about wbr and co, it won't be doing anything intelligent, and this will probably be a pain point in the translation. Metamath zero is assuming that infix operators are given as binary term constructors like

term foo: class > class > class;

which works well for term constructors that actually have that form, like ( A u. B ), but the preference in metamath is actually for using a combination of co and a class for the central operator, for things like ( A + B ). This is a problem for metamath zero, which does not allow infix operators to be ternary. General notations are still permitted, but there is a rather strict requirement that the first token in a general notation be a unique constant, and moreover the first two arguments must be passed with max precedence. So it will end up looking like op[ A + B ], which is okay I guess. Alternatively, during the translation process we can add new definitions like

def add (A B: class): class := $ co A cadd B $;
infixl add: $+$ prec 50;

and now you can take full advantage of the precedence parser in the math notation. The problem with this is that now the terms that are being manipulated are actually different; they will need additional steps to rewrite in and out of 'co' form, which are not present in the metamath source.


But then there was the ((f paradox

or something, if I remember correctly, so some syntax hygiene
would nevertheless be quite useful. Could be challenging!

On Wednesday, March 13, 2019 at 8:24:59 PM UTC+1, j4n bur53 wrote:
My current take would be, that similar like overloading
of "in" and "=" by classes, that it would effectively be:

    ((R A) B)

    ((F A) B)

With the usual juxtaposition from lambda calculus
application. But overloading the application somehow.
I have no clue how the infix reordering should

happen, during parsing/unparsing, since R and
F would not be some known constant with some
infix declaration. Although one could allow local operator

declarations for variables as a novelty. So that
where a variable is used, it can use something else
than the default. Maybe the default would also

include R(A,B) and F(A,B), so that no definitions
for it would be necessary. I am perplexed!

On Wednesday, March 13, 2019 at 8:11:14 PM UTC+1, j4n bur53 wrote:
How will metamath zero deal with:

     wff A R B
     http://metamath.tirix.org/wbr.html

     class ( A F B )
     http://metamath.tirix.org/co.html

I see a lot of problems: invisible function symbols, parentheses
that are usually irrelevant for typing.

--

Mario Carneiro

unread,
Mar 24, 2019, 5:20:30 AM3/24/19
to metamath
Hi All,

This is a progress update on Metamath Zero. The spec has changed a bit since I last spoke on this thread, but there is now a working verifier for .mm0 specification files and a provisional .mmu format for specifying proofs. (Ultimately this will be replaced with a binary format but I want to develop that format in tandem with a low level verifier, because the format should reflect the internal data structures of the verifier.) The next step is to build a translator from metamath to mm0, which should be not too difficult, although there is some additional metadata required and I may have to hardwire it for set.mm until there is a reasonable mechanism for specifying this metadata inside the .mm file (i.e. adding $j annotations).


delimiter $ ( ) ~ { } $;
strict provable sort wff;
var ph ps: wff*;
term wi (ph ps: wff): wff; infixr wi: $->$ prec 25;
term wn (ph: wff): wff; prefix wn: $~$ prec 40;

axiom ax_1: $ ph -> ps -> ph $;
axiom ax_2 (ph ps ch: wff): $ (ph -> ps -> ch) -> (ph -> ps) -> ph -> ch $;
axiom ax_3: $ (~ph -> ~ps) -> ps -> ph $;
axiom ax_mp (ph ps: wff): $ ph $ > $ ph -> ps $ > $ ps $;

theorem a1i: $ ph $ > $ ps -> ph $;

def wb (ph ps: wff): wff = $ ~((ph -> ps) -> ~(ps -> ph)) $;
infixl wb: $<->$ prec 20;


theorem bi1: $ (ph <-> ps) -> ph -> ps $;
theorem bi2: $ (ph <-> ps) -> ps -> ph $;
theorem bi3: $ (ph -> ps) -> (ps -> ph) -> (ph <-> ps) $;

def wa (ph ps: wff): wff = $ ~(ph -> ~ps) $;
infixl wa: $/\$ prec 20;
theorem df_an: $ (ph /\ ps) <-> ~(ph -> ~ps) $;

def wtru (.p: wff): wff = $ p <-> p $;
prefix wtru: $T.$ prec max;
theorem df_tru: $ T. <-> (ph <-> ph) $;

pure sort set;
var x y z w: set;

term wal {x: set} (ph: wff x): wff; prefix wal: $A.$ prec 30;

def wex {x: set} (ph: wff x): wff = $ ~(A. x ~ph) $;
prefix wex: $E.$ prec 30;
theorem df_ex: $ E. x ph <-> ~(A. x ~ph) $;


sort wff
term wi
term wn
axiom ax_1
axiom ax_2
axiom ax_3
axiom ax_mp

theorem a1i (ph: wff) (ps: wff), (h: ph): (wi ps ph) =
(ax_mp ph (wi ps ph) h (ax_1 ph ps))

local theorem a2i (ph: wff) (ps: wff) (ch: wff), (h: (wi ph (wi ps ch))):
  (wi (wi ph ps) (wi ph ch)) =
(ax_mp (wi ph (wi ps ch)) (wi (wi ph ps) (wi ph ch)) h (ax_2 ph ps ch))

....

local theorem simplim (ph: wff) (ps: wff),: (wi (wn (wi ph ps)) ph) =
(con1i ph (wi ph ps) (pm2_21 ph ps))

def wb (ph: wff) (ps: wff): wff = (wn (wi (wi ph ps) (wn (wi ps ph))))

theorem bi1 (ph: wff) (ps: wff), unfolding wb() (h: ph):
  (wi (wb ph ps) (wi ph ps)) =
(simplim (wi ph ps) (wn (wi ps ph)))

Changes and remarks:

* Definitions no longer have an associated block in the mm0 format. I realized that there is no point to the delimitation because spec files are not for proving things, so it doesn't make any difference. Instead, the theorems in the proof file can specify what definitions they want to unfold (see the bi1 example). This reduces the "definition hiding" property earlier remarked, but the main reason for the restricted unfolding is just to avoid the processing overhead of unfolding everything all the time. A proper modularization requires more work anyway.

* Bound variables are now explicit in the syntax, with {x: set} curly binders. Bound variables also appear in the core verifier loop, because substitutions are checked so that a bound variable slot can only be filled by another bound variable.

* The verification process is now defined. It looks mostly like metamath verification, except some of the things that are "gentleman's agreements" are now enforced in the syntax. For example, you can't have a hypothesis to a non-provable typecode because terms don't have hypotheses (or DV conditions). When you substitute into a theorem, there are slots for the variables and slots for the hypotheses, and it constructs a substitution map from the variables (checking that the sorts match up and BVs get substituted for BVs), checks the DV conditions, and matches each of the given hypothesis subproofs.

* Definitions are checked by a slightly different process. A term might have a type dependency, such as

term wal {x: set} (ph: wff x): wff;

and this indicates that free occurrences of x in ph are not considered free in the result. Definitions are checked to have no extraneous dependencies, so that definitions like these are rejected:

def bad1 {x: set} (ph: wff x) {.y: set}: wff = $ x = y \/ A. x ph $;
def bad2 {x: set} (ph: wff x) {.y: set}: wff x y = $ x = y \/ A. x ph $;
def bad3 {x: set} (ph: wff x): wff = $ x = x \/ A. x ph $;

Here bad1 has undeclared dependencies, because x and y appear free. "bad2" declares those dependencies but it's still bad because you can't declare a dependency on a dummy variable (the .y notation means that y is a dummy, so it will not appear in the term constructor in later uses). Similarly "bad3" has an undeclared dependency on x even though it is "effectively not free" in MM terminology.

* As was mentioned before, when you unfold a definition, the statement that is proved is not the same as the one that is declared. Notice that the proof of bi1:

theorem bi1 (ph: wff) (ps: wff), unfolding wb() (h: ph):
  (wi (wb ph ps) (wi ph ps)) =
(simplim (wi ph ps) (wn (wi ps ph)))

is a direct application of simplim, and is actually a proof of (¬ ((𝜑 → 𝜓) → ¬ (𝜓 → 𝜑)) → (𝜑 → 𝜓)), which in metamath is actually the second-to-last step.

* The concrete syntax of a .mmu file is a bit verbose and human readability is certainly questionable, but it's definitely easier to mock up and display examples in text like that rather than in a binary format. You shouldn't focus on it too much. But it's really simple to parse, because there is a limited set of punctuation characters and every other character is a label character, from the set [A-Za-z0-9_].

* The hyphen has been removed as a legal label character (as well as the dot), because it conflicts with label conventions in other languages. You will notice in the examples that "ax-1" has been renamed to "ax_1" and so on. Hopefully this won't cause any name conflicts in set.mm. Similarly, initial digits cause a problem, so despite the cute name, "0.999..." will need a rename. I'm thinking of putting an initial underscore for these, so that it gets mangled as "_0_999___", but there are again non-injectivity issues with this kind of mangling convention.

* The "output" directive has not yet been implemented, but it is a future hook for producing checked output of some kind. This links into my plans for doing "proof-producing assembly". I think not much work has been spent in doing nondeterministic checked compilation, but the binary mmu format can be a suitable output format which can then be checked quickly (I'm pretty sure with some work Metamath Zero files can be checked as fast as they are read from the drive).

Anyway, the project is really starting to come together. If you want you can test out the Haskell verifier at https://github.com/digama0/mm0/tree/master/mm0-hs .

Mario Carneiro

Mario Carneiro

unread,
Mar 24, 2019, 7:16:43 AM3/24/19
to metamath


On Sun, Mar 24, 2019 at 7:03 AM j4n bur53 <burs...@gmail.com> wrote:
Nice!

So instead of "$= .... op $., it is (op ....), right?

At least in the concrete .mmu syntax, yes. The underlying data structure is actually a stream of commands from this grammar -

data LocalCmd =
    Load HeapID      -- backreference to a previously saved step
  | PushApp TermID   -- regular step: apply a term constructor
  | PushThm ThmID    -- regular step: apply a theorem
  | Save             -- save the top of the stack for later backreference (like "Z")
  | Sorry            -- placeholder for an incomplete proof (like "?")

which is more or less the same as metamath compressed proof format. (A "Load x" command is a backreference step, a "PushApp t" or "PushThm T" command is a regular step (applying a theorem or term constructor), and a "Save" is like the compressed "Z" step, which adds a backreference for future reference. "Sorry" is the equivalent of "?" so that the grammar doesn't get confused on incomplete proofs, but it's not strictly necessary.)
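To make the stack/heap bookkeeping concrete, here is an illustrative interpreter for this command stream (the Int synonyms and the arity functions are assumptions of this sketch only; the real verifier additionally checks sorts, substitutions and disjoint variable conditions at every step):

  import Control.Monad (foldM)

  -- assumes the LocalCmd type above, with these identifier types:
  type HeapID = Int
  type TermID = Int
  type ThmID  = Int

  data Tree = TApp TermID [Tree] | TThm ThmID [Tree] | THole
    deriving Show

  type Heap  = [Tree]               -- saved subproofs, indexed by HeapID
  type Stack = [Tree]

  -- One command updates the (heap, stack) pair; the arity functions say
  -- how many stack entries a term constructor or theorem consumes.
  execCmd :: (TermID -> Int) -> (ThmID -> Int)
          -> (Heap, Stack) -> LocalCmd -> Maybe (Heap, Stack)
  execCmd termArity thmArity (heap, stack) cmd = case cmd of
      Load i    -> Just (heap, (heap !! i) : stack)   -- backreference step
      PushApp t -> pop (termArity t) (TApp t)         -- regular term step
      PushThm t -> pop (thmArity t)  (TThm t)         -- regular theorem step
      Save      -> case stack of                      -- like a "Z" step
                     x : _ -> Just (heap ++ [x], stack)
                     []    -> Nothing
      Sorry     -> Just (heap, THole : stack)         -- "?" placeholder
    where
      pop n f
        | length stack >= n = let (args, rest) = splitAt n stack
                              in Just (heap, f (reverse args) : rest)
        | otherwise = Nothing

  -- A complete proof should leave exactly the claimed statement on the stack.
  runCmds :: (TermID -> Int) -> (ThmID -> Int) -> [LocalCmd] -> Maybe (Heap, Stack)
  runCmds tA hA = foldM (execCmd tA hA) ([], [])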


Will the system be "single sequent" or something along "multi sequent".

I'm not exactly sure what you mean by this; you can mimic sequent proofs in metamath and mm0 using implication, but it's fundamentally a Hilbert proof system.
 
Just by accident I claimed that the metamath verification steps $= $. are
FORTH. But in FORTH there are operations such as:

      dup(<x,x1,..,xn>) = <x,x,x1,..,xn>

      https://en.wikipedia.org/wiki/Forth_%28programming_language%29

So I wonder whether metamath or metamath zero has ever thought
about some such extension.

Using Save and Load you can mimic the operation of the dup command (although you leave a backreference on the heap as a result). On its own the dup command isn't so useful in metamath because it's rare that the thing you want to use twice appears twice in a row; usually it appears not long after but you need some additional stack manipulation commands to get this effect.
 
I see a problem, that it is currently
not possible since assertions are named:

${

      mp.1 $e |- ( A -> B )
      mp.2 $e |- A
      mp $a |- B

$}

But maybe if a group were named, this would qualify
as a multi-sequent inference rule, which is already
known from Gentzen's work and also found in natural deduction:

 and-elim ${

      $e |- ( A /\ B )
      $a |- A
      $a |- B

$}

Just curious.

This is already the case, if I understand you correctly. The hypothesis must be duplicated and it's really two separate theorems, but that's no different in Gentzen's logic. You can't name groups of theorems like that, but if you have the ability to "discard" conclusions from a multi-sequent that you don't want, then it's basically the same as two theorems.
 
Possibly also difficult for systems that use formulas-
as-types, since formulas-as-types was primarily conceived
for "single sequent". Can Lean Prover do multi-sequents?

You can always have two theorems, or return a conjunction, if the logic is at all expressive.


Bye

Mario Carneiro

unread,
Mar 24, 2019, 7:19:24 AM3/24/19
to metamath
It's really easy to do that in set.mm:

$e |- ( ph \/ ( ps \/ ch ) )
$a |- ( ph \/ ps \/ ch )

On Sun, Mar 24, 2019 at 7:17 AM j4n bur53 <burs...@gmail.com> wrote:
Disclaimer:


 and-elim ${

      $e |- ( A /\ B )
      $a |- A
      $a |- B

$}

Is not typical multi-sequent, what would be multi-sequent would be:

 or-elim ${

      $e |- G, ( A \/ B )
      $a |- G, A , B

$}

Where comma is used to separate multiple formulas in the
RHS of |-. And G can match multiple formulas. So there would
be extra types maybe.

End of Excursion

On Sunday, March 24, 2019 at 12:07:17 PM UTC+1, j4n bur53 wrote:
The example and-elim  lacks LHS of a sequent, but
I guess the "deduction form trick" could be also applied.

And the multi-RHS is not inside the sequent but
in the verification languages itself, if it were FORTH.

Maybe too complicated without any benefit. So metamath
might be qualified as FORTH without stack operations?



Mario Carneiro

unread,
Mar 24, 2019, 6:59:06 PM3/24/19
to metamath
More progress:


This is my first serious attempt to formalize things in mm0. It is a specification of the syntax and provability predicate for the mm0 language itself, in Peano arithmetic. A bit of this would be easier to formalize in set theory, but I think it will be an interesting excursion into provability in a weak logic, and since the hereditarily finite sets can be encoded in PA it should be more than enough for most finitary constructions and proofs, and it would be nice to have explicit confirmation of this in Metamath.

The one thing that is a bit complicated is non-primitive induction and recursion. Wikipedia says that functions like the Ackermann function are definable in PA, but it's not obvious to me how to do this. I'm assuming we have an iota operator, so that we can extract functions from unique existence proofs. If we have a predicate that says "the Ackermann function is defined at (m,n)" then we can prove this predicate by double induction, but I don't know how to define this predicate. Any ideas?

Jon P

unread,
Mar 25, 2019, 4:10:35 PM3/25/19
to Metamath
Looks cool, I'm not sure I understand it so well, I'm only just getting Metamath classic :)

Is this idea, using Gödel numbering, any help with defining recursive functions?

Mario Carneiro

unread,
Mar 26, 2019, 8:22:22 PM3/26/19
to metamath


On Tue, Mar 26, 2019 at 4:29 PM j4n bur53 <burs...@gmail.com> wrote:
Won't you need higher order unification sooner or later
for your forays? If you start using expressions such as

> * Definitions are checked by a slightly different process.
> A term might have a type dependency, such as
> term wal {x: set} (ph: wff x): wff;

If I am not totally wrong, if I remember correctly Isabelle has
some higher order unification somewhere in its "pure" bowels.
So far I was impressed by Metamath, that it doesn't need

higher order unification; it's rather some variable constraints,
but not really a notion of higher order unification.

Metamath, and Metamath Zero, both don't even have first order unification. The only thing you need during verification is the ability to check two strings for syntactic equality (and to check strings for presence of a given variable). For an IDE/prover interface like mmj2, you need first order unification in order to deal with metavariables for theorems whose substitutions are not known (the "work variables" of mmj2). Higher order unification is not implemented in any Metamath IDE that I am aware of, but it might be required if you for example wanted to perform a rewrite step to get a certain result (i.e. apply equality lemmas and synthesize the resulting expression) or to perform induction by guessing the induction hypothesis.
 
P.S.: BTW: Isabelle People having a Look at Lean:

Coinduction, using Impredicativity of Prop
https://gist.github.com/gebner/66bab9997d4b37fdb0884f7bcab95418#file-coindpred-lean

I hope its a tame impredicativity available in Lean...

I know Gabriel and Johannes quite well. AFAIK Gabriel has not worked on Isabelle, but Johannes Hoelzl worked on Isabelle under Tobias Nipkow for a while before moving to Lean. Johannes and I are current maintainers of the 'mathlib' Lean math library.

 Mario

Mario Carneiro

unread,
Apr 21, 2019, 8:35:04 AM4/21/19
to metamath
Hi All,

This is another status update, and a question. The repo now contains a nearly working translation procedure from set.mm. If you want to play with it, the command to run is:

  stack exec -- mm0-hs from-mm set.mm -o set.mm0 set.mmu

The .mm0 file is complete, more or less. It gets all the axioms and theorem statements from the file, parses the $j statement and parses all math into s-exprs, and writes them in mm0 format. If you feed this back through the verifier with

  stack exec -- mm0-hs verify set.mm0 set.mmu

it should report "spec checked", meaning that the .mm0 file is well formed.

The harder part is of course the proofs, in the mmu file. Since mm0 is not based on exactly the same logic, it isn't quite as simple as just translating the proof tree (although it mostly is, because the format is similar). Right now it gets stuck on all the new disjoint variable conditions. I'll probably report back later with other interesting examples, but here's the first one:

input set.mm file:

  ax-7 $a |- ( x = y -> ( x = z -> y = z ) ) $.
  ${
    $d x y $.
    equid $p |- x = x $=
      ( vy weq wex a6ev ax-7 pm2.43i eximii ax5e ax-mp ) AACZBDKBACZKBBAELKBAAF
      GHKBIJ $.
  $}

output .mm0 file:

  axiom ax_7 {x y z: set}:
    $ wi (wceq (cv x) (cv y)) (wi (wceq (cv x) (cv z)) (wceq (cv y) (cv z))) $;

  theorem equid {x: set}:
    $ wceq (cv x) (cv x) $;

output mmu file:

  axiom ax_7

  theorem equid {v0: set}, {v1: set}: (wceq (cv v0) (cv v0)) =
  (ax_mp (wex v1 [(wceq (cv v0) (cv v0))=v2]) v2 (eximii v1 [(wceq (cv v1) (cv v0))=v3] v2 (a6ev v1 v0) (pm2_43i v3 v2 (ax_7 v1 v0 v0))) (ax5e v1 v2))


The verifier complains about a disjoint variable violation at the application (ax_7 v1 v0 v0) corresponding to the step

|- ( y = x -> ( y = x -> x = x ) )

in http://us2.metamath.org/mpeuni/equid.html . The problem is that mm0 treats all bound variables as distinct, so ax-7 acquires a DV condition between its three variables and hence you can't use it to prove reflexivity.

I'm not sure what the best solution is. MM0 already has a mechanism for this kind of thing - you just don't mark the variables as bound, as in:

  axiom ax_7 (x y z: set):
    $ wi (wceq (cv x) (cv y)) (wi (wceq (cv x) (cv z)) (wceq (cv y) (cv z))) $;

(Note the brackets around x y z are parens instead of curly braces.)  But on the presumption that we want a fully automatic translation, the converter needs to find this out for itself. Here are a few options:

1. Analyze the statement to deduce whether it really needs to be bound. This basically means that it isn't being used as the bound variable of a quantifier. Two problems:
  * Currently even "cv" is treated as a binding operator (it shouldn't be) because the converter only looks at the MM sort of the variable to determine if it is bound - i.e. set vars are bound in all contexts. So actually all bound markings are (apparently) required in the example.
  * There is a possibility of getting the statement wrong if the proof of the theorem happens to use binding operators even though the statement doesn't. For example the statement might say |- x = x but it is proven via |- E. x x = x . In this case replacing x with a term is not valid, but you can't tell this by looking only at the statement.
2. Provide additional $j hints whenever set variables shouldn't be bound. This could work, but it might require a lot of annotation in the FOL part of the file, before we introduce classes.
3. Do the same thing as was planned for proofs that violate DV conditions caused by the bound variable rule: make lots of copies of the theorem for every combination of variables being equal or not. In this case that would entail making a copy of ax-7 for this configuration:

  axiom ax_7xyy {x y: set}:
    $ wi (wceq (cv x) (cv y)) (wi (wceq (cv x) (cv y)) (wceq (cv y) (cv y))) $;
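To give a sense of how many copies option (3) requires (this enumeration is just my illustration, not something the converter does): each copy corresponds to a way of partitioning the bound variables into groups that get identified, so the count is a Bell number - 5 configurations for three bound variables (including the all-distinct one), 52 for five, 877 for seven.

  -- All ways to partition a list of variables into groups that are
  -- identified with each other; ax_7xyy above is the {x} {y,z} case.
  partitions :: [a] -> [[[a]]]
  partitions []       = [[]]
  partitions (x : xs) = concatMap place (partitions xs)
    where
      -- put x in its own group, or add it to any existing group
      place p   = ([x] : p) : [ before ++ (x : grp) : after
                              | (before, grp : after) <- splits p ]
      splits ys = [ splitAt i ys | i <- [0 .. length ys - 1] ]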

I'm currently favoring option (2) because it minimizes the amount of guesswork done by the converter (I really don't want the converter to be heuristic) in favor of explicit annotation. What do people think?


On Wed, Mar 27, 2019 at 2:33 AM j4n bur53 <burs...@gmail.com> wrote:
I think Isabelle/HOL uses some form of higher order unification,
according to these slides:

http://www21.in.tum.de/~ballarin/belgrade08-tut/session03/session03.pdf

"perform induction by guessing the induction hypothesis.", yes could
be such a situation. But its already helpful for implementing

the very subject logic itself. For example (exists intro, see also slides):

        G |- A(t)
        -----------------------
        G |- exists x A(x)

When you do backward proof search, Isabelle/HOL can provide
you with new schematic variable aka holes, correctly constrained.

Maybe Lean can also show such holes. Dunno

Mario Carneiro

unread,
May 3, 2019, 6:38:34 AM5/3/19
to metamath


On Fri, May 3, 2019 at 6:14 AM Mario Carneiro <di....@gmail.com> wrote:
On Fri, May 3, 2019 at 5:19 AM Giovanni Mascellani <g.masc...@gmail.com> wrote:
I dusted off my project
of writing an independent verifier in G. In preparation for that I began
writing a Python implementation, to understand the specification and to
use it as a model for the G implementation. I'm working on a clone of
your repository which is here:

  https://github.com/giomasce/mm0/blob/master/mm0-py/parser.py

Great! The more languages the better.

Also, on the subject of spec changes, "var" and { scopes } are deprecated. The formal specification doesn't deal with them at all, and I wasn't planning on implementing them in future verifiers (the Haskell verifier will support a few eliminable extensions to the language for development convenience; "include" is on the todo list).

Giovanni Mascellani

unread,
May 10, 2019, 1:13:31 PM5/10/19
to meta...@googlegroups.com
Hi,

Il 03/05/19 12:37, Mario Carneiro ha scritto:
> Also, on the subject of spec changes, "var" and { scopes } are
> deprecated. The formal specification doesn't deal with them at all, and
> I wasn't planning on implementing them in future verifiers (the Haskell
> verifier will support a few eliminable extensions to the language for
> development convenience; "include" is on the todo list).

Does this mean that variable inference as a whole will be removed?
Because I think it does not make sense to have variable inference
without "var", does it?

Also (but this is probably minor), specs say that "Bindings come in
three groups: the (nondependent) bound variables, the regular variable
bindings, and the formulas. It is an error for the explicit bindings to
come out of order". However, examples/set.mm0 contains

axiom ax_inf {x: set} (a: set) {y z: set}:
$ E. x (a es. x /\ A. y (y es. x -> E. z (y es. z /\ z es. x))) $;

which seems to violate that requirement (there are bound variables
after regular variables). The set.mm0 generated from set.mm does not
have this problem.

Thanks, Giovanni.

Mario Carneiro

unread,
May 13, 2019, 3:07:46 AM5/13/19
to metamath
On Fri, May 10, 2019 at 1:13 PM Giovanni Mascellani <g.masc...@gmail.com> wrote:
Hi,

Il 03/05/19 12:37, Mario Carneiro ha scritto:
> Also, on the subject of spec changes, "var" and { scopes } are
> deprecated. The formal specification doesn't deal with them at all, and
> I wasn't planning on implementing them in future verifiers (the Haskell
> verifier will support a few eliminable extensions to the language for
> development convenience; "include" is on the todo list).

Does this mean that variable inference as a whole will be removed?
Because I think it does not make sense to have variable inference
without "var", does it?

Yes.
 
Also (but this is probably minor), specs say that "Bindings come in
three groups: the (nondependent) bound variables, the regular variable
bindings, and the formulas. It is an error for the explicit bindings to
come out of order". However, examples/set.mm0 contains

  axiom ax_inf {x: set} (a: set) {y z: set}:
  $ E. x (a es. x /\ A. y (y es. x -> E. z (y es. z /\ z es. x))) $;

which seems to violate that requirements (there are bound variables
after regular variables). The set.mm0 generated from set.mm does not
have this problem.

The spec is a bit out of date here, I'll fix it. There are two groups - the bound and regular variables can be intermingled but the hypotheses must come after. This matters because a regular variable can only depend on bound variables that come before it. In the MM translation regular variables can depend on bound variables anywhere else in the definition (the actual order of arguments depends on the relative order of $f hyps in the file), so to avoid a forward dependency I move all the bound variables first.

I'm also making a slight change to the mmu format regarding binder order: dummies are considered to be introduced after all other binders, including hypotheses. To make introduction order match textual order, this means that dummies are moved until after hypotheses and the conclusion of the theorem - they are now the first thing *after* the = sign. For example:

theorem nic_imp (v0: wff) (v1: wff) (v2: wff) (v3: wff),
  (v4: (wnan v0 (wnan v2 v1))): (wnan (wnan v3 v2) (wnan (wnan v0 v3) (wnan v0 v3))) =
{v5: wff} (nic_mp (wnan v0 (wnan v2 v1)) (wnan (wnan v3 v2) (wnan [(wnan v0 v3)=v6] v6)) (wnan v5 (wnan v5 v5)) v4 (nic_ax v0 v1 v2 v3 v5))

Here v4 is a hypothesis, and v5 is a dummy. This also matches the location of dummies in the def parser, just after the = before the value of the definition. In the mm0 format dummies are just as before - they can be introduced whenever in a def command but they can only be referred to in the value of the definition.

Mario Carneiro

unread,
May 19, 2019, 4:32:13 AM5/19/19
to metamath
In response to a recent post by Giovanni on another thread, here's another status update on MM0. The big news is that I have a working translation to HOL! The translation into my custom HOL subset is now working and verifies set.mm (after translation through MM0). The plan is to use this HOL subset to translate to real HOL systems in use; I have a mostly complete translation to OpenTheory, although it has some bugs in it I haven't yet stamped out. To test all this:

stack exec -- mm0-hs from-mm set.mm -o out.mm0 out.mmu    // translate MM -> MM0; add "-f foo,bar" if you just want to translate the part of the MM file that is needed to verify theorems foo and bar
stack exec -- mm0-hs to-hol out.mm0 out.mmu -o out.hol    // translate MM0 -> HOL (output syntax is not parseable, it's just for viewing purposes)
stack exec -- mm0-hs to-othy out.mm0 out.mmu -o out.art   // translate MM0 -> OpenTheory article (currently under development)

The resulting .art file is 1G (!) and might be too much for the OpenTheory verifier. There is a lot of explicit beta reduction that results from the use of variables with free set arguments; I'm not sure if this can be optimized further.

The file mm0-lean/x86.lean is a lean formalization of the x86 semantics. I don't think I will use this file directly in the project, it was mostly to get my thoughts in order. It will serve as a source for hand-translation to MM0 once it's ready.

The file mm0-lean/basic.lean is an embedding of MM0 in lean. It's not a translation exactly (the translation MM0 -> Lean is coming), but rather a subset of lean that is interpretable as MM0 theorems. So it's really a mechanism for Lean -> MM0. I hope to be able to use it to write MM0 proofs using lean (with the advantage of the lean tactic framework). The file mm0-lean/set.lean is a hand-translation of some of the theorems from set.mm0 in lean, with MM0 theorems being generated in the background and lean theorems being proven in the foreground. The nice thing about this embedding is that it is quite close to the way a lean formalization would actually work, except for the use of hilbert calculus. I hope to have support for explicit context management so that a higher order lean proof can translate to an MM0 proof using natural deduction style.

I've also started working on mm0-c and the binary file format. I don't have anything specific to share yet, but one interesting feature is that the format will serve as read-only memory for the verifier itself, so that I think I can achieve verification with O(1) read/write memory (no stack! no heap!) and linear time checking (not exactly streaming, but with limited random access) so that it really can run in low memory contexts. The basic idea is to represent the stack as a linked list and embed it in the proof stream itself. Because the stack changes over time, it actually appears as a forest instead of a linked list. Each proof step contains these stack backreferences in addition to a possible heap backreference for actual "Z" steps.

The web of translations is growing, and I'm really excited about all the applications. There is a low cost translation MM0 -> MM (not yet written) which will enable porting all these benefits back to pure MM. For example, we could then use Lean as a Metamath frontend, an alternative IDE. Plus other systems will be able to import anything proven in metamath.

Mario

Giovanni Mascellani

unread,
May 19, 2019, 7:20:55 AM5/19/19
to meta...@googlegroups.com
Hi,

On 19/05/19 10:31, Mario Carneiro wrote:
> The web of translations is growing, and I'm really excited about all the
> applications. There is a low cost translation MM0 -> MM (not yet
> written) which will enable porting all these benefits back to pure MM.
> For example, we can use this to use Lean as a Metamath frontend, an
> alternative IDE. Plus other systems will be able to import anything
> proven in metamath.

Wow, that's really great news! Great work!

Speaking of proof translations, I recently discovered GAPT[1], developed
at the Vienna University of Technology, which among other things is able
to import refutation proofs generated by Eprover, Metis, SPASS, Prover9
and Vampire, deskolemize them and convert them to Natural Deduction
proofs. I wanted to do the same thing a few months ago, but never
understood enough of logic to properly do it. Since there is now this
thing that is already working and maintained by competent people, I am
writing the code to import the ND proofs it generates and convert them
to MM.

[1] https://www.logic.at/gapt/

Hopefully, being able to solve predicate calculus steps in Metamath
proofs all at once, in a way similar to sledgehammer and meson in other
proof systems, will speed up Metamath proof writing. We'll see.

David A. Wheeler

unread,
May 19, 2019, 7:28:37 PM5/19/19
to Mario Carneiro, metamath
On May 19, 2019 4:31:58 AM EDT, Mario Carneiro <di....@gmail.com> wrote:
>In response to a recent post by Giovanni on another thread, here's
>another
>status update on MM0. The big news is that I have a working translation
>to
>HOL!

That is very impressive. Congrats!!


--- David A. Wheeler

Mario Carneiro

unread,
Jul 12, 2019, 2:16:13 AM7/12/19
to metamath
I have another status update, and this one might turn some heads. The syntax coloring in the initial post is no longer a matter of creative license - there is now a real syntax coloring mode for MM0. But that's not all: there is now a VSCode extension that will check your proofs on the fly, inspired by the lean editing mode. Here's a screen shot:

mm0.png

You will notice that the file extension here is ".mm1", not ".mm0". That's the next phase of the project. Metamath One is an extension of the syntax of Metamath Zero with a Turing complete programming language based on scheme. The basic idea is that MM1 is a scripting language for assembling MM0 proofs and statements. It's untrusted, and functions as the IDE for MM0, but it will strive for simplicity much as MM0 does. I won't be too precise on how it works, because it's not completely implemented yet, but the basic syntax is as follows:

In addition to "def", "term", "theorem" etc commands from MM0, there is a new one:

do {
  (#def (my-double x) (+ x x))
  (:print (my-double 4))
};

The do block enters into a scheme toplevel environment, where you can write arbitrary code in lisp syntax. The scheme environment is globally persistent through the file, so you can interleave proofs and function declarations. This allows you to create functions that are tailored to particular theorem statements.

On its own, this is of limited usefulness, but the scheme environment also has access to the proof environment, so it can inspect the environment to see what is defined, and add definitions and theorems as well. Combined with the metaprogramming capabilities of scheme, this actually means that you can create almost your own domain specific language that compiles down to MM0 theorems, and this is in fact what I intend to do.

The other place where scheme syntax appears is in theorem proofs. Again the details are still in flux, and the proof of id in the screen shot is fake - I haven't implemented proof checking yet - but the syntax is a hybrid of scheme and lean for entering metamath style proofs. One of the core concepts in Lean is the idea of a tactic proof. If you think about it from first principles it's quite similar to what you see in the metamath proof IDEs. The goal is to produce a term that is a proof of the target proposition, which we think of as a big tree of theorem applications. Initially we have only a hole where the proof should go. We can build this tree in two ways: by replacing a hole by a tree, possibly also containing holes in it (backwards proving), or by constructing a tree from smaller trees (forward proving). In the general state, we have some number of holes left to fill, and some number of trees we have constructed so far.

Even the most basic kind of inference (for example reconstructing the syntax proofs) requires the prover to have this notion of backward proof, where it is performing unification at each step. If we give the user access to this prover state, a tactic mode falls out without us having to do anything special. Here's how it might look:

theorem id (ph: wff): $ ph -> ph $ =
  (:by
    'ax_mp           -- apply ax_mp
    ()               -- show me the goals: ?a and ?a -> ph -> ph
    'ax_1            -- now (?b -> ?c -> ?b) -> ph -> ph
    (:= ?b $ph$)     -- unify ?b := ph by providing an explicit term
                     -- now (ph -> ?c -> ph) -> ph -> ph
    (ax_mp _ (ax_2)) -- unify against this subproof, now ph -> (?c -> ph) -> ph
    'ax_1            -- finish the proof
  );

theorem id2 (ph: wff): $ ph -> ph $ =
  (ax_mp); -- press ctrl-space autocomplete:
  --    ^
theorem id3 (ph: wff): $ ph -> ph $ =
  (ax_mp () ()); -- each () has a description of the proof term here
  --     ^ -- type ax_ and ctrl-space gives us the statements starting with
  --          ax_ that unify with the goal
theorem id4 (ph: wff): $ ph -> ph $ =
  (ax_mp (ax_1) (ax_mp)); -- there is a red squiggle here saying there are two
  --             ^ -- missing arguments and their types
theorem id5 (ph: wff): $ ph -> ph $ =
  (@ ax_mp (ax_1) (ax_mp)); -- there is now also a red squiggle here saying
  -- ^ -- that two arguments are missing. Ctrl-space here
theorem id6 (ph: wff): $ ph -> ph $ =
  (@ ax_mp $_ -> _ -> _$ $(_ -> _ -> _) -> ph -> ph$ (ax_1) (ax_mp));
  -- the fields are auto populated, although it doesn't do in place
  -- unification like mmj2. Auto complete doesn't do global rewrites, that
  -- could be confusing.
theorem id7 (ph: wff): $ ph -> ph $ =
  (@ ax_mp $_ -> _ -> _$ $(_ -> _ -> _) -> ph -> ph$ (ax_1) (ax_mp (ax_1) (ax_2)));
  -- This is a valid proof, although a bit messy
theorem id8 (ph: wff): $ ph -> ph $ =
  (ax_mp (ax_1) (:by -- We can also write a tactic proof inline
    'ax_mp
    'ax_1
    'ax_2
  ));

do {
  (#def id-prover (ax_mp (ax_1) (ax_mp (ax_1) (ax_2))))
};

theorem id9 (ph: wff): $ ph -> ph $ =
id-prover; -- We can also call scheme functions to produce proofs

I think there is a lot of space for a proper proof IDE from these primitives. I haven't enumerated what all the scheme primitives are yet, but mostly they will just be allowing to read and modify the prover state. By the way, the reason for the "#" and ":" sentinels is because the scheme environment overlaps the proof environment, in the sense that they can have the same names but are referred to the same way, so if there is a scheme function called "if" then you will have difficulty referring to the term constructor called "if". Since scheme has a much more permissive identifier convention, built in scheme functions are given names that start with ":" so that they can't be clobbered. The "#" names are so called "special forms" of scheme - these are not regular function applications but process their arguments in a special way. (The other main special form is "#fn", called "lambda" in scheme, for constructing functions and closures.)
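As a minimal sketch of those sentinels in action, here is a hypothetical do block; the helper names double and quadruple are made up, and only primitives already shown above are used. "#def" and "#fn" are special forms, while ":print" is a built-in whose name starts with ":" so it cannot be clobbered:

do {
  (#def (double x) (+ x x))
  (#def quadruple (#fn (x) (double (double x))))
  (:print (quadruple 4))
};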

I'm really excited about where this is all headed. Finally an IDE I'm not ashamed of. I have a while to go before I can start redirecting these efforts toward metamath proper, but I think one application is to be able to use this as a metamath shim. The hardest part with communicating with MM is not the bundling stuff, which is a relatively minor verification detail, but rather the notation. Metamath and MM0 have different approaches to notation, such that something like "co", operation value "(A F B)", is difficult to render well in MM0, because notations are tied to single constants. This is one of the areas where the scheme layer can actually help, by translating terms to and from metamath notation. Everything in MM0 will be "pluggable" - for example, ":by" is a built in function but you can still redefine it if you want tactics to work differently, and similarly all the parsing and printing will pass through redefinable scheme functions so that you can customize the proof environment without changing the core.

Mario

Giovanni Mascellani

unread,
Jul 12, 2019, 6:43:05 AM7/12/19
to meta...@googlegroups.com
Hi,

On 12/07/19 08:15, Mario Carneiro wrote:
> I have another status update, and this one might turn some heads.

Wonderful, that's really incredible! I won't have much time to check out
your work for a few weeks, but your progress is really interesting. :-)

Good work and keep up!

vvs

unread,
Jul 12, 2019, 8:25:08 AM7/12/19
to Metamath
On Friday, July 12, 2019 at 9:16:13 AM UTC+3, Mario Carneiro wrote:
I'm really excited about where this is all headed. Finally an IDE I'm not ashamed of. I have a while to go before I can start redirecting these efforts toward metamath proper, but I think one application is to be able to use this as a metamath shim.
While people are mostly silent in this thread, I bet they are watching it with great interest. This is to encourage you and wish you the best of luck. Metamath really needs a powerful and flexible proof assistant.

Mario Carneiro

unread,
Jul 21, 2019, 3:59:07 AM7/21/19
to metamath
I've started actually proving things using the IDE! The current contents of peano.mm1 are at the end of the message. Here are some changes since the last message and things to note in the proof:

* The only real user tactic so far is the following:
(def (refine-extra-args refine p . ps)
  (foldl ps p
    (fn (acc p2)
      (refine (list 'ax_mp (verb p2) (verb acc))))))

  "refine-extra-args" is one of the built in callbacks from the system to the lisp user code, called when a theorem is called with too many arguments, for example if you write "(syl h1 h2 h3)" (since "syl" takes only two hypotheses). The default behavior is to just give an error, but the user code here instead translates it to "(ax_mp h3 (syl h1 h2))", iterating ax_mp as many times as necessary. This means that you get the lambda-calculus style of applying implications for free! For example, the proof of mpd is just "ax_2 h2 h1", which would normally be an error since ax_2 takes no arguments but instead translates to the real metamath proof, "ax_mp h1 (ax_mp h2 ax_2)". In fact, this notation is so convenient that it mostly obviates the need for theorems like a2i, since "ax_2 h" and "a2i h" are interchangeable. (If this ever makes it back to metamath, I expect these superfluous ax_mp applications to be optimized away when the dedicated theorems like a2i exist.)

* Most proofs are given directly, as a quoted lisp expression like '(foo bar baz). Previously I thought I would need to prefix all lisp constants with ":" to distinguish them from MM0 constants, but it turns out that Scheme's built in distinction between data s-exprs and evaluable s-exprs is sufficient. So the expression that goes after an = in a theorem is a lisp expression, which should evaluate to an s-expression containing MM0 theorems. This expression is then passed to "refine", which elaborates all the holes in the expression and reports all the errors. For more involved proofs, you would use a focus block like so:

(focus
  '(foo _ _)
  (focus
    '(bar))
  '(baz)
  (stat));
 
  The "focus" tactic hides all goals except the first one, so it is good for block structuring a proof (it is the equivalent of lean's { } notation). It also calls "refine" whenever it is passed a value other than "undef" (which is the usual return value for things that return nothing), so that the above is equivalent to:

(focus
  (refine '(foo _ _))
  (focus
    (refine '(bar)))
  (refine '(baz))
  (stat));

The "stat" tactic prints out the current state of the goal(s) as well as all subproofs. It is useful to leave this at the end of the proof so that you can see your progress.

* The "have" tactic looks like this:

(have 'h1 '(foo bar baz))
(have 'h2 $ x = a -> y = b $ '(foo bar baz))
(have 'h3 $ x = a -> y = b $ _)

  This declares a named subproof, and you can give either the statement or the proof (or both), and it will use the information to unify against existing variables and add the results to the context (so you could refer to h3 later, the same as an actual hypothesis to the theorem).

* MM1 uses some fairly aggressive type inference to avoid needing to declare variables in theorems. You still have to label hypotheses, but for the most part you don't have to declare any bound or regular variables. (I switched to labeling prop variables with lowercase letters because I can, but also because new variables appear in alphabetical order in the statement.) The exception is when you have variables that depend on others, i.e. predicate variables, which happens a lot in the predicate calculus section. The default is to assume that a new variable is distinct from all bound variables in the statement (which is opposite of metamath's default, if you can call it that, in which no $d markings means everything can depend on everything else). My bet is that opt-out disjointness is easier to reason about, especially in the "late game" where you aren't doing predicate calculus as such but rather using it in some complex context, and there knowing that the inputs don't interact is very useful.
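As a concrete illustration (these two statements are taken verbatim from the peano.mm1 listing at the end of this message): a, b and c are never declared anywhere, yet the elaborator infers them as wff variables of the theorem:

local theorem id: $ a -> a $ = '(mpd (! ax_1 _ a) ax_1);
local theorem syl (h1: $ a -> b $) (h2: $ b -> c $): $ a -> c $ = '(mpd h1 (a1i h2));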

In principle, we could actually infer everything from just the proof, i.e. from just

local theorem mpi = '(mpd (a1i h1) h2);

we could infer the complete statement of mpi, by unifying everything, assuming all unknown names are hypotheses to the theorem, and turning all remaining metavariables into variables. This would be the metamath solitaire style. The main reason I don't want to do this is because the statement is valuable documentation (you probably don't want to be surprised about the order of arguments), plus it enables parallelization of proof checking (which isn't necessary for these tiny proofs but can be useful later on).

* I'm really loving the efficiency of the proof style. Of course I don't have a complete prop calc section here, but in a few hundred lines I have a working prop + pred calc section which takes tens of thousands of lines in set.mm.

* Although the whole system tracks locations of text snippets well, and the whole backend tries its best to put squiggles in the right place, lisp programs have a tendency to steal error messages. For example, the declaration

 (def (verb e) (list ':verb e))

  which inserts a verbatim block into an expression (to prevent refine from elaborating an already elaborated proof term), makes use of an atom ':verb, which is annotated with its position here at the top of the file. If it later turns out that this term is not type correct when it is inserted into the context (i.e. it was used incorrectly in a proof), the error will appear on this line, rather than on the proof which may be hundreds of lines below. I'm not sure exactly how to solve this problem without making offset management explicit in lisp programs. A stack trace will probably help here.

* While I am still the main intended user of the software for the next few months, I think it's in good enough shape for other people to beta test it if they are interested. The lisp environment is still pretty bare bones, and it will probably only get developed as I need to write real tactics on the lisp side. You will have to stick to brand new databases like peano.mm1 here until I add support for MM->MM1 or MM0+MMU->MM1. Also I haven't started the compilation part yet, MM1 -> MM0 + MMU (or more likely MM0 + MMB, the binary format).

delimiter $ ( [ { ~ , $
$ } ] ) ~ , $;
strict provable sort wff;
term im (ph ps: wff): wff; infixr im: $->$ prec 25;
term not (ph: wff): wff; prefix not: $~$ prec 40;

axiom ax_1 (ph ps: wff): $ ph -> ps -> ph $;
axiom ax_2 (ph ps ch: wff): $ (ph -> ps -> ch) -> (ph -> ps) -> ph -> ch $;
axiom ax_3 (ph ps: wff): $ (~ph -> ~ps) -> ps -> ph $;
axiom ax_mp (ph ps: wff): $ ph $ > $ ph -> ps $ > $ ps $;

do {
  (set-timeout 500)
  (def (foldl l z s) (if (null? l) z (foldl (tl l) (s z (hd l)) s)))
  (def (foldr l z s) (if (null? l) z (s (hd l) (foldr (tl l) z s))))
  (def (rev l) (foldl l () (fn (l a) (cons a l))))
  (def (verb e) (list ':verb e))
  (def (exact e) (refine (verb e)))
  (def refine-extra-args-orig refine-extra-args)
  (def (refine-extra-args refine p . ps)
    (foldl ps p
      (fn (acc p2)
        (refine (list 'ax_mp (verb p2) (verb acc))))))
  (def (result) (hd (get-goals)))
  (def (target) (goal-type (result)))
};

local theorem a1i (h: $ b $): $ a -> b $ = '(ax_1 h);
local theorem a2i (h: $ a -> b -> c $): $ (a -> b) -> (a -> c) $ = '(ax_2 h);
local theorem mpd (h1: $ a -> b $) (h2: $ a -> b -> c $): $ a -> c $ = '(ax_2 h2 h1);
local theorem mpi = '(mpd (a1i h1) h2);
local theorem id: $ a -> a $ = '(mpd (! ax_1 _ a) ax_1);
local theorem syl (h1: $ a -> b $) (h2: $ b -> c $): $ a -> c $ = '(mpd h1 (a1i h2));
local theorem a1d (h: $ a -> b $): $ a -> c -> b $ = '(syl h ax_1);
local theorem a2d (h: $ a -> b -> c -> d $): $ a -> (b -> c) -> (b -> d) $ = '(syl h ax_2);
local theorem a3d (h: $ a -> ~b -> ~c $): $ a -> c -> b $ = '(syl h ax_3);
local theorem sylc (h1: $ a -> b $) (h2: $ a -> c $) (h: $ b -> c -> d $): $ a -> d $ = '(mpd h2 (syl h1 h));
local theorem syld (h1: $ a -> b -> c $) (h2: $ a -> c -> d $): $ a -> b -> d $ = '(mpd h1 (a2d (a1d h2)));
local theorem syl5 (h1: $ b -> c $) (h2: $ a -> c -> d $): $ a -> b -> d $ = '(syld (a1i h1) h2);
local theorem syl6 (h1: $ a -> b -> c $) (h2: $ c -> d $): $ a -> b -> d $ = '(syld h1 (a1i h2));
local theorem imim2i (h: $ b -> c $): $ (a -> b) -> (a -> c) $ = '(a2i (a1i h));
local theorem absurd: $ ~a -> a -> b $ = '(a3d ax_1);
local theorem com12 (h: $ a -> b -> c $): $ b -> a -> c $ = '(syl ax_1 (a2i h));
local theorem absurdr: $ a -> ~a -> b $ = '(com12 absurd);
local theorem imidm: $ (a -> a -> b) -> a -> b $ = '(a2i (com12 id));
local theorem contra: $ (~a -> a) -> a $ = '(imidm (a3d (a2i absurd)));
local theorem dne: $ ~~a -> a $ = '(syl absurd contra);
local theorem con2: $ (a -> ~b) -> (b -> ~a) $ = '(a3d (syl5 dne id));
local theorem notnot1: $ a -> ~~a $ = '(con2 id);
local theorem con3: $ (a -> b) -> (~b -> ~a) $ = '(syl (imim2i notnot1) con2);
local theorem mt2d (h1: $ a -> c $) (h2: $ a -> b -> ~c $): $ a -> ~b $ = '(sylc h2 h1 con2);
local theorem con1: $ (~a -> b) -> (~b -> a) $ = '(a3d (imim2i notnot1));
local theorem cases (h1: $ a -> b $) (h2: $ ~a -> b $): $ b $ = '(contra (syl (con1 h2) h1));
local theorem casesd (h1: $ a -> b -> c $) (h2: $ a -> ~b -> c $): $ a -> c $ =
'(cases (com12 h1) (com12 h2));
local theorem con1d (h: $ a -> ~b -> c $): $ a -> ~c -> b $ = '(syl h con1);
local theorem con2d (h: $ a -> b -> ~c $): $ a -> c -> ~b $ = '(syl h con2);
local theorem con3d (h: $ a -> b -> c $): $ a -> ~c -> ~b $ = '(syl h con3);
local theorem con4d (h: $ a -> ~b -> ~c $): $ a -> c -> b $ = '(syl h ax_3);

def and (a b: wff): wff = $ ~(a -> ~b) $;
infixl and: $/\$ prec 35;

local theorem andl: $ a /\ b -> a $ = '(con1 absurd);
local theorem andr: $ a /\ b -> b $ = '(con1 ax_1);
local theorem iand: $ a -> b -> a /\ b $ = '(syl (com12 id) con2);
local theorem iandd (h1: $ a -> b $) (h2: $ a -> c $): $ a -> b /\ c $ = '(sylc h1 h2 iand);
local theorem iandi (h1: $ a $) (h2: $ b $): $ a /\ b $ = '(iand h1 h2);
local theorem andwl (h: $ a -> c $): $ a /\ b -> c $ = '(syl andl h);
local theorem andwr (h: $ b -> c $): $ a /\ b -> c $ = '(syl andr h);
local theorem imp (h: $ a -> b -> c $): $ a /\ b -> c $ = '(sylc andl andr h);
local theorem exp (h: $ a /\ b -> c $): $ a -> b -> c $ = '(syl6 iand h);

def iff (a b: wff): wff = $ (a -> b) /\ (b -> a) $;
infixl iff: $<->$ prec 20;

local theorem bi1: $ (a <-> b) -> a -> b $ = 'andl;
local theorem bi1i (h: $ a <-> b $): $ a -> b $ = '(bi1 h);
local theorem bi2: $ (a <-> b) -> b -> a $ = 'andr;
local theorem bi2i (h: $ a <-> b $): $ b -> a $ = '(bi2 h);
local theorem ibii (h1: $ a -> b $) (h2: $ b -> a $): $ a <-> b $ = '(iandi h1 h2);
local theorem ibid (h1: $ a -> b -> c $) (h2: $ a -> c -> b $): $ a -> (b <-> c) $ = '(iandd h1 h2);
local theorem biid: $ a <-> a $ = '(ibii id id);
local theorem mpbi (h1: $ a $) (h2: $ a <-> b $): $ b $ = '(bi1i h2 h1);
local theorem mpbir (h1: $ a $) (h2: $ b <-> a $): $ b $ = '(bi2i h2 h1);
local theorem con1b: $ (~a <-> b) -> (~b <-> a) $ = '(ibid (con1d bi1) (con2d bi2));
local theorem con2b: $ (a <-> ~b) -> (b <-> ~a) $ = '(ibid (con2d bi1) (con1d bi2));
local theorem con3b: $ (a <-> b) -> (~b <-> ~a) $ = '(ibid (con3d bi1) (con3d bi2));
local theorem con4b: $ (~a <-> ~b) -> (b <-> a) $ = '(ibid (con4d bi1) (con4d bi2));
local theorem notnot: $ a <-> ~~a $ = '(ibii notnot1 dne);
local theorem bicom: $ (a <-> b) -> (b <-> a) $ = '(ibid bi2 bi1);
local theorem bitr: $ (a <-> b) -> (b <-> c) -> (a <-> c) $ =
'(exp (ibid (syld (andwl bi1) (andwr bi1)) (syld (andwr bi2) (andwl bi2))));
local theorem sylib (h1: $ a -> b $) (h2: $ b <-> c $): $ a -> c $ = '(syl h1 (bi1i h2));
local theorem sylbi (h1: $ a <-> b $) (h2: $ b -> c $): $ a -> c $ = '(syl (bi1i h1) h2);

def or (a b: wff): wff = $ ~a -> b $;
infixl or: $\/$ prec 30;

local theorem orl: $ a -> a \/ b $ = 'absurdr;
local theorem orr: $ b -> a \/ b $ = 'ax_1;
local theorem eori (h1: $ a -> c $) (h2: $ b -> c $): $ a \/ b -> c $ =
'(casesd (a1i h1) (imim2i h2));
local theorem eord (h1: $ a -> b -> d $) (h2: $ a -> c -> d $):
$ a -> b \/ c -> d $ = '(com12 (eori (com12 h1) (com12 h2)));
local theorem eor: $ (a -> c) -> (b -> c) -> a \/ b -> c $ = '(exp (eord andl andr));

term wtru: wff; prefix wtru: $T.$ prec max;
axiom tru: $ T. $;
def fal: wff = $ ~T. $; prefix fal: $F.$ prec max;

local theorem trud (h: $ T. -> a $): $ a $ = '(h tru);
local theorem notfal: $ ~F. $ = '(notnot1 tru);
local theorem efal: $ F. -> a $ = '(absurd notfal);

sort nat;
term al {x: nat} (ph: wff x): wff; prefix al: $A.$ prec 30;

def ex {x: nat} (ph: wff x): wff = $ ~(A. x ~ph) $;
prefix ex: $E.$ prec 30;

term eq (a b: nat): wff; infixl eq: $=$ prec 50;

axiom ax_gen (ph: wff) {x: nat}: $ ph $ > $ A. x ph $;
axiom ax_4 {x: nat} (ph ps: wff x): $ A. x (ph -> ps) -> A. x ph -> A. x ps $;
axiom ax_5 {x: nat} (ph: wff): $ ph -> A. x ph $;
axiom ax_6 (a: nat) {x: nat}: $ E. x x = a $;
axiom ax_7 (a b c: nat): $ a = b -> a = c -> b = c $;
axiom ax_10 {x: nat} (ph: wff x): $ ~(A. x ph) -> A. x ~ (A. x ph) $;
axiom ax_11 {x y: nat} (ph: wff x y): $ A. x A. y ph -> A. y A. x ph $;
axiom ax_12 {x y: nat} (ph: wff y): $ A. y ph -> A. x (x = y -> ph) $;

local theorem alimi (a b: wff x) (h: $ a -> b $): $ A. x a -> A. x b $ = '(ax_4 (ax_gen h));
local theorem albii (a b: wff x) (h: $ a <-> b $): $ A. x a <-> A. x b $ =
'(ibii (alimi (bi1i h)) (alimi (bi2i h)));
local theorem alimd (a: wff) (b c: wff x) (h: $ a -> b -> c $):
$ a -> A. x b -> A. x c $ = '(syl (syl h ax_5) ax_4);
local theorem exim (a b: wff x): $ A. x (a -> b) -> E. x a -> E. x b $ =
'(syl (syl (alimi con3) ax_4) con3);
local theorem iex (a: wff x): $ a -> E. x a $ =
'(syl (syl (!! ax_5 y) ax_12) (mpi ax_6 exim));
local theorem alnex (a: wff x): $ A. x ~a <-> ~(E. x a) $ = 'notnot;
local theorem alex (a: wff x): $ A. x a <-> ~(E. x ~a) $ =
'(bitr (albii notnot) alnex);
local theorem exnal (a: wff x): $ E. x ~a <-> ~(A. x a) $ = '(con2b alex);
local theorem eal (a: wff x): $ A. x a -> a $ = '(ax_3 (sylib iex exnal));
local theorem iald (a: wff) (b: wff x) (h: $ a -> b $): $ a -> A. x b $ = '(syl ax_5 (alimi h));
local theorem eale (a: nat) (b: wff x) (c: wff)
(e: $ x = a -> (b <-> c) $): $ A. x b -> c $;

Mario

Mario Carneiro

unread,
Jul 21, 2019, 4:04:11 AM7/21/19
to metamath
It looks like the message got clipped. I will try once more, but you can view the full peano.mm1 file at https://github.com/digama0/mm0/blob/3df6ed5/examples/peano.mm1 .

local theorem mpi (h1: $ b $) (h2: $ a -> b -> c $): $ a -> c $ = '(mpd (a1i h1) h2);

Thierry Arnoux

unread,
Jul 21, 2019, 6:37:38 AM7/21/19
to meta...@googlegroups.com, Mario Carneiro

Hi Mario,

Like many I'm following your developments with great interest, but I have not started playing with it.

I've been enjoying a lot developing proofs in MM using MMJ2. What would be the future of that?

Do you expect that the tools in MM1 will be so advanced that people will prefer using MM1 instead of MM, or do you expect developments on set.mm (and iset.mm) to continue in the MM format?

Will it be possible to have developments made in MM1 flow back into set.mm?

In short, what would be the different development pipelines?

  • [MMJ2] MM -> MM0 -> publication in HOL/lean?
  • [mm0-hs] MM1 -> MM0 -> MM?

Thanks,

BR,
_
Thierry

vvs

unread,
Jul 21, 2019, 2:00:53 PM7/21/19
to Metamath
I've tried it and it seems promising indeed.

But the whole interface isn't looking very interactive right now. The only feedback that I can get is editor's pop-up while hovering mouse over a problematic token. And autocomplete isn't very helpful either yet. Of course, the project is in a very early stage and so this is expected. Are you planning an improved interface for the future and what are limitations of VSCode extension? Also Plan9 interface comes to mind where any text is in fact a widget.

Mario Carneiro

unread,
Jul 21, 2019, 6:04:34 PM7/21/19
to metamath
On Sun, Jul 21, 2019 at 6:37 AM Thierry Arnoux <thierry...@gmx.net> wrote:
I've been enjoying a lot developing proofs in MM using MMJ2. What would be the future of that?

mmj2 isn't going anywhere, you can still use it as always, and I plan to continue maintenance on the project (well my maintenance record hasn't been that great... I promise I will release a new version soon). But I have sensed some structural issues with continuing to improve mmj2, and it's quite slow, and being its own editor was a bad idea. So I probably won't be doing much more than bugfixes on mmj2; I'm looking for a more radical improvement that can't be done in the current codebase and that means a new project.

Do you expect that the tools in MM1 will be so advanced that people will prefer using MM1 instead of MM, or do you expect developments on set.mm (and iset.mm) to continue in the MM format?

That's the million dollar question isn't it. Assuming this all becomes wildly successful, there are three ways I could see metamath development moving to MM1:

1. MM1 gets a command saying "import the MM database up to theorem X", and then you write some MM1 theorems, and there is a compilation export format for MM. That means that you write the proofs in MM1 but the result is an MM snippet that you can put into set.mm.

2. The MM->MM0 conversion becomes pretty and lossless enough that we use it to do a wholesale conversion of set.mm to MM0, and development continues on set.mm0, and MM0 becomes the new lingua franca.

3. Similarly, MM->MM1 becomes good enough that we convert set.mm to MM1.

I think option 2 is very unlikely to happen unless MM0 becomes much more popular than MM, but could be a reasonable long term strategy. MM0 is well specified and has the same focus on multiple verifiers, so I would call it a moral successor to MM.

Option 3 has some serious drawbacks wrt compilation speed and the metamath philosophy. I've always said that metamath compiles fast because it stores proofs not proof scripts, but that's exactly what an MM1 file is. There is no built in limit on the complexity of tactics used in an MM1 file, but these can slow down the overall compilation significantly until we look like the lean and isabelle libraries where it takes half an hour to build the library. My advisor has suggested that caching should be used to improve this, but this is a new paradigm. I guess we have some experience with caching in the source tree (the discouraged file), but it could also be used in lieu of those big ASCII blobs in metamath files.

Will it be possible to have developments made in MM1 flow back into set.mm?

This would be option 1, which I think is the most likely short term strategy. It allows individual authors to "go MM1" without a groundshift in the community. I may need to have some kind of "metamath compatibility mode" to support this since the logic differs in a few ways and notations would have to be rendered differently, but I think it is doable.

In short, what would be the different development pipelines?

  • [MMJ2] MM -> MM0 -> publication in HOL/lean?
That's an interesting idea. So far while you can run this pipeline you won't be "giving back" to the HOL community this way - you aren't using any existing HOL theorems but rather producing your own from scratch.  Currently this requires a lot of "external" work to get the result into the right form on the target, but I think you could write a new theorem and then get it on the HOL side right away given the continuous nature of the pipeline. I would still call this a work in progress though - I'm not even sure if these other target systems are even interested in this kind of translation, how we can work this into their proofs and so on.
  • [mm0-hs] MM1 -> MM0 -> MM?
This is more explicitly supported, so I expect it to be fairly good quality as a translation. The formalization project I am working on is simply MM1 -> MM0 with no further translation to MM, although it should not be hard to support a general MM0 -> MM conversion that just drops some of the guarantees on bound variables and definitions. It probably won't look that great though without a lot of work, which is essential for "blending in" if you want to actually do your work in a separate system and translate all the results.

On Sun, Jul 21, 2019 at 2:00 PM vvs <vvs...@gmail.com> wrote:
I've tried it and it seems promising indeed.

But the whole interface isn't looking very interactive right now. The only feedback that I can get is editor's pop-up while hovering mouse over a problematic token.

Suggestions are welcome. So far the only thing I have implemented is diagnostics (the squiggles). Hovers (show me the statement of the theorem) and go-to-definition should be next. By the way, F8 (go to next error) will show you errors inline rather than futzing with hovers, which is good when the error printout is substantial.
 
And autocomplete isn't very helpful either yet.

I think a context unaware autocomplete should not be so hard, and we will have to see about the speed of the context aware autocomplete.
 
Of course, the project is in a very early stage and so this is expected. Are you planning an improved interface for the future and what are limitations of VSCode extension?

I can in principle do anything listed here:


Let me know if you see anything that would fit well with the proof authoring experience.

One bummer is that vscode doesn't support semantic syntax highlighting (https://github.com/Microsoft/vscode/issues/585), which would have been a great option for improving the syntax highlighting on math blocks and lisp blocks.

Another option which Lean uses but doesn't fall under any standard vscode feature is to have a "goal view" which pops up on the right side of the screen and basically shows you the result of (stat) continuously. To do this I would have to extend the vscode extension, which I am loath to do because it decreases the portability of the extension. Currently all the work is being done from mm0-hs as a backend server and communication goes via the language server protocol, which means that it should be easy to set up a similarly featureful editor mode for any other LSP supporting editor, such as emacs.
 
Also Plan9 interface comes to mind where any text is in fact a widget.

Could you elaborate on this? I haven't used Plan9, what's the interaction story there?

Mario

vvs

unread,
Jul 21, 2019, 6:54:08 PM7/21/19
to Metamath
On Monday, July 22, 2019 at 1:04:34 AM UTC+3, Mario Carneiro wrote:
I can in principle do anything listed here:


Let me know if you see anything that would fit well with the proof authoring experience.

It seems that text formatting could be useful to provide some visual feedback.

Another option which Lean uses but doesn't fall under any standard vscode feature is to have a "goal view" which pops up on the right side of the screen and basically shows you the result of (stat) continuously. To do this I would have to extend the vscode extension, which I am loath to do because it decreases the portability of the extension. Currently all the work is being done from mm0-hs as a backend server and communication goes via the language server protocol, which means that it should be easy to set up a similarly featureful editor mode for any other LSP supporting editor, such as emacs.

Sure, that's reasonable. I think that using VSCode's webview should also be ruled out as non-portable. Though, I can't help but think about elisp integration.

Also Plan9 interface comes to mind where any text is in fact a widget.

Could you elaborate on this? I haven't used Plan9, what's the interaction story there?

In Plan 9 everything is a server, so its text editor Acme is a server which can interpret ranges of text as a message. So, you could just type something like "Open" in one of its buffers and use a special mouse click to send it as a message to the Acme server, which opens a file. In fact the idea comes from Oberon and is described here. But I don't know if that's possible with LSP.

Mario Carneiro

unread,
Jul 21, 2019, 8:08:42 PM7/21/19
to metamath
On Sun, Jul 21, 2019 at 6:54 PM vvs <vvs...@gmail.com> wrote:
On Monday, July 22, 2019 at 1:04:34 AM UTC+3, Mario Carneiro wrote:
I can in principle do anything listed here:


Let me know if you see anything that would fit well with the proof authoring experience.

It seems that text formatting could be useful to provide some visual feedback.

What do you mean? Are you talking about an MM1 code formatter (i.e. fixing spacing and indentation)? This is actually probably going to happen, because MM1 needs a formatter anyway since it outputs MM0 files and they have to be readable. There is already an expression pretty printer that displays subproof statements in the squiggles on "_" holes. (Try getting a big expression to print, over 80 chars, and it should do indentation and alignment and all that.)
 

Another option which Lean uses but doesn't fall under any standard vscode feature is to have a "goal view" which pops up on the right side of the screen and basically shows you the result of (stat) continuously. To do this I would have to extend the vscode extension, which I am loath to do because it decreases the portability of the extension. Currently all the work is being done from mm0-hs as a backend server and communication goes via the language server protocol, which means that it should be easy to set up a similarly featureful editor mode for any other LSP supporting editor, such as emacs.

Sure, that's reasonable. I think that using VSCode's webview should also be ruled out as non-portable. Though, I can't help but think about elisp integration.

I recall back in the days before the lean goal view, it would instead post the goal view to "output", which is a command line style streaming view. You just put a "-------------------" before the goal statement, and post the goal every time the user types a character, and bam you've got a live updating view. It's a really low tech option which makes it much more portable.
 
Also Plan9 interface comes to mind where any text is in fact a widget.

Could you elaborate on this? I haven't used Plan9, what's the interaction story there?

In Plan 9 everything is a server, so its text editor Acme is a server which can interpret ranges of text as a message. So, you could just type something like "Open" in one of its buffers and use special mouse click to send it as a message to Acme server which opens a file. In fact the idea comes from Oberon and is described here. But I don't know if that's possible with LSP.

Okay, after this and some research I have a sense of what this looks like. But how would you use something like this in MM1? What kind of interactions are you considering? Something that comes close to this is the "code action" feature. If you type the right text (something that the LSP server decides deserves a code action), vscode will pop up a lightbulb next to the text, and if you click on it you can perform some set of actions. Usually this is used in programming languages for doing refactoring operations: you click on a variable name, the lightbulb pops up, you find "export to method" or similar in the dropdown and click on it, and the server updates the content of the file with the result of the refactoring.

Notice that this is changing the text area itself. While I think that excessive programmatic changing of the text area as in mmj2 is problematic, local changes can be used to simulate many of the things that mmj2 does. So for instance you could inline a tactic: Let's say (equality) is a tactic for discharging equality proofs. If you call it directly, then it proves the equality goal, but the lightbulb appears on it, and you can click the "inline tactic" button and it replaces "(equality)" in the file with "'(eqeq1d (oveq1d (oveq2)))" or whatever the equivalent direct proof would be. This is not always a valid move, for instance if the lisp program also has some side effects like adding new definitions and so on, but I imagine that many tactics will fit this mode of operation, and as long as the user is in the driver's seat here they can choose to use it only when it is applicable.

vvs

unread,
Jul 22, 2019, 7:13:54 AM7/22/19
to Metamath
On Monday, July 22, 2019 at 3:08:42 AM UTC+3, Mario Carneiro wrote:
What do you mean? Are you talking about an MM1 code formatter (i.e. fixing spacing and indentation)?

This, and maybe it could also change some text, e.g. substitute placeholders during unification. I don't know. I can even imagine that it could be used as a control event to apply specific tactics to part of a theorem.

Okay, after this and some research I have a sense of what this looks like. But how would you use something like this in MM1? What kind of interactions are you considering?

I don't have an exact answer, but I'm considering a session in a proof assistant as an exploration of alternatives. So, applying tactics to a batch of theorems at once wouldn't be very helpful in that particular case. Writing a tactic for a specific theorem seems too cumbersome when I could just do it directly. In an ideal world one should simply point to one of the options and apply it to the theorem in question to get feedback, then either proceed to another iteration or undo and return to a previous state. Also, visual clues are very important. I can't see too much if my workspace is cluttered with irrelevant information. In fact I might want to concentrate on different parts at different times, e.g. I can select different premises and conclusions to apply deduction rules or unification. I've tried this in mmj2 and found that there is not much feedback, but the unification and derivation features were the most useful.
 
This is not always a valid move, for instance if the lisp program also has some side effects like adding new definitions and so on, but I imagine that many tactics will fit this mode of operation, and as long as the user is in the driver's seat here they can choose to use it only when it is applicable.

Exactly. I find it inconvenient when the system is doing something unexpected and I want to examine it first and then make a decision myself.

vvs

unread,
Jul 22, 2019, 3:50:56 PM7/22/19
to Metamath
On Monday, July 22, 2019 at 3:08:42 AM UTC+3, Mario Carneiro wrote:
Something that comes close to this is the "code action" feature. If you type the right text (something that the LSP server decides deserves a code action), vscode will pop up a lightbulb next to the text, and if you click on it you can perform some set of actions. Usually this is used in programming languages for doing refactoring operations: you click on a variable name, the lightbulb pops up, you find "export to method" or similar in the dropdown and click on it, and the server updates the content of the file with the result of the refactoring.

BTW, after reading some more docs on LSP, the closest thing that resembles the above concept seems to be CodeLens, though both could be used to do cool things in a proof assistant. There are plenty of language servers out there that support all this functionality in creative ways and are compatible with vscode, emacs and vim.

Mario Carneiro

unread,
Jul 22, 2019, 8:00:55 PM7/22/19
to metamath
On Mon, Jul 22, 2019 at 7:13 AM vvs <vvs...@gmail.com> wrote:
Writing a tactic for a specific theorem seems too cumbersome when I could just do it directly.

That depends on the tactic, and the theorem. The point of tactics is that they automate proof writing, and the reason the tactic language is commingled with the definitions and proofs is because some tactics are domain specific. For example, the arithmetic algorithm relies on the definition of +, decimal numbers and so on, and a set of theorems used as building blocks. I have been burned before trying to separate the two, because development of the tactic may require changes to the theorems and vice versa, and the result is a maintenance headache. (Consider the recent mmj2 breakage caused by renaming "set" to "setvar". If the definition checker was inline with set.mm that wouldn't have happened.)
 
In an ideal world one should simply point to one of the options and apply it to the theorem in question to get feedback, then either proceed to another iteration or undo and return to a previous state. Also, visual clues are very important. I can't see too much if my workspace is cluttered with irrelevant information. In fact I might want to concentrate on different parts at different times, e.g. I can select different premises and conclusions to apply deduction rules or unification. I've tried this in mmj2 and found that there is not much feedback, but the unification and derivation features were the most useful.

In MM1 the primary interaction mode is just using a "refine" proof (automatically called if you give a quoted lisp expression as the proof, like all the current proofs in peano.mm1). Here's a real interaction session I just had with MM1 for proving the equivalence relation theorems for class equality:

theorem eqsid: $ A == A $ =
'_;
 ^ |- A == A
'(ax_gen _);
         ^ |- ?a e. A <-> ?a e. A
'(!! ax_gen x _);
              ^ |- x e. A <-> x e. A
'(!! ax_gen x biid);

theorem eqscom: $ A == B -> B == A $ =
'(alimi _);
        ^ |- (?a e. A <-> ?a e. B) -> (?a e. B <-> ?a e. A)
'(!! x alimi _);
             ^ |- (x e. A <-> x e. B) -> (x e. B <-> x e. A)
'(!! x alimi bicom);

theorem eqstr: $ A == B -> B == C -> A == C $ =
'(syl _ ax_6);
        ^^^^ failed to unify: ?b -> ?c =?= E. ?e ?e = ?d
'(syl _ ax_4);
      ^ |- A == B -> A. ?a ((?a e. B <-> ?a e. C) -> (?a e. A <-> ?a e. C))
'(syl _ (!! ax_4 x));
      ^ |- A == B -> A. x ((x e. B <-> x e. C) -> (x e. A <-> x e. C))
'(syl (alimi _) (!! ax_4 x));
             ^ |- (x e. A <-> x e. B) -> (x e. B <-> x e. C) -> (x e. A <-> x e. C)
'(syl (!! alimi x _) ax_4);
                  ^ |- (x e. A <-> x e. B) -> (x e. B <-> x e. C) -> (x e. A <-> x e. C)
'(syl (!! alimi x eqtr) ax_4);
  ^^^ failed to unify:
      A == B -> B == C -> A == C
        =?=
      A. x ?f = ?g -> A. x ?g = ?h -> A. x ?f = ?h
'(syl (!! alimi x bitr) ax_4);

I would argue that the number of characters I have to type here is about the same as you would get with an mmj2 proof, but because the computer isn't constantly rewriting the text area I have more control and don't have to move the cursor all around the place.

(By the way, did you notice the automatic definition unfolding? Sweet!)
 
This is not always a valid move, for instance if the lisp program also has some side effects like adding new definitions and so on, but I imagine that many tactics will fit this mode of operation, and as long as the user is in the driver's seat here they can choose to use it only when it is applicable.

Exactly. I find it inconvenient when the system is doing something unexpected and I want to examine it first and then make a decision myself.

This is also an advantage of having a complete record of your interaction on the page so you can edit it as desired.

PS: mm0-hs now supports hover (show me the statement of the theorem) and go to definition.

vvs

unread,
Jul 23, 2019, 7:24:56 AM7/23/19
to Metamath
On Tuesday, July 23, 2019 at 3:00:55 AM UTC+3, Mario Carneiro wrote:
On Mon, Jul 22, 2019 at 7:13 AM vvs <vvs...@gmail.com> wrote:
Writing a tactic for a specific theorem seems too cumbersome when I could just do it directly.

That depends on the tactic, and the theorem. The point of tactics is that they automate proof writing, and the reason the tactic language is commingled with the definitions and proofs is because some tactics are domain specific.

I suspect that my reasoning is too vague. I didn't argue against tactics per se; I just want to point out that they are redundant in cases where I can just choose from several ready alternatives. Also, reasoning about visible alternatives is much easier than reasoning about the results of code execution.

In MM1 the primary interaction mode is just using a "refine" proof (automatically called if you give a quoted lisp expression as the proof, like all the current proofs in peano.mm1). Here's a real interaction session I just had with MM1 for proving the equivalence relation theorems for class equality:

theorem eqsid: $ A == A $ =
'_;
 ^ |- A == A
'(ax_gen _);
         ^ |- ?a e. A <-> ?a e. A
'(!! ax_gen x _);
              ^ |- x e. A <-> x e. A
'(!! ax_gen x biid);

And that's just fine as far as I'm concerned. The problem which bothers me in the first place is that there is no feedback on available alternatives. If I don't know the names of required theorems in advance then I don't have a tool to find them. And what I meant was that some form of a context menu would be nice so it was possible to click on the link and get a list of theorems suitable for unification with online docs as well. That's what CodeLens is good for.

I would argue that the number of characters I have to type here is about the same as you would get with an mmj2 proof, but because the computer isn't constantly rewriting the text area I have more control and don't have to move the cursor all around the place.

And that's good. But the point is that I have no control over unification and should know all the theorems in advance. In mmj2 I can get an interactive list of candidates. Also I don't have to put all the hypotheses there myself as they would be inferred if I chose so.

Exactly. I find it inconvenient when the system is doing something unexpected and I want to examine it first and then make a decision myself.

This is also an advantage of having a complete record of your interaction on the page so you can edit it as desired.

Right now I just have a "command line prompt" before me (well, sort of) and I'm stumped about what my choices actually are. Your examples at least show how it is supposed to work, but there are not enough interactive hints yet.

I hope that these are just temporary restrictions and you didn't take this discussion as a sign of discouragement. I really like how it goes and in what direction.

Mario Carneiro

unread,
Jul 23, 2019, 8:03:28 AM7/23/19
to metamath
On Tue, Jul 23, 2019 at 7:24 AM vvs <vvs...@gmail.com> wrote:
On Tuesday, July 23, 2019 at 3:00:55 AM UTC+3, Mario Carneiro wrote:
On Mon, Jul 22, 2019 at 7:13 AM vvs <vvs...@gmail.com> wrote:
Writing a tactic for a specific theorem seems too cumbersome when I could just do it directly.

That depends on the tactic, and the theorem. The point of tactics is that they automate proof writing, and the reason the tactic language is commingled with the definitions and proofs is because some tactics are domain specific.

I suspect that my reasoning is too vague. I didn't argue against tactics per se; I just want to point out that they are redundant in cases where I can just choose from several ready alternatives. Also, reasoning about visible alternatives is much easier than reasoning about the results of code execution.

That depends on the tactic as well. In my experience with lean users, most people have rather more difficulty dealing with the theorem-heavy proof style I use than with calling a couple of reasonably well specified super-tactics. If the tactic is simple and predictable, then the flexibility of being a tactic means it applies in more contexts than a plain theorem would, which means you have fewer names to remember. In the right hands you can go very far with automation style proofs. Unfortunately I don't think those hands are mine, since I seem to have inherited a DIY attitude from metamath and don't make much essential use of tactics even when they are available.
 
In MM1 the primary interaction mode is just using a "refine" proof (automatically called if you give a quoted lisp expression as the proof, like all the current proofs in peano.mm1). Here's a real interaction session I just had with MM1 for proving the equivalence relation theorems for class equality:

theorem eqsid: $ A == A $ =
'_;
 ^ |- A == A
'(ax_gen _);
         ^ |- ?a e. A <-> ?a e. A
'(!! ax_gen x _);
              ^ |- x e. A <-> x e. A
'(!! ax_gen x biid);

And that's just fine as far as I'm concerned. The problem which bothers me in the first place is that there is no feedback on available alternatives. If I don't know the names of required theorems in advance then I don't have a tool to find them.

You are absolutely right that MM1 is not currently very helpful at suggesting theorem names. This style works for me because I live the metamath naming convention (I've adapted it slightly for peano.mm1 but it is heavily influenced by the metamath names), but we should be able to do better than that.
 
And what I meant was that some form of a context menu would be nice so it was possible to click on the link and get a list of theorems suitable for unification with online docs as well. That's what CodeLens is good for.

This is already on the todo list, probably for tomorrow: getting the list of all definitions so that symbol search works, and autocomplete. I intend to implement a scored unification system for the autocomplete so that you only see things that match the target, with deeper matches ranking higher (i.e. you don't always get ax-mp on top of the list like in mmj2 because that's not a very precise match), in addition to filtering based on the name. That all might be too computationally intensive to run continuously, but for a code action kind of thing it should be okay.
 
I would argue that the number of characters I have to type here is about the same as you would get with an mmj2 proof, but because the computer isn't constantly rewriting the text area I have more control and don't have to move the cursor all around the place.

And that's good. But the point is that I have no control over unification and should know all the theorems in advance. In mmj2 I can get an interactive list of candidates. Also I don't have to put all the hypotheses there myself as they would be inferred if I chose so.

Oh, that hadn't occurred to me. It's actually a bit tricky to use hypothesis information for unification with the lisp expression style, because the hypotheses come after the statement of the theorem. I guess you could write

(f (bar))
  ^

where (bar) is some subproof of A and the target type of (f (bar)) is B and f doesn't exist, and when you hit ctrl-space after the f it autocompletes with foo : A -> B using both pieces of information to distinguish it from other statements.


Exactly. I find it inconvenient when the system is doing something unexpected and I want to examine it first and then make a decision myself.

This is also an advantage of having a complete record of your interaction on the page so you can edit it as desired.

Right now I just have a "command line prompt" before me (well, sort of) and I'm stumped about what my choices actually are. Your examples at least show how it is supposed to work, but there are not enough interactive hints yet.

The language definitely needs documentation. I started writing a spec but I decided to delay it because the language is changing too fast and I didn't want to pin down any bad choices just yet. The major structure of the file is fairly stable, but the list of lisp primitives grows by the day. (The actual turing complete core of the language is already done, but there is an open ended list of hooks into the elaborator to enable useful tactic writing in lisp.)
 
I hope that these are just temporary restrictions and you didn't take this discussion as a sign of discouragement. I really like how it's going and in what direction it's headed.

Certainly not. I very much appreciate the encouragement and ideas. MM1 is very malleable right now so this is a good time to propose interaction features.


vvs

unread,
Jul 23, 2019, 9:02:14 AM7/23/19
to Metamath
On Tuesday, July 23, 2019 at 3:03:28 PM UTC+3, Mario Carneiro wrote:
In the right hands you can go very far with automation style proofs. Unfortunately I don't think those hands are mine, since I seem to have inherited a DIY attitude from metamath and don't make much essential use of tactics even when they are available.

I would argue that there are really two kinds of potential users: "programmers" and "mathematicians". "Programmers" feel comfortable constantly adjusting their tools to better suit their personal requirements. "Mathematicians" (or "students" if you prefer) are just here to see proofs; they are not interested in writing the tools themselves. Depending on the particular audience, you might get a different opinion.

vvs

unread,
Jul 23, 2019, 11:31:05 AM7/23/19
to meta...@googlegroups.com
On Tuesday, July 23, 2019 at 3:00:55 AM UTC+3, Mario Carneiro wrote:
(By the way, did you notice the automatic definition unfolding? Sweet!)

What definition and where? MM1 or lisp? Can you give an example?

BTW, there are a few other things that seem inconsistent to me. Some variables have no hovers in math blocks, e.g. ps in ax_3. Also, variable names are not unified in hovers, so there could be ph->ps->ph in one place and b in another, e.g. in a1i. That means more mental calculation steps for the user and is different from mmj2.

Another cosmetic problem: there is an MM0 traceback every time some vscode settings are changed: "haskell-lsp:no handler for. Object (fromList [("params",Object (fromList [("value",String "off")])),("method",String "$/setTraceNotification"),("jsonrpc",String "2.0")])".

David A. Wheeler

unread,
Jul 23, 2019, 4:37:48 PM7/23/19
to metamath, metamath
On Tue, 23 Jul 2019 08:03:14 -0400, Mario Carneiro <di....@gmail.com> wrote:
> I very much appreciate the encouragement and ideas. MM1 is
> very malleable right now so this is a good time to propose interaction
> features.

I've been following this with interest.

In the long term I think it's important to support AI, specifically machine learning approaches. Holophrasm is interesting, but it suffers from only being able to learn from existing theorems. I think the longer-term successful path for creating Metamath theorems using machine learning would use reinforcement learning or an approach similar to it as demonstrated by AlphaZero ( https://en.wikipedia.org/wiki/AlphaZero ).

To *support* this, there would need to be some sort of API that would at least let you (1) find current theorem state, and (2) allow you to apply/undo those options (and see the new state). Being able to find out "possible options" would be nice too.
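
To make (1) and (2) a little more concrete, here is a purely hypothetical sketch in Python of the shape such an API could take (none of these names exist in any MM0/MM1 tool; it is just the minimal surface a reinforcement-learning loop would need):

from dataclasses import dataclass
from typing import List, Tuple

@dataclass(frozen=True)
class ProofState:
    """An immutable snapshot of the prover: the open goals plus whatever
    context (hypotheses, metavariables) the prover chooses to expose."""
    goals: Tuple[str, ...]

class ProverEnv:
    def state(self) -> ProofState:
        """(1) Report the current theorem state."""
        raise NotImplementedError

    def options(self, state: ProofState) -> List[str]:
        """The 'possible options' query: theorems/tactics applicable here."""
        raise NotImplementedError

    def apply(self, state: ProofState, option: str) -> ProofState:
        """(2) Apply an option and return the new state.  Keeping states
        immutable makes undo a matter of reusing an earlier state."""
        raise NotImplementedError

An AlphaZero-style search would then only need options() for move generation and apply() for rollouts, with undo amounting to holding on to earlier ProofState values.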

--- David A. Wheeler

Mario Carneiro

unread,
Jul 23, 2019, 8:08:04 PM7/23/19
to metamath
On Tue, Jul 23, 2019 at 11:31 AM vvs <vvs...@gmail.com> wrote:
On Tuesday, July 23, 2019 at 3:00:55 AM UTC+3, Mario Carneiro wrote:
(By the way, did you notice the automatic definition unfolding? Sweet!)

What definition and where? MM1 or lisp? Can you give an example?

Here's a simple example:

def iff (a b: wff): wff = $ (a -> b) /\ (b -> a) $;
infixl iff: $<->$ prec 20;

theorem bi1: $ (a <-> b) -> a -> b $ = 'anl;

where

theorem anl: $ a /\ b -> a $;

In Metamath that proof would not work, because the goal is (a <-> b) -> a -> b, not  (a -> b) /\ (b -> a) -> a -> b. But MM0 has the notion of unfolding definitions built in, and refine will automatically unfold definitions in order to resolve a unification failure (in this case, and != iff). So that means that almost all definition unfolding is implicit, although you will be able to unfold explicitly too using a "change" tactic that says "this is my goal now, make sure it's the same as the current goal up to unfolding definitions".
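
As a toy model of that behaviour (plain Python rather than the actual MM1 elaborator; the term encoding and the unfolding strategy are simplified for illustration), the following unifier falls back to unfolding the goal's head symbol when it is a definition, which is exactly what lets anl apply to the bi1 goal above:

# Terms are tuples ('head', args...); bare strings in a pattern are
# metavariables.  On a head mismatch, if the goal's head is a definition,
# unfold it and try again.

DEFS = {
    # iff a b  :=  (a -> b) /\ (b -> a)
    'iff': lambda a, b: ('an', ('imp', a, b), ('imp', b, a)),
}

def unify(pat, goal, subst):
    if isinstance(pat, str):                          # metavariable
        if pat in subst:
            return subst[pat] == goal
        subst[pat] = goal
        return True
    if isinstance(goal, tuple) and pat[0] == goal[0] and len(pat) == len(goal):
        return all(unify(p, g, subst) for p, g in zip(pat[1:], goal[1:]))
    if isinstance(goal, tuple) and goal[0] in DEFS:   # unfold and retry
        return unify(pat, DEFS[goal[0]](*goal[1:]), subst)
    return False

goal = ('imp', ('iff', 'A', 'B'), ('imp', 'A', 'B'))  # (a <-> b) -> a -> b
anl  = ('imp', ('an', 'x', 'y'), 'x')                 # x /\ y -> x

subst = {}
print(unify(anl, goal, subst))  # True
print(subst)                    # {'x': ('imp', 'A', 'B'), 'y': ('imp', 'B', 'A')}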
 
BTW, there are a few other things that seem inconsistent to me. Some variables have no hovers in math blocks, e.g. ps in ax_3.

There was a bug in hover support for math blocks (note to self, don't binary search an array unless it's sorted), but it should be fixed in the latest version.

Also, variable names are not unified in hovers, so there could be ph->ps->ph in one place and b in another, e.g. in a1i. That means more mental calculation steps for the user and is different from mmj2.

When you hover over a theorem statement, it displays (a pretty printed version of) the source statement, with the source variables. So in this case since ax_mp was stated with "ph" and "ps" you see it in the hover.

I'm thinking about adding a second hover that displays the "runtime type" of the assertion, i.e. what we actually got from unification. For example, if you hover over ax_1 in

theorem a1i (h: $ b $): $ a -> b $ = '(ax_1 h);

you would see

axiom ax_1 (ph ps: wff): $ ph -> ps -> ph $;
---------------------------
$ b -> a -> b $

and if you hover over syl in

theorem a1d (h: $ a -> b $): $ a -> c -> b $ = '(syl h ax_1);

you will see

theorem syl (a b c: wff): $ a -> b $ > $ b -> c $ > $ a -> c $;
---------------------------
$ a -> b $ > $ b -> c -> b $ > $ a -> c -> b $

This should go a long way to recovering a bit of the readability of mmj2 worksheets. (Another alternative for getting this style is a lisp command like (explode 'a1d) that will display the proof in an mmj2 like style, with steps and hypotheses and statements on every step.)

Another cosmetic problem: there is an MM0 traceback every time some vscode settings are changed: "haskell-lsp:no handler for. Object (fromList [("params",Object (fromList [("value",String "off")])),("method",String "$/setTraceNotification"),("jsonrpc",String "2.0")])".

I'm aware of this, but I haven't done anything about it because I am still debugging the LSP server and I want to know if I get any unknown commands from VSCode. "$/setTraceNotification" is a vscode-specific LSP notification that is not in the specification, but the spec does say that you should ignore $/ instructions you don't understand.

On Tue, Jul 23, 2019 at 4:37 PM David A. Wheeler <dwhe...@dwheeler.com> wrote:
there would need to be some sort of API that would at least let you (1) find current theorem state, and (2) allow you to apply/undo those options (and see the new state).  Being able to find out "possible options" would be nice too.

Hm, I'm starting to second guess a decision I made about metavariable representation now. Currently, metavariables are just mutable state, so they start out as "expecting type T" and end up as "assigned to term foo". This is done because metavariables can appear in many places, copied about by unification, and it is much more efficient to update only one location and have everything follow. But it makes undo tricky. The user can undo easily by just writing something else, but that reprocesses the whole file (for now) so it wouldn't be a good idea to do this in a tight loop (for example trying a hundred theorems to see which work). Lean uses a more functional style here, where tactics transform the state into a new state, and you can just restore the original state and all those unifications are undone, but it slows down straight line code, which is important for making full MM1 file checking fast. (It probably still won't be fast enough to check all of set.mm on each keypress, assuming it was all converted to a similarly structured MM1 file, but I would like saved MM1 proofs to not take much longer to run than the resulting MM0 proof takes to check.)

Instead, I think I will have (save-state) and (restore-state s) primitives so that you can implement call/cc and try/catch on the lisp side. That should allow this feature to not cause a performance penalty unless it's being used. (For the particular case of unifying random theorems from the library to see what sticks, I will have a separate, faster option using temporary metavariables. But I will defer on this discussion until I have a working prototype.)
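
As a rough model of what (save-state) and (restore-state s) would buy you (ordinary Python rather than MM1 lisp, and deliberately ignoring the sharing issues described above by keeping all assignments in a single table that can be snapshotted):

class Elaborator:
    def __init__(self):
        self.mvars = {}                  # metavariable -> assigned term

    def save_state(self):
        return dict(self.mvars)          # cheap snapshot of the assignments

    def restore_state(self, snapshot):
        self.mvars = dict(snapshot)      # undo everything since the snapshot

    def assign(self, mvar, term):
        self.mvars[mvar] = term

def try_tactic(elab, tactic):
    """try/catch built from save/restore: run the tactic, and roll the
    unification state back if it fails."""
    snap = elab.save_state()
    try:
        return tactic(elab)
    except Exception:
        elab.restore_state(snap)
        return None

elab = Elaborator()

def bad_tactic(e):
    e.assign('?g1', ('wi', 'ph', 'ps'))
    raise RuntimeError('unification failed')

try_tactic(elab, bad_tactic)
print(elab.mvars)  # {} -- the partial assignment was rolled back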

Mario

vvs

unread,
Jul 24, 2019, 7:51:25 AM7/24/19
to Metamath
On Wednesday, July 24, 2019 at 3:08:04 AM UTC+3, Mario Carneiro wrote:
So that means that almost all definition unfolding is implicit, although you will be able to unfold explicitly too using a "change" tactic that says "this is my goal now, make sure it's the same as the current goal up to unfolding definitions".

Ah, so more automation just like applying ax_mp. That's what tactics are good for. I can imagine that some tactics could be written that will enable natural deduction or even sequent calculus proofs, though I don't know what should be done about user interface.

This should go a long way to recovering a bit of the readability of mmj2 worksheets. (Another alternative for getting this style is a lisp command like (explode 'a1d) that will display the proof in an mmj2 like style, with steps and hypotheses and statements on every step.)

I bet that in the end you will get almost as many lisp tactics as there are theorems, and that means the same problem of finding their names. I think the UI should provide a good way to explore everything in the library, which is a big limitation in mmj2.

Mario Carneiro

unread,
Jul 24, 2019, 10:58:26 AM7/24/19
to metamath
On Wed, Jul 24, 2019 at 7:51 AM vvs <vvs...@gmail.com> wrote:
On Wednesday, July 24, 2019 at 3:08:04 AM UTC+3, Mario Carneiro wrote:
So that means that almost all definition unfolding is implicit, although you will be able to unfold explicitly too using a "change" tactic that says "this is my goal now, make sure it's the same as the current goal up to unfolding definitions".

Ah, so more automation just like applying ax_mp. That's what tactics are good for. I can imagine that some tactics could be written that will enable natural deduction or even sequent calculus proofs, though I don't know what should be done about user interface.

I've been thinking a lot about this, even from before I started on MM0. I would like to have a tactic state where I don't have to see that "ph ->" hanging about; instead I see all the things I can prove from my context (hypotheses, subproofs, components of the context if it's an explicit conjunction) as possible hypotheses to use, and my goal (without the ph ->), and the tactic handles routing all the simpl and adantr applications.

With MM1, I think this can be done. The main problem is that you have to have shims and layers over all the "normal" operations to make sure the abstraction doesn't leak. (stat) is just a regular function, so you could implement your own version of it; this handles the display problem. (refine) is also a regular function, although it gets called implicitly a lot. If you replace it with your own version, you can insert "syl" applications and such. But then hovers want info about what just happened, so you need hooks for that too. It gets tricky to cover all the avenues.
 
This should go a long way to recovering a bit of the readability of mmj2 worksheets. (Another alternative for getting this style is a lisp command like (explode 'a1d) that will display the proof in an mmj2 like style, with steps and hypotheses and statements on every step.)

I bet that in the end you will get almost as many lisp tactics as there are theorems, and that means the same problem of finding their names. I think the UI should provide a good way to explore everything in the library, which is a big limitation in mmj2.

The list of lisp definitions can be searched and indexed just as well as MM0 definitions. I was hoping to get to this today, but instead I ended up writing language documentation:
https://github.com/digama0/mm0/blob/master/mm0-hs/mm1.md

David A. Wheeler

unread,
Jul 24, 2019, 5:32:04 PM7/24/19
to metamath, metamath
On Wed, 24 Jul 2019 10:58:12 -0400, Mario Carneiro <di....@gmail.com> wrote:
> The list of lisp definitions can be searched and indexed just as well as
> MM0 definitions. I was hoping to get to this today, but instead I ended up
> writing language documentation:
>
> https://github.com/digama0/mm0/blob/master/mm0-hs/mm1.md

May I suggest supporting the syntax of at least simple neoteric expressions of
Scheme SRFI 105 ( https://srfi.schemers.org/srfi-105/srfi-105.html )?
This would mean the syntax reader would transform:
e(...) ⇒ (e ...)

This means that if you read an atom and it has a "(" without a space before it,
treat the preceding atom as the first element in a list. Examples:

cos(x) ⇒ (cos x), f(a b) ⇒ (f a b), exit() ⇒ (exit), and read(. options) ⇒ (read . options)

This means you can use Lisp/Scheme while using a notation that
looks a little more conventional.

I, at least, would prefer to work in that format.
That would let you write this:

(def (fact x) (if (= x 0) 1 (* x (fact (- x 1)))))

as:

def(fact(x) if(=(x 0) 1 *(x fact(-(x 1)))))

Also: May I suggest that "do blocks" use '['...']' or '('...')' instead?
It'd be nice to be able to use [] and {} for other purposes,
and it doesn't appear that curly braces have to be used for that singular case.
In particular, my "curly-infix" notation uses {}, and it'd be nice to be able
to add that later. In that notation "{..}" is a way to state a list in infix order,
so you can express the same thing as:

def(fact(x) if({x = 0} 1 {x * fact{x - 1}}))

--- David A. Wheeler

David A. Wheeler

unread,
Jul 24, 2019, 7:16:12 PM7/24/19
to metamath, metamath
On Tue, 23 Jul 2019 20:07:50 -0400, Mario Carneiro <di....@gmail.com> wrote:
> Hm, I'm starting to second guess a decision I made about metavariable
> representation now. Currently, metavariables are just mutable state...
> But it makes undo tricky.

I think having *some* way to undo is vitally important longer-term.
I suspect (save-state) and (restore-state s) would work, at least within tactics.

Of course, having *a* working prototype is more important than nothing at all :-).

--- David A. Wheeler

Mario Carneiro

unread,
Jul 24, 2019, 11:44:18 PM7/24/19
to metamath
Since I knew you were the author of sweet s-expressions, I suspected this topic might come up eventually. ;) I have a collection of thoughts here that don't argue for a particular position one way or the other.

* I am deeply conflicted about whether to make the lisp syntax more complicated. Lisp is almost a poster boy for languages that say "we don't care about syntax".
* MM1 has two conflicting goals: Keep the language simple, and also make it easy to use. More precisely, ease of use doesn't come "out of the box" but it gives you the tools you need to make improvements to the language inside itself, so that you can make your own DSL.
* The one place where that doesn't actually hold true is in the syntax. But I would prefer a racket-like solution to the syntax improvement problem rather than baking improvements into MM1, because I do not want MM1 to become a monolith. It should be possible to reimplement MM1 in another language, possibly with a different set of extensions. Every new feature in MM1 is weighed against the added cost of possible reimplementations.
* In case you haven't noticed, the syntax of MM0 and MM1 is heavily inspired by the lambda calculus, and it makes more sense in this context to treat f x as application rather than f(x). (Lisp is a slight departure from this since x and (x) are not the same, and both MM0 and lisp use n-ary application, so to some extent the use of notational tricks like "term foo: A > B > C;" is a forced analogy because currying doesn't actually work.)
* I personally don't find f(e1 e2) to be a major improvement of notation over (f e1 e2). I haven't used a language where function calls are written as f(x) since I stopped writing java. While this doesn't say much in the way of general language design, it makes a difference for a language for which I am the primary user, since I have a big goal and want to focus my efforts on things that will produce an actual productivity improvement.
* While I consider simple neoteric on the same level of "silly syntax" as removing semicolons, I think that curly infix has a greater possibility for readability improvements. I have never attempted to write a large program in lisp, but this is usually the first thing I notice when looking at other large programs.
* One other piece of scheme extension syntax I have seen is simply [] as synonyms for (). This is used in chicken scheme and racket, and it helps to break up the monotony of parentheses that makes lisp so notorious.
* I very much miss the notation f $ g x for f (g x) from lean and haskell, which also has a great capability for parenthesis reduction. Although $ is already taken, it wouldn't be hard to modify the parser so that (f x @ g y @ h z) translates to (f x (g y (h z))). (A small sketch of this rewrite appears after this list.)
* I don't know how much readability of tactics actually matters though. It will never be readable enough unless you have a certain level of programming maturity anyway, and it should constitute a minority of code if it's being used correctly, so I'm comfortable with a bit of syntax ugliness here.
* As for the braces in "do" blocks, they are both unnecessary and don't interfere with the use of curly infix.
  * The whole point of bracket-like delimiters is that they come in pairs, so there is no danger of confusing {x + y} with the {} of the do block. The parser has already seen the open brace and is looking for a close brace. So unless you think } x + y { is a good notation, the usual parenthesis matching approach will work here.
  * The reason the braces in a do block are unnecessary is because the real delimiter is the semicolon (which is there to enable the simple parsing rule "every MM1 command ends in a semicolon"). It would work equally well to have "do (expr) (expr) ... (expr);" but indentation of that looks a bit weird and the braces help to make it stand out. It could equally well use () as delimiters, but this is far too likely to be misread as a lisp application: do (f x y z); would not be applying f to x y z but would be the sequence of four top level commands f, x, y, z.
* Did I mention that I've written far too many parsers recently? mm0-hs has no less than 6 parsers in it, plus a textmate grammar for the vscode extension. While I guess that makes me an expert, and I have no doubt that I could write a "proper" grammar for a programming language to make it look more like python or something, this was never my intent - I think the parser complication is already above budget. I'm still recovering from the horrible complexity of the lean parser, which is absolutely impossible to reimplement without implementing the entire program. (MM1 has a lot of unnecessary syntax which is there to make parsing easier. For example there is no need to enclose formulas in $$, but this makes for a clean separation of the static (context-free) parser and the dynamic parser. Similarly, none of the semicolons are necessary because all statements are self-closing, but it makes it easier to find the next statement in the event of a parser failure, for example if you are in the middle of typing.)
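
As a footnote to the @ idea above, the whole transformation is just a right-nested split on the @ token. A rough sketch in Python (not the actual MM1 reader; it assumes the input has already been read into nested lists of atoms):

def expand_at(xs):
    """Rewrite [f, x, @, g, y, @, h, z] into [f, x, [g, y, [h, z]]]: split
    on '@' and nest the tail right-associatively, recursing into sublists."""
    xs = [expand_at(x) if isinstance(x, list) else x for x in xs]
    if '@' not in xs:
        return xs
    i = xs.index('@')
    return xs[:i] + [expand_at(xs[i + 1:])]

print(expand_at(['f', 'x', '@', 'g', 'y', '@', 'h', 'z']))
# ['f', 'x', ['g', 'y', ['h', 'z']]]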

For now, syntax enhancements are a low priority, when there are more substantive gains to be had w.r.t. searchability and unification. But I will let others cast their votes on these topics.

Mario

Mario Carneiro

unread,
Jul 25, 2019, 2:41:20 AM7/25/19
to metamath
...that said, implementations are cheap. MM1 supports [e1 e2] lists and {x + y} curly infix now (not neoteric though).

David A. Wheeler

unread,
Jul 25, 2019, 1:00:01 PM7/25/19
to metamath, metamath
On Wed, 24 Jul 2019 23:44:05 -0400, Mario Carneiro <di....@gmail.com> wrote:
> As I knew you were the author of sweet s-expressions, I suspected this
> topic might come up eventually. ;)

Guilty as charged :-).

> * I am deeply conflicted about whether to make the lisp syntax more
> complicated. Lisp is almost a poster boy for languages that say "we don't
> care about syntax".

Syntax isn't the most important thing, but good syntax helps readability/modifiability
by others, and that *does* matter.

> * I personally don't find f(e1 e2) to be a major improvement of notation
> over (f e1 e2). I haven't used a language where function calls are written
> as f(x) since I stopped writing java... it makes a difference for a language for
> which I am the primary user

The f(...) function call syntax is really widely used in programming languages
(e.g., C, C++, Java, C#, JavaScript, Python, Ruby, PHP, Perl, Rust, Fortran, Ada).
That makes it familiar, and familiarity helps.
I imagine you want to eventually be one of many.

> * I very much miss the notation f $ g x for f (g x) from lean and haskell...

I'll note that sweet-expressions has '$' :-).

On Thu, 25 Jul 2019 02:41:07 -0400, Mario Carneiro <di....@gmail.com> wrote:
> ...that said, implementations are cheap. MM1 supports [e1 e2] lists and
> {x + y} curly infix now (not neoteric though).

Cool! Thanks for adding basic curly infix!

It's your call of course, but I thought I'd put in a kibitz.
At the least, I want to encourage you to make choices
so that they *could* be easily added later if desired.

--- David A. Wheeler

David A. Wheeler

unread,
Jul 25, 2019, 4:04:02 PM7/25/19
to metamath, metamath
On Wed, 24 Jul 2019 10:58:12 -0400, Mario Carneiro <di....@gmail.com> wrote:
> The list of lisp definitions can be searched and indexed just as well as
> MM0 definitions. I was hoping to get to this today, but instead I ended up
> writing language documentation:
> https://github.com/digama0/mm0/blob/master/mm0-hs/mm1.md

It looks like comments-to-end-of-line begin with "--".
In most Lisps they start with ";"... why the difference?

--- David A. Wheeler

Mario Carneiro

unread,
Jul 25, 2019, 4:23:39 PM7/25/19
to metamath
The use of -- line comments in MM1 comes from MM0, which comes from Lean, which comes from Haskell. The primary syntax of MM1 is not lisp - rather, it has embedded lisp expressions in certain places and an MM0-like outer syntax. I originally considered just using pure lisp syntax for MM1 for the whole file, i.e. you might write something like:

(sort 'wff '(provable))
(term 'imp '(wff wff) 'wff)
(infixr 'imp "->" 50)
(axiom 'ax_1 '(((ph ps) wff)) "ph -> ps -> ph")
(axiom 'ax_2 '(((ph ps ch) wff)) "(ph -> ps -> ch) -> (ph -> ps) -> ph -> ch")
(axiom 'ax_mp '(((ph ps) wff) (_ "ph") (_ "ph -> ps")) "ps")
(theorem 'a1i '(((ph ps) wff) (h "ph")) "ps -> ph" '(ax_mp h ax_1)) 

You can see how it would go. I decided that syntactic similarity would help people transfer familiarity from one language to the other, which is important because they complement each other - if you are sold on MM0 then you will want MM1 to write proofs, and if you like MM1 then you will want to be able to read the presented MM0 proofs.

But as I mentioned in the other thread, I don't want to create a monolith here, and I encourage others to take these ideas and reinvent them. If you think that lisp-MM1 is better, then be my guest. As a scripting language for producing MM0 proofs, MM1 is in no way unique in the design space. Literally anything that can write a file can write an MM0 proof. The haskell implementation is simply so that I have something that I can use.


Mario Carneiro

unread,
Jul 25, 2019, 4:50:02 PM7/25/19
to metamath
On Thu, Jul 25, 2019 at 1:00 PM David A. Wheeler <dwhe...@dwheeler.com> wrote:
On Wed, 24 Jul 2019 23:44:05 -0400, Mario Carneiro <di....@gmail.com> wrote:
> * I personally don't find f(e1 e2) to be a major improvement of notation
> over (f e1 e2). I haven't used a language where function calls are written
> as f(x) since I stopped writing java... it makes a difference for a language for
> which I am the primary user

The f(...) function call syntax is really widely used in programming languages
(e.g., C, C++, Java, C#, JavaScript, Python, Ruby, PHP, Perl, Rust, Fortran, Ada).
That makes it familiar, and familiarity helps.
I imagine you want to eventually be one of many.

MM0 is coming from the tradition of another language family: Lisp, ML, Haskell, Elm, Idris, Coq, Agda, Lean. These language communities are inspired by Church's lambda calculus model of stateless computation, leading to functional programming, as distinct from the Von Neumann style of "model the machine as a finite state machine with an addressable memory" that led to BCPL, B, and C, later augmented with object-oriented style in C++ and Java. Most of the rest are some combination of these, without any firm position (Ada and Rust are exceptions).

After spending years in formal theorem proving, looking back at programming languages with an eye for correctness, they are to my eyes almost all abhorrent monstrosities which have long since diverged from anything approaching a formal semantics. C and C++ are trying valiantly to write it all down, and as a result we can look at the thousands of pages of standardese to see quite plainly how monstrous they are (C++ of course being a much worse offender). Lisp was the first major language to say "you can have a simple and correct programming language that is also practical", and I want to get back to that ideal. Ultimately, the goal is not MM0 - it is whatever language compilers we can build from verified components in MM0.
 
Mario

vvs

unread,
Jul 25, 2019, 5:16:57 PM7/25/19
to Metamath
On Thursday, July 25, 2019 at 11:50:02 PM UTC+3, Mario Carneiro wrote:
MM0 is coming from the tradition of another language family: Lisp, ML, Haskell, Elm, Idris, Coq, Agda, Lean. These language communities are inspired by Church's lambda calculus model of stateless computation, leading to functional programming, as distinct from the Von Neumann style of "model the machine as a finite state machine with an addressable memory" that lead to BCPL, B, and C, later augmented with object oriented style in C++ and Java. Most of the rest are some combination of these, without any firm position (Ada and Rust are exceptions).

As we are now venturing into programming language ancestry, I just cannot resist the temptation to put in my two cents. Ada should not be considered an exception, because it was heavily influenced by Pascal, which was influenced by Algol W, which was influenced by Algol 60. CPL was influenced by Algol, while C++ was influenced by Algol 68 among others. So all of those languages could be considered Algol-like, and they in turn were influenced by Fortran.

This is just a small nitpick and the above quote is essentially correct.

Mario Carneiro

unread,
Jul 26, 2019, 4:06:56 AM7/26/19
to metamath
You are right, I should probably credit Algol 60 and Algol 68 for that line of languages, and Ada and Rust are certainly both members of that family. The "exception" I refer to is that, unlike the others, they have attempted to take strong stances on "safety", which boils down to some program correctness claim. I think they are both a bit too sprawling as a result of industry use to make this correctness claim actually air-tight, but at least they are trying, and raising awareness about program verification techniques as a result.

vvs

unread,
Jul 26, 2019, 8:57:21 AM7/26/19
to Metamath
On Thursday, July 25, 2019 at 11:50:02 PM UTC+3, Mario Carneiro wrote:
After spending years in formal theorem proving, looking back at programming languages with an eye for correctness, they are to my eyes almost all abhorrent monstrosities which have long since diverged from anything approaching a formal semantics. C and C++ are trying valiantly to write it all down, and as a result we can look at the thousands of pages of standardese to see quite plainly how monstrous they are (C++ of course being a much worse offender). Lisp was the first major language to say "you can have a simple and correct programming language that is also practical", and I want to get back to that ideal. Ultimately, the goal is not MM0 - it is whatever language compilers we can build from verified components in MM0.

In this context you could build a different taxonomy, with Lisp, Forth, APL, Basic and Smalltalk at one end of the spectrum and Ada, C++ and Haskell at the other. This wouldn't be based on lambda calculus vs. Turing machine, but rather on syntax simplicity. Wasn't that the initial argument?

Giovanni Mascellani

unread,
Sep 25, 2019, 4:48:18 AM9/25/19
to meta...@googlegroups.com
Hi,

Il 19/05/19 10:31, Mario Carneiro ha scritto:
> I've also started working on mm0-c and the binary file format. I don't
> have anything specific to share yet, but one interesting feature is that
> the format will serve as read-only memory for the verifier itself, so
> that I think I can achieve verification with O(1) read/write memory (no
> stack! no heap!) and linear time checking (not exactly streaming, but
> with limited random access) so that it really can run in low memory
> contexts. The basic idea is to represent the stack as a linked list and
> embed it in the proof stream itself. Because the stack changes over
> time, it actually appears as a forest instead of a linked list. Each
> proof step contains these stack backreferences in addition to a possible
> heap backreference for actual "Z" steps.

Is there any news about the binary format and the C verifier? I tried it
today, but I can make little sense of what it does. In particular, it
gives no feedback and doesn't seem to read the MM0 file at all.

Thanks, Giovanni.
--
Giovanni Mascellani <g.masc...@gmail.com>
Postdoc researcher - Université Libre de Bruxelles


Antony Bartlett

unread,
Sep 25, 2019, 5:32:03 AM9/25/19
to meta...@googlegroups.com
Il 19/05/19 10:31, Mario Carneiro ha scritto:
> I've also started working on mm0-c and the binary file format. I don't
> have anything specific to share yet, but one interesting feature is that
> the format will serve as read-only memory for the verifier itself, so
> that I think I can achieve verification with O(1) read/write memory (no
> stack! no heap!) and linear time checking (not exactly streaming, but
> with limited random access) so that it really can run in low memory
> contexts. The basic idea is to represent the stack as a linked list and
> embed it in the proof stream itself. Because the stack changes over
> time, it actually appears as a forest instead of a linked list. Each
> proof step contains these stack backreferences in addition to a possible
> heap backreference for actual "Z" steps.

Sorry, I must have missed this the first time round.

So proofs that are notorious resource hogs like the Four Color Theorem and the Kepler Conjecture would "just" be very very large files?  That's exciting!

But I suppose at that scale streaming becomes important because ideally you'd want an (untrusted) program generating the proof and a verifier checking it, without ever having to store a proof as large as Flyspeck on disk?

    Best regards,

        Antony


Giovanni Mascellani

unread,
Sep 25, 2019, 8:05:46 AM9/25/19
to meta...@googlegroups.com
Il 25/09/19 11:31, Antony Bartlett ha scritto:
> So proofs that are notorious resource hogs like the Four Color Theorem
> and the Kepler Conjecture would "just" be very very large files?  That's
> exciting!

The proofs would already be very large files, wouldn't they (except that
nobody has coded them in Metamath yet, to my knowledge)? I think the
point made by Mario is that it would be nice to have a verifier running
in constant RAM space and taking a streaming input, which is something
that does not happen currently with Metamath (because the verifier has
to store all defined assertions and symbols to verify subsequent
theorems). I guess Mario's idea is that the readonly input already has
all the information about the defined assertions and symbols, so the
verifier just has to check them and then it can read them again. This
last bit is probably the part that makes it non-streaming, and I wonder
if there is a way around it.

> But I suppose at that scale streaming becomes important because ideally
> you'd want an (untrusted) program generating the proof and a verifier
> checking it, without ever having to store a proof as large as Flyspeck
> on disk?

The wild dream here (or, at least, my wild dream), if a real streaming
verification can be done, is to implement it within the CPU cache at
computer boot time, so that it can be run even before the BIOS
initializes the dynamic RAM (so you don't have to depend on the
untrusted BIOS code). Of course this thing has a lot of technical
difficulties, including finding a way to actually stream the database in
and the result out (maybe using a serial port or GPIO, which might be
reasonably easy to initialize even without running the BIOS). Take it as
a dream, nothing more.

Incidentally, one of the results of the verification might be the
generation of code to start initializing the machine. This would
implement trusted execution of verified code from the firmware itself.
Even more of a dream.

What is reasonably attainable in the short term, and the reason why I
was asking Mario for updates on his C verifier, is to do verification at
kernel time (i.e., after BIOS has run, but before an operating system is
loaded). I already have a good part of what is needed for this, but need
to understand the C verifier first.

Mario Carneiro

unread,
Sep 25, 2019, 1:26:23 PM9/25/19
to metamath
On Wed, Sep 25, 2019 at 4:48 AM Giovanni Mascellani <g.masc...@gmail.com> wrote:
Hi,

Il 19/05/19 10:31, Mario Carneiro ha scritto:
> I've also started working on mm0-c and the binary file format. I don't
> have anything specific to share yet, but one interesting feature is that
> the format will serve as read-only memory for the verifier itself, so
> that I think I can achieve verification with O(1) read/write memory (no
> stack! no heap!) and linear time checking (not exactly streaming, but
> with limited random access) so that it really can run in low memory
> contexts. The basic idea is to represent the stack as a linked list and
> embed it in the proof stream itself. Because the stack changes over
> time, it actually appears as a forest instead of a linked list. Each
> proof step contains these stack backreferences in addition to a possible
> heap backreference for actual "Z" steps.

Is there any news about the binary format and the C verifier?

Indeed, the C verifier has progressed significantly since that message. I worked with the above proposal for a while (with O(1) read/write memory), but eventually decided to revert to a more traditional metamath-like architecture with an explicitly maintained stack. Storing expressions in situ in the proof currently requires at least 8 bytes for the derived type information (containing the set of variables that the expression depends on and the sort), which is a lot of space per instruction, even though this information is derivable by the verifier. There isn't much of a win in writing something in the file when you already know what's going to be there, if the way to verify it is to calculate the result yourself and equality-test it. In this case you may as well calculate the result yourself and store it in trusted memory (verifier internal state), saving the trouble of reading more bytes on what is basically an IO-bound computation. Additionally, the scheme for putting the stack in situ causes it to very often be widely dispersed in the proof, connected as a linked list. This leads to poor cache locality and lots of random access by comparison to storing a small array stack in trusted memory.

In the current scheme, the verifier maintains the following data structures:

1. The store is a byte buffer that contains data for all expressions that have been seen in the proof. This is where most space is needed, but even on set.mm it is much less than 1MB at peak usage. The allocation strategy here is very simple: allocate the next bit of data on the end and bump the store size; if the store runs out then fail (recompile to increase the limit). When a proof is completed the entire store is dropped. (This data is write-once, so in principle this could be stored in the proof stream in low RW memory environments.)
2. The heap is an array of expressions (pointers into the store). This manages numbered backreferences; we can't let the untrusted input point directly into the store because allocations have different sizes and we can't check if a pointer is correct, so instead we store heap indexes in the untrusted input and use a range check for validity. (This is also write-once.)
3. The stack is an array of expressions (pointers into the store). Most instructions operate on the stack top, and the pointers here keep everything fixed size. (Actually, there is one kind of stack element that is double-wide, but you can always look at the top of the stack and find out if there is more data just below it.)

Unlike metamath, which has essentially only two instructions (let's call them "thm", a numbered step, and "save", the Z step), there are many different opcodes in MM0B. The instruction stream is variable-length and byte-aligned: each instruction is an opcode byte followed by a 0-, 1-, 2-, or 4-byte data field. The top two bits of the opcode byte say how long the data field is, and the other six bits give the opcode itself. (See types.c for a complete list of opcodes.) An interesting one is the "Thm" opcode. Given a data value n, it pops an expression e from the stack (the expression of the theorem to be proved), then looks up the theorem thms[n] to find out how many variable arguments (not hypotheses) it takes and pops them from the stack. It then calls the unifier for thms[n] against the expression e.
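
As an aside, the instruction framing just described is simple enough to sketch in a few lines of Python (only an illustration, not the mm0-c decoder; in particular the little-endian byte order and the 0/1/2/4 ordering of the length codes are assumptions here):

DATA_LEN = [0, 1, 2, 4]          # assumed mapping of the 2-bit length code

def decode(stream, pos):
    """Return (opcode, data, next_pos) for the instruction at pos."""
    b = stream[pos]
    n = DATA_LEN[b >> 6]                        # top two bits: data field size
    data = int.from_bytes(stream[pos + 1:pos + 1 + n], 'little')
    return b & 0x3F, data, pos + 1 + n          # low six bits: the opcode

# e.g. opcode byte 0x42 = 0b01_000010: opcode 2 with a 1-byte data field
print(decode(bytes([0x42, 0x07]), 0))  # (2, 7, 2)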

A new concept in the verifier is that of a unifier for a theorem. This is basically a way to write the statement of the theorem - the hypotheses and conclusion - in a way that makes matching against it easy. It is also a sequence of opcodes but the expressions are represented in *prefix* polish notation instead of postfix RPN. The reason for this is because when you want to check a theorem matches a claim you have to work from the outside in. For clarity, it uses a disjoint set of opcodes (named CMD_UNIFY_* in types.c) but they need not actually be disjoint from the CMD_PROOF_* opcodes because they appear in different places. The unifier maintains the following additional data structures:

4. The unify heap is an array of expressions (pointers into the store). You should think of this as the variable substitution, in that variable n is being replaced with the expression uheap[n]. Interestingly, there are opcodes that can grow the heap even after it is initialized with the expressions popped by the "thm" opcode; this is used to allow highly redundant expressions in a theorem statement to be efficiently matched.
5. The unify stack is an array of expressions (pointers into the store). These are expressions that will be matched and deconstructed by the unify opcodes. Importantly, unification never allocates new expressions to the store - the only way new allocations happen is through the CMD_PROOF_TERM(_SAVE) opcode. This means that the user (the oracle that produces the proof file) always has the ability to ensure that equal expressions are pointer-equal, so unification can do pointer equality instead of a structural check.

For example, suppose we want to prove |- (1+1)+(1+1) given theorem foo: |- x + x. (Ignore that this makes no sense.) Suppose that c1 is "1" and cadd is "+" as a binary function. Then as assembly, the unify script for foo looks like:

foo:             # v0 + v0
  UTerm cadd     # v0, v0
  URef 0         # v0
  URef 0         #

and the proof looks like:

Term c1          # 1
Save             # 1        1 => heap[0]
Ref 0            # 1, 1
Term cadd        # 1 + 1
Save             # 1 + 1    (1+1) => heap[1]
Ref 1            # 1 + 1, 1 + 1
Ref 1            # 1 + 1, 1 + 1, 1 + 1
Term cadd        # 1 + 1, (1+1) + (1+1)
Thm foo          # |- (1+1) + (1+1)

In the unify script, each line is annotated with the state of the unify stack after executing that instruction. In the application "Thm foo", the unify stack will start out with the singleton (1+1) + (1+1), and the unify heap contains the substitution v0 := 1+1, and the unify script is responsible for deconstructing the unify stack until it is empty. The UTerm instruction pops (1+1)+(1+1), ensures that the head symbol is +, and pushes the arguments, which is why (1+1) ends up on the stack twice. Then the two URef 0 steps check that uheap[0] = (1+1) via pointer equality.

The proof script contains Save steps after every subterm that will be reused, in much the same way as metamath uses "Z" steps. In this case this is the expressions heap[0] = (1) and heap[1] = (1+1). Later Ref steps refer to these. (In this case there are no variables in the proof, but if there were the heap would be initialized with them, so Ref is also used to refer to variables.) Because there are only three Term calls in this code, the store contains three cells like so:

0: c1()
16: cadd(0, 0)
40: cadd(16, 16)

The offsets depend on the size of the cells. Here 0 stores the expression (1), 16 stores (1+1), 40 stores (1+1)+(1+1), and all other pointer values are invalid.
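
To tie the pieces together, here is a toy Python model of the proof and unify stack machines running exactly the example above. It is only a model: the store is a Python list and "pointers" are list indices rather than byte offsets, and all but the opcodes used here are omitted.

def run_proof(proof, thms):
    store, heap, stack = [], [], []      # expressions, backrefs, work stack

    def alloc(term, args):
        store.append((term, tuple(args)))
        return len(store) - 1            # "pointer" into the store

    for op, *data in proof:
        if op == 'Term':                 # pop n args, allocate a new term
            t, n = data
            args = [stack.pop() for _ in range(n)][::-1]
            stack.append(alloc(t, args))
        elif op == 'Save':               # remember the stack top on the heap
            heap.append(stack[-1])
        elif op == 'Ref':                # push a saved expression again
            stack.append(heap[data[0]])
        elif op == 'Thm':                # pop conclusion + args, run unifier
            name = data[0]
            e = stack.pop()
            uheap = [stack.pop() for _ in range(thms[name]['args'])][::-1]
            run_unify(thms[name]['unify'], e, uheap, store)
            stack.append(('proof', e))   # |- e
    return stack, store

def run_unify(script, e, uheap, store):
    ustack = [e]
    for op, *data in script:
        if op == 'UTerm':                # check the head symbol, push the args
            t, args = store[ustack.pop()]
            assert t == data[0], 'head mismatch'
            ustack.extend(reversed(args))
        elif op == 'URef':               # pointer equality with the substitution
            assert ustack.pop() == uheap[data[0]], 'unify failure'
    assert not ustack

# foo (x: expr): |- x + x, with the unify script shown in the text.
thms = {'foo': {'args': 1,
                'unify': [('UTerm', 'cadd'), ('URef', 0), ('URef', 0)]}}

proof = [('Term', 'c1', 0), ('Save',), ('Ref', 0), ('Term', 'cadd', 2),
         ('Save',), ('Ref', 1), ('Ref', 1), ('Term', 'cadd', 2), ('Thm', 'foo')]

stack, store = run_proof(proof, thms)
print(stack)  # [('proof', 2)] -- a proof of store[2], i.e. (1+1)+(1+1)
print(store)  # [('c1', ()), ('cadd', (0, 0)), ('cadd', (1, 1))]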

This is all just about making metamath-style checking asymptotically efficient (and although I have not done so formally (yet), I believe I can prove that verification is O(mn), where n is the length of the proof and m is the length of the longest theorem statement, so quadratic in the worst case, but only in the case that you have extremely long statements that take up half the file). To do MM0 checking, it also has to support definitional unfolding, which it does through a convertibility judgment. Essentially, there is a "=" operator built into the language, and if you can prove a = b and |- a, then |- b. To make conversion checking less verbose, it is actually inverted, in the sense that the verifier puts convertibility obligations on the stack and has to discharge them before it can get back to proving regular theorems. The rules look like this:

Conv: [e1, |- e2] -> [|- e1, e1 =?= e2]
Refl: [e =?= e] -> []
Symm: [e1 =?= e2] -> [e2 =?= e1]
Cong: [t e1 ... en =?= t e1' ... en'] -> [en =?= en', ..., e1 =?= e1']
Unfold: [t e1 ... en, e] -> check that t is a definition and execute the unifier for t against e with substitution (e1,...,en). The unifier script may contain UDummy opcodes that pop additional elements from the stack.
  Then run [t e1 ... en =?= e'] -> [e =?= e']
ConvCut: [e1, e2] -> [e1 = e2, e1 =?= e2]
ConvRef n: [e1 =?= e2] -> [], where heap[n] = (e1 = e2)

Here the notation [a,b,c] -> [d,e] means pop c, pop b, pop a, push d, push e on the stack. Notice that none of the convertibility rules require a data argument except ConvRef (which is optional, only needed for referencing saved conversions), because the expressions on the stack already say what needs to be done. In fact, it's possible to get away without any convertibility opcodes at all and just trigger conversion on a mismatch (which is what MM1 does, as well as dependent type theory), but I think it violates the explicitness goals, and furthermore it could cause uncontrolled allocation (allocations not defined by a Term), which would make pointer equality testing incomplete.

I tried it today, but I can make little sense of what it does. In particular, it
gives no feedback and doesn't seem to read the MM0 file at all.

The current version of mm0-c is only half done, since as you have noted it ignores the MM0 file. I wanted to get some relevant performance data on it with only the binary checking part enabled for my ITP talk (and it is looking pretty good: my best performance numbers say 500 ms for set.mm and 5 ms for peano.mm1). This will get slightly worse once the MM0 reader is working, but not significantly so since for big proofs like flyspeck the MM0 file is asymptotically much smaller than the proof.

However, it does report success or failure through the error code. If you compile it with -D BARE then that is all you get, but with the default options it will do its best to stackdump and give relevant surrounding information if it fails. The reason it is so quiet is that it will eventually need stdout for delivering MM0 output commands, so it can only chat on stderr. In short, no output and error code 0 means success.

On Wed, Sep 25, 2019, 5:32 AM Antony Bartlett <a...@akb.me.uk> wrote:
Sorry, I must have missed this the first time round.

So proofs that are notorious resource hogs like the Four Color Theorem and the Kepler Conjecture would "just" be very very large files?  That's exciting!

But I suppose at that scale streaming becomes important because ideally you'd want an (untrusted) program generating the proof and a verifier checking it, without ever having to store a proof as large as Flyspeck on disk?

I'm actually planning to try to convert flyspeck to MM0 as a stress test. The main worry is that the proofs are written in an inefficient way as a result of the multiple translations and so would be inordinately long. I still want to write it to disk because it is unlikely that the HOL Light -> MM0 conversion will run any faster than just running the proof in HOL Light, but the whole point is that this conversion is a one-time cost, and the resulting MM0 proof is comparatively quick to check. If I always run the conversion and the MM0 verifier in tandem then there isn't much gained in terms of speed, although perhaps you have more cause to believe the MM0 verification (or you will once I've proved functional correctness of the verifier).

On Wed, Sep 25, 2019 at 8:05 AM Giovanni Mascellani <g.masc...@gmail.com> wrote:
The proofs would already be very large files, wouldn't they (except that
nobody has coded them in Metamath yet, to my knowledge)?

The current flyspeck proof is stored as an ML program that produces the proof through a carefully managed sequence of trusted function calls, which if traced would give you the actual proof. You can view this as a very efficient kind of compression, since the program that generates the proof might be much smaller than the proof itself (like BB(n) smaller). The only bound you can give on HOL Light verification time is thus O(BB(n)) which is practically worthless. Still, proofs in practice don't take *that* long, and Mark Adams has sent me http://www.proof-technologies.com/flyspeck/replaying.html which is a proof trace of Flyspeck that is 159 MB after gzip, so I don't think MM0 will be more than an order of magnitude larger than this. The isabelle component is maybe a bigger problem, because it comes with its own huge library and everything I have seen suggests that it is wildly inefficient from the metamath point of view. It would be great if both parts could be ported, though, because in that case it might be possible to hook them together for the first time (so that MM0 would be the first theorem prover to have an unconditional proof of the Kepler Conjecture!).
 
I think the point made by Mario is that it would be nice to have a verifier running
in constant RAM space and taking a streaming input, which is something
that does not happen currently with Metamath (because the verifier has
to store all defined assertions and symbols to verify subsequent
theorems). I guess Mario's idea is that the readonly input already has
all the information about the defined assertions and symbols, so the
verifier just have to check them and then it can read them again. This
last bit is probably the part that makes in non streaming, and I wonder
if there is a way around.

The MM0 binary format is designed so that all theorem statements are together and separate from the proofs, so you can keep only the theorem statement part of the file in memory as random access and stream the proofs. As I've already mentioned above, MM0 is optimized for the case where the statements are much smaller than the proofs, which is generally the case in any theorem written by a human. It's not clear if you can prove that a random access portion of memory of about this size is needed, but I don't see any way to improve on it unless you start restricting which theorems can be used in which contexts or add the ability to delete and reclaim memory for a theorem (and in this case obviously the memory is not read-only anymore).

> But I suppose at that scale streaming becomes important because ideally
> you'd want an (untrusted) program generating the proof and a verifier
> checking it, without ever having to store a proof as large as Flyspeck
> on disk?

The wild dream here (or, at least, my wild dream), if a real streaming
verification can be done, is to implement it within the CPU cache at
computer boot time, so that it can be ran even before the BIOS
initializes the dynamic RAM (so you don't have to depend on the
untrusted BIOS code). Of course this thing has a lot of technical
difficulties, including finding a way to actually stream the database in
and the result out (maybe using a serial port or GPIO, which might be
reasonably easy to initialize even without running the BIOS). Take it as
a dream, nothing more.

I also mentioned this dream briefly in my ITP talk on MM0 and x86 (youtube link forthcoming). The 5 ms check time and 307 KB of proof data for peano.mm1 (fitting well within the 4-8 MB of L3 cache on modern Intel processors) make this prospect eminently feasible, even without any fancy streaming.

Incidentally, one of the results of the verification might be the
generation of code to start initializing the machine. This would
implement trusted execution of verified code from the firmware itself.
Even more of a dream.

Unfortunately, it's pretty hard to get yourself into the process before the OS load point unless you have an Intel private key, thanks to Intel's idea of what a trusted computing base means.

What is reasonably attainable in the short term, and the reason why I
was asking Mario for updates on his C verifier, is to do verification at
kernel time (i.e., after BIOS has run, but before an operating system is
loaded). I already have a good part of what is needed for this, but need
to understand the C verifier before.

The C verifier makes very few demands of the runtime system. The only IO that the verify() function can do is to exit(1), and to write() to stdout (and it doesn't actually use stdout yet). (If you don't do a -D BARE compile, it will also printf to stderr when something goes wrong.) All other IO is concentrated in the main() function. It needs to load the proof file, which requires fstat() and open(), and it maps it using mmap(). Currently it uses libc functions to do these things but they are all shims over linux syscalls with the same names, and that's what the verified x86 code will target (it's not going to link against libc).

If you don't have an OS, then those syscalls will need to be implemented or replaced with something equivalent. verify() only requires that you pass it a block of memory containing the proof file (as well as a block of memory containing the MM0 file, eventually). It doesn't matter how you get the data in there, but presumably you will have to read the data from disk somehow by interfacing with the HD driver, and of course it is soundness-critical that you read the correct MM0 file data.

Mario

Antony Bartlett

unread,
Sep 25, 2019, 3:37:24 PM9/25/19
to meta...@googlegroups.com
Dear Mario,

Hales talks about

> more than 23,000 inequalities. The verification of all nonlinear inequalities in HOLLight on the Microsoft Azure cloud took approximately 5000 processor-hours.
(https://arxiv.org/abs/1501.02155)

This must be in your


> proof trace of Flyspeck that is 159 MB after gzip

because you say it's the Isabelle component that you don't have.  You can see from 5000 processor-hours why I was expecting a minimum of terabytes.  So HOLLight must be just very high-level and therefore relatively slow to execute then I guess.  That's encouraging: no need for streaming as I'd assumed, naturally you want to write it to disc as you say.

Thank you so much for your response.


> (so that MM0 would be the first theorem prover to have an unconditional proof of the Kepler Conjecture!).

And good luck with making history!

    Best regards,

        Antony



Mario Carneiro

unread,
Sep 25, 2019, 4:07:36 PM9/25/19
to metamath
On Wed, Sep 25, 2019 at 3:37 PM Antony Bartlett <a...@akb.me.uk> wrote:
Dear Mario,

Hales talks about

> more than 23,000 inequalities. The verification of all nonlinear inequalities in HOLLight on the Microsoft Azure cloud took approximately 5000 processor-hours.
(https://arxiv.org/abs/1501.02155)

This must be in your

> proof trace of Flyspeck that is 159 MB after gzip

because you say it's the Isabelle component that you don't have.  You can see from 5000 processor-hours why I was expecting a minimum of terabytes.  So HOLLight must be just very high-level and therefore relatively slow to execute then I guess.  That's encouraging: no need for streaming as I'd assumed, naturally you want to write it to disc as you say.

I'm still piecing together the high level architecture of the proof. I think Mark's trace only includes the paper part of the proof. I talked to Alexey Solovyev at ITP about the numeric component (the nonlinear inequalities), which dominate the runtime (that's the 23000 inequalities mentioned in your quote), and I think I have a much better story specifically regarding natural number computations, so I want to see if I can interpose on the proof approach they take (which involves computing in a huge base and filling up memory with the multiplication table) and get something a bit more cache friendly. The fact that it's not in ML also makes a huge difference, as does the fact that all the decisions are being made up front - even just tracing and replaying the proof in HOL light speeds it up significantly. The isabelle component is the tame graph enumeration, which seems like it should be a paper proof with a small computational component, but I don't know how extractable that computational component is. The last component is the linear inequalities, but linear inequalities have short proofs that are hard to find so this is easy for an oracle proof. These might also be in Mark's trace, I'm not sure.

Mario

Giovanni Mascellani

unread,
Sep 26, 2019, 6:57:59 AM9/26/19
to meta...@googlegroups.com
Hi,

Il 25/09/19 19:26, Mario Carneiro ha scritto:
> In the current scheme, the verifier maintains the following data structures:

Thanks for the description!

> Unfortunately, it's pretty hard to get yourself into the process before
> the OS load point unless you have an Intel private key thanks to Intel's
> idea of what trusted computing base means.

I knew about Secure Boot (which can usually be customized or disabled).
Does this mean that the BIOS payload is also signed by Intel? It
seems that projects like coreboot and libreboot manage to have something
running before or instead of the BIOS. Maybe they only work on computers
that do not have this protection?

> If you don't have an OS, then those syscalls will need to be implemented
> or replaced with something equivalent. verify() only requires that you
> pass it a block of memory containing the proof file (as well as a block
> of memory containing the MM0 file, eventually). It doesn't matter how
> you get the data in there, but presumably you will have to read the data
> from disk somehow by interfacing with the HD driver, and of course it is
> soundness-critical that you read the correct MM0 file data.

I just got it to work inside my asmc system:

https://gitlab.com/giomasce/asmc/-/jobs/304668187

The CI of course uses a virtual machine, but I am pretty confident it
will also run fine on real hardware (I have already tested asmc on real
hardware, and eventually I will also try it with MM0 as a payload). This
is quite possibly the lowest level at which a theorem checking engine has
ever executed, although I don't have systematic data. Well, it will be
once I actually run it on real hardware.

The stack is still not very trustworthy, as it involves my C compiler
written in G (definitely not bug-free) and tinycc (also known to have
its share of bugs). But it depends neither on Linux nor on gcc.

The code for mm0-c is essentially untouched, except for [1], which is
due to the fact that asmc does not implement automatic resource release
(it does not have the concept of a process and everything happens in the
same address space), and the CI treats it as a failure when some resource
is not released.

[1]
https://github.com/giomasce/mm0/commit/1eb268389afe782634b48e37b36a5495d62b8f7f

Mario Carneiro

unread,
Sep 30, 2019, 4:42:13 PM9/30/19
to metamath
My talk at ITP about MM0 is now on YouTube:

https://youtu.be/7hAShC6K_vA