Cool and a good idea. I wrote my own tree-sitter-shen grammar I can post here. It extends the vanilla Shen grammar with new features behind a few compilation flags. I'm now rewriting Scryer Shen in Common Lisp and I've introduced a functor construction in Shen to mirror ISO Prolog's functors.
Perhaps we can compare notes. I had to expand several rules significantly in my specification to get the more ambiguous parts of the Shen grammar to parse. I also attached the generated parser to a Common Lisp REPL using the cl-tree-sitter library and wrote a pretty printer for the parsed Shen grammar using the Common Lisp Pretty Printing System, for round trip debugging.
Google Groups won't let me post the grammar.js file directly, so here are its contents. Sorry for the wall of text:
/**
* @file tree-sitter parser for the Shen programming language.
* @author Mark Thom <markjor...@gmail.com>
* @license MIT
*/
/// <reference types="tree-sitter-cli/dsl" />
// @ts-check
// Feature flag for the functor extension. When true, `item` also accepts the
// functor form and `pattern` / `prolog_pattern` accept functor patterns; when
// false, `pattern` falls back to the fixed (@p ...), (@s ...), (@v ...) forms
// and `item` / `prolog_pattern` have no functor alternative.
const functor_ext = true;
/**
 * Expression-level functor form: an opening parenthesis, a functor symbol,
 * zero or more `item` arguments, and a closing parenthesis.
 *
 * @param $ - rule-reference object supplied by tree-sitter.
 * @returns the sequence rule describing the functor form.
 */
const functor = $ => {
  const head = field("functor", $.functor_symbol);
  const args = repeat(field("argument", $.item));
  return seq('(', head, args, ')');
};
/**
 * Pattern-level functor form: same shape as the expression-level functor,
 * except that each argument is a `pattern` rather than an `item`.
 *
 * @param $ - rule-reference object supplied by tree-sitter.
 * @returns the sequence rule describing the functor pattern.
 */
const functor_pattern = $ => {
  const head = field("functor", $.functor_symbol);
  const args = repeat(field("argument", $.pattern));
  return seq('(', head, args, ')');
};
/**
 * Item rule without the functor extension. An item is one of:
 * a base pattern, a bracketed list of items, a bracketed list with a
 * `|`-separated tail (precedence 1), an abstraction, or an application.
 *
 * @param $ - rule-reference object supplied by tree-sitter.
 * @returns the choice rule over those alternatives, in that order.
 */
const base_item = $ => {
  const plainList = seq('[', field("list", repeat1($.item)), ']');
  const headTailList = prec(
    1,
    seq('[', field("head", repeat1($.item)), '|', field("tail", $.item), ']')
  );
  return choice(
    $.base_pattern,
    plainList,
    headTailList,
    $.abstraction,
    $.application
  );
};
/**
 * Item rule with the functor extension: the functor form is tried as the
 * first alternative, followed by everything `base_item` accepts.
 *
 * @param $ - rule-reference object supplied by tree-sitter.
 * @returns the choice rule `functor | base_item`.
 */
const extended_item = $ => {
  const functorForm = functor($);
  const plainForm = base_item($);
  return choice(functorForm, plainForm);
};
/**
 * Wrap a literal (or rule) as a single token with lexical precedence 1.
 *
 * @param word - the string or rule to turn into a token.
 * @returns `token(prec(1, word))`.
 */
const keyword = word => {
  const boosted = prec(1, word);
  return token(boosted);
};
/**
 * Tree-sitter grammar for Shen. The exported grammar is named "shen", skips
 * whitespace between tokens, and starts parsing at `source_file` (the first
 * rule listed). Rules that depend on the `functor_ext` flag are marked below.
 */
module.exports = grammar({
name: "shen",
// Whitespace is the only "extra" (skippable) token; no comment syntax is listed here.
extras: $ => [/\s/],
rules: {
// Start rule: zero or more top-level definitions.
source_file: $ => repeat($.definition),
// --- Keywords and punctuation built with `keyword` (token, precedence 1) ---
datatype_kw: $ => keyword('datatype'),
// NOTE(review): defmacro_kw is defined but not referenced by any rule in this file.
defmacro_kw: $ => keyword('defmacro'),
defprolog_kw: $ => keyword('defprolog'),
define_kw: $ => keyword('define'),
colon: $ => keyword(':'),
semicolon: $ => keyword(';'),
if_kw: $ => keyword('if'),
let_kw: $ => keyword('let'),
let_bang_kw: $ => keyword('let!'),
// Lambda may be written either as '/.' or as 'lambda'.
lambda_kw: $ => keyword(choice('/.', 'lambda')),
// --- Tokens with lexical precedence 2 (higher than the symbol tokens below) ---
type_open_kw: $ => token(prec(2, '{')),
type_close_kw: $ => token(prec(2, '}')),
// Either direction of single arrow is accepted as the rule arrow.
arrow: $ => token(prec(2, choice('->', '<-'))),
// NOTE: despite the names, left_double_arrow matches '-->' and
// right_double_arrow matches '<--'.
left_double_arrow: $ => token(prec(2, '-->')),
right_double_arrow: $ => token(prec(2, '<--')),
where_keyword: $ => token(prec(2, 'where')),
// --- Character-class and numeric helper rules ---
// NOTE(review): alpha, digit, lowercase_alpha, signs, integer and float are
// not referenced by any other rule in this file.
alpha: _ => /[a-zA-Z\.=\-*/+_?$!@~><&%\'#`;:{}]/,
digit: _ => /[0-9]/,
lowercase_alpha: _ => /[a-z=\-*/+_?$!@~><&%\'#`;:{}]/,
signs: _ => token(repeat1(choice('+','-'))),
integer: _ => token(/[0-9]+/),
float: _ => token(choice(
seq(/[0-9]+/, '.', /[0-9]+/),
seq('.', /[0-9]+/)
)),
// Numeric literal: optional sign, then either digits with a fractional part
// or plain digits, then an optional exponent.
number: _ => token(prec(2,
/[-+]?(?:\d*\.\d+|\d+)(?:[eE][-+]?\d+)?/
)),
// One or more '_' characters; used as a separator line in datatype_rule.
underline: $ => token(prec(2, repeat1('_'))),
// One or more '=' characters; used as a separator line in datatype_rule.
double_underline: $ => token(prec(2, repeat1('='))),
// '@' followed by a symbol-like name; head of the functor forms.
functor_symbol: $ => token(
prec(2, /@[a-z=\-*/+?$!@~><&%\'#`:;{}][a-zA-Z0-9\.=\-*/+_?$!@~><&%\'#`:{}]*/),
),
// Symbol: starts with a lowercase letter or one of the listed punctuation
// characters; a lone '{' or '}' is also accepted as a symbol.
symbol_literal: $ => choice(
token(prec(1, /[a-z=\-*/+?$!@~><&%\'#`:;][a-zA-Z0-9\.=\-*/+_?$!@~><&%\'#`:]*/)),
keyword('{'),
keyword('}'),
),
// Variable: identifier starting with an uppercase letter.
variable_literal: $ => choice(
token(prec(1, /[A-Z][a-zA-Z0-9\.=\-*/+_?$!@~><&%\'#`:]*/)),
),
// Identifier starting with a lowercase letter; used for definition names.
lowercase_literal: $ => choice(
token(prec(1, /[a-z][a-zA-Z0-9\.=\-*/+_?$!@~><&%\'#`:]*/)),
),
// A single '_' wildcard.
placeholder: $ => token(prec(2, '_')),
// Pattern on the left-hand side of a `rule`: wildcard, atom, list pattern
// with optional '|' tail, explicit (cons car cdr), and then either the
// functor pattern (functor_ext on) or the (@p/@s/@v ...) form (functor_ext off).
pattern: $ => choice(
$.placeholder,
$.base_pattern,
seq('[', repeat1(field("head", $.pattern)), optional(seq('|', field("tail", $.pattern))), ']'),
seq('(', 'cons', field("car", $.pattern), field("cdr", $.pattern), ')'),
functor_ext ? functor_pattern($) :
seq('(', choice('@p', '@s', '@v'),
field("first", $.pattern), repeat1(field("rest", $.pattern)), ')'),
),
boolean_literal: $ => token(prec(2, choice('true', 'false'))),
// Double-quoted string; a backslash may only be followed by one of " \ / b f n r t.
string_literal: $ => token(prec(1, /"([^"\\]|\\["\\/bfnrt])*"/)),
// Lambda abstraction: '(' lambda keyword, one or more variables, body item, ')'.
abstraction: $ => seq(
'(',
$.lambda_kw,
field("parameters", repeat1($.variable_literal)),
field("body", $.item),
')',
),
// Parenthesised sequence of one or more items.
application: $ => seq(
'(',
field("items", repeat1($.item)),
')',
),
// One clause of a `define`: zero or more patterns, an arrow, the result
// item, and an optional `where` guard item.
rule: $ => seq(
repeat(field("patterns", $.pattern)),
$.arrow,
field("result", $.item),
optional(seq($.where_keyword, field("where", $.item)))
),
// Expression rule; includes the functor form only when functor_ext is true.
item: $ => functor_ext ? extended_item($) : base_item($),
// Atoms shared by items, patterns, prolog patterns and types, plus the
// empty forms '()' and '[]'.
base_pattern: $ => choice(
field("boolean", $.boolean_literal),
field("symbol", $.symbol_literal),
field("variable", $.variable_literal),
field("string", $.string_literal),
field("number", $.number),
field("empty", seq('(',')')),
field("nil", seq('[',']')),
),
// Top-level form: datatype, defprolog, define, or any other application.
definition: $ => choice(
$.datatype_definition,
$.prolog_definition,
$.shen_def,
$.application,
),
// (define name {type}? rule+)
shen_def: $ => seq(
'(',
field("keyword", $.define_kw),
field("name", $.lowercase_literal),
optional(field("type", seq(
$.type_open_kw,
field("type_expr", $.type),
$.type_close_kw
))),
repeat1(field("rule", $.rule)),
')',
),
// (datatype name datatype_rule+)
datatype_definition: $ => seq(
'(',
field("keyword", $.datatype_kw),
field("name", $.lowercase_literal),
repeat1(field("rules", $.datatype_rule)),
')',
),
// Side condition of a datatype rule: `if item`, `let pattern item`, or
// `let! pattern item`.
side_condition: $ => choice(
seq($.if_kw, field("condition", $.item)),
seq($.let_kw, field("binding", $.prolog_pattern), field("value", $.item)),
seq($.let_bang_kw, field("binding", $.prolog_pattern), field("value", $.item)),
),
// A formula, optionally extended to `formula (, formula)* >> formula`.
// Both the first formula and the comma-separated ones share the field name
// "context". Left-associative with precedence 1.
scheme: $ => prec.left(1, seq(
field("context", $.formula),
optional(
seq(
field("context", repeat(seq(keyword(','), $.formula))),
keyword('>>'),
field("conclusion", $.formula),
),
),
)),
// A formula terminated by a semicolon; precedence 2, i.e. higher than `scheme`.
simple_scheme: $ => prec.left(2, seq(
field("formula", $.formula),
$.semicolon,
)),
// Either `term : type` (preferred, precedence 1) or a bare item.
formula: $ => choice(
prec(1, seq(field("term", $.item), $.colon, field("type", $.item))),
$.item
),
// Type expression: a '-->' prefix applied to a type, or an inner_type.
type: $ => choice(
prec(1, seq($.left_double_arrow, $.type)),
$.inner_type,
),
// Non-prefixed type forms: atom, application, list forms, or a
// right-associative binary '-->' between two types.
inner_type: $ => choice(
$.base_pattern,
$.application,
seq('[', field("head", $.pattern), '|', field("tail", $.pattern), ']'),
seq('[', repeat1(field("element", $.pattern)), ']'),
prec.right(2, seq($.type, $.left_double_arrow, $.type)), // A --> B
),
// One sequent-calculus style rule inside a datatype: optional side
// conditions and semicolon-terminated formulas, followed by one of three
// shapes: a '=' line with a formula conclusion, a '_' line with a scheme
// conclusion, or scheme premises followed by a '_' line and a scheme conclusion.
datatype_rule: $ => seq(
field("conditions", repeat($.side_condition)),
field("pre_premises", repeat($.simple_scheme)),
choice(
seq(
$.double_underline,
field("conclusion", $.formula),
$.semicolon
),
seq(
$.underline,
field("conclusion", $.scheme),
$.semicolon,
),
seq(
field("premises", repeat(seq($.scheme, $.semicolon))),
$.underline,
field("conclusion", $.scheme),
$.semicolon,
),
)
),
/* // this is a more natural datatype_rule grammar but it's too
// ambiguous for tree-sitter.
datatype_rule: $ => choice(
seq(
field("conditions", repeat($.side_condition)),
field("premises", repeat(seq($.scheme, $.semicolon))),
$.underline,
field("conclusion", $.scheme),
$.semicolon
),
seq(
field("conditions", repeat($.side_condition)),
field("premises", repeat1($.simple_scheme)),
$.double_underline,
field("conclusion", $.formula),
$.semicolon
)
),
*/
// (defprolog name clause+)
prolog_definition: $ => seq(
'(',
$.defprolog_kw,
field("name", $.lowercase_literal),
field("clauses", repeat1($.clause)),
')'
),
// Pattern used in Prolog clause heads and in let/let! side conditions.
// The functor pattern alternative is present only when functor_ext is true.
prolog_pattern: $ => choice(
$.placeholder,
$.base_pattern,
seq('[', field("head", repeat1($.prolog_pattern)), '|', field("tail", $.prolog_pattern), ']'),
field("list", seq('[', repeat1($.prolog_pattern), ']')),
seq('(', 'cons', field("car", $.prolog_pattern), field("cdr", $.prolog_pattern), ')'),
...(functor_ext ? [functor_pattern($)] : []),
),
// Prolog clause: zero or more head patterns, '<--', an optional tail of
// goals, and a terminating semicolon.
clause: $ => prec.left(1, seq(
field("head", repeat($.prolog_pattern)), $.right_double_arrow, optional(field("tail", $.tail)),
$.semicolon,
)),
// Clause body: a right-nested chain where each link is either a cut ('!')
// or an application goal, optionally followed by the rest of the chain.
tail: $ => choice(
seq(field("cut", keyword('!')), optional(field("rest", $.tail))),
seq(field("goal", $.application), optional(field("rest", $.tail))),
),
}
});