I've re-written the parsing and execution functions,
attempting to apply the same ideas. There is now a private
header with the table and other "gobbledegook". And now
just functions in the .c file, with the big one up front.
Execution in APL proceeds right-to-left and this is
accomplished with a relatively straightforward algorithm.
We have 2 stacks (it could also be done with a queue and
a stack) called the left-stack and the right-stack.
The left stack starts at the left edge and expands
on the right.
|- 0 1 2 3 top
The right stack is the opposite, anchored at the right
and growing to the left.
top 3 2 1 0 -|
Of course this are just conceptual distinctions: they're
both just stacks. The left stack is initialized with a
mark object to indicate the left edge, and then
the entire expression. The right stack has a single
null object to indicate the right edge.
|-2*1+⍳4 -|
At each step, we A) move one object to the right stack,
|-2*1+⍳ 4-|
|-2*1+ ⍳4-|
|-2*1 +⍳4-|
If there are at least 4 objects on the right stack, then
classifies the top 4 elements with a set of predicate
functions and then it checks through the list of
grammatical patterns, the above case matching this
production,
/* p[0] p[1] p[2] p[3] func pre x y z post,2*/\
_(L1, EDGE+AVN, VRB, VRB, NOUN, monad, -1, 2,3,-1, 1, 0) \
application of a monadic verb. The numbers in the production indicate
which elements should be preserved, and which should be passed to the
handler function. The result from the handler function is interleaved
back onto the right stack.
|-2*1 +A-|
where A represents the array object returned from the iota function.
(Incidentally this is a lazy array, generating its values on-demand.)
Eventually the expression ought to reduce to 3 objects: a mark,
some result object, and a null. Anything else is an error
TODO handle this error.
I'll give all the header files again, to keep this message self-contained.
josh@LAPTOP-ILO10OOF ~/inca/olmec
$ !.
./olmec
50331691(3,2b) = 134217728(8,0)
50331693(3,2d) = 134217729(8,1)
50331690(3,2a) = 134217730(8,2)
50340724(3,2374) = 134217731(8,3)
50331683(3,23) = 134217732(8,4)
50340723(3,2373) = 134217733(8,5)
50331771(3,7b) = 134217734(8,6)
50331692(3,2c) = 134217735(8,7)
50331707(3,3b) = 134217736(8,8)
50331686(3,26) = 150994944(9,0)
50331712(3,40) = 150994945(9,1)
2*1+⍳4
0032 002a 0031 002b 2373 0034 000d 0000
1
5
2(0,2) 50331690(3,2a) 1(0,1) 67108864(4,0) 4(0,4)
0x0
->4(0,4)
->67108864(4,0)
lookup
1
==134217733(8,5)
->134217728(8,0)
->1(0,1)
match 1
monad
->50331690(3,2a)
lookup
==134217730(8,2)
match 3
dyad
->2(0,2)
->167772160(10,0)
match 3
dyad
83886080(5,0)
1
4
0: 2(0,2)
1: 4(0,4)
2: 6(0,6)
3: 8(0,8)
josh@LAPTOP-ILO10OOF ~/inca/olmec
$ vi ex_private.h
josh@LAPTOP-ILO10OOF ~/inca/olmec
$ head ex.c
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "ar.h"
#include "en.h"
#include "st.h"
#include "wd.h"
#include "vb.h"
josh@LAPTOP-ILO10OOF ~/inca/olmec
$ cat ar.h
#ifndef AR_H_
#define AR_H_
#include "../ppnarg.h"
typedef struct ar {
int type;
int rank; // number of dimensions
int *dims; // size of each dimension
int cons; // constant term of the indexing formula
int *weight; // corresponding coefficient in the indexing formula
int *data; // address of first array element
int *(*func)(struct ar *,int); // data function (if function type)
} *array;
enum type {
normal,
indirect,
function
};
int productdims(int rank, int dims[]);
array array_new_dims(int rank, int dims[]);
array array_new_function(int rank, int dims[],
int *data, int datan, int *(*func)(array,int)); // type=function
int *constant(array a,int idx);
int *j_vector(array a,int idx);
void loaddimsv(int rank, int dims[], va_list ap);
array (array_new)(int rank, ...);
#define array_new(...) (array_new)(PP_NARG(__VA_ARGS__),__VA_ARGS__)
array cast_dims(int data[], int rank, int dims[]); // type=indirect
array (cast)(int data[], int rank, ...); // type=indirect
#define cast(data,...) (cast)(data,PP_NARG(__VA_ARGS__),__VA_ARGS__)
array clone(array a); // type=indirect
array copy(array a);
int *elema(array a, int ind[]);
int *elemv(array a, va_list ap);
int *elem(array a, ...);
int *vector_index(int ind, int dims[], int n, int vec[]);
int ravel_index(int vec[], int dims[], int n);
void transpose2(array a);
void transpose(array a, int shift);
void transposea(array a, int spec[]);
array slice(array a, int i); // type=indirect
array slicea(array a, int spec[]); // type=indirect
array slices(array a, int s[], int f[]); // type=indirect
array extend(array a, int extra); // type=indirect
array cat(array x, array y);
array iota(int n); // type=function
$ cat st.h
/* symbol table */
typedef struct st {
int key;
int val;
int n;
struct st **tab /*[n]*/ ;
} *symtab;
symtab makesymtab(int n);
/* mode=0: prefix match
mode=1: defining search */
symtab findsym(symtab st, int **spp, int *n, int mode);
void def(symtab st, int name, int v);
josh@LAPTOP-ILO10OOF ~/inca/olmec
$ cat wd.h
// scan up to n chars from s and produce 1D array of encoded expression
array scan_expression(int *s, int n);
josh@LAPTOP-ILO10OOF ~/inca/olmec
$ cat vb.h
#define VERBTAB(_) \
/*base monad dyad f g h mr lr rr*/ \
_('+', vid, vplus, 0, 0, 0, 0, 0, 0 ) \
_('-', vneg, vminus, 0, 0, 0, 0, 0, 0 ) \
_('*', vsignum, vtimes, 0, 0, 0, 0, 0, 0 ) \
_(0x2374/*rho*/, vshapeof, vreshape, 0, 0, 0, 0, 0, 0 ) \
_('#', vtally, 0, 0, 0, 0, 0, 0, 0 ) \
_(0x2373/*iota*/, viota, 0, 0, 0, 0, 0, 0, 0 ) \
_('{', vhead, vtake, 0, 0, 0, 0, 1, 0 ) \
_(',', vravel, vcat, 0, 0, 0, 0, 0, 0 ) \
_(';', vraze, vlink, 0, 0, 0, 0, 0, 0 ) \
/**/
typedef struct verb {
int id;
int (*monad)(int,struct verb*);
int (*dyad)(int,int,struct verb*);
int f,g,h; /* operator arguments */
int mr,lr,rr; /* monadic,left,right rank*/
} *verb;
void init_vb(symtab st);
josh@LAPTOP-ILO10OOF ~/inca/olmec
$ cat ex.h
// predicate table contains predicate functions
// and associated enum values
#define PREDTAB(_) \
_( ANY = 1, qa, 1 ) \
_( VAR = 2, qp, gettag(x)==PROG \
|| (gettag(x)==PCHAR && getval(x)!=0x2190 /*leftarrow*/ ) ) \
_( NOUN = 4, qn, gettag(x)==LITERAL \
|| gettag(x)==CHAR \
|| gettag(x)==ARRAY ) \
_( VRB = 8, qv, gettag(x)==VERB ) \
_( DEX = 16, qx, 0 ) /*dextri-monadic verb*/\
_( ADV = 32, qo, gettag(x)==ADVERB && ((verb)getptr(x))->monad ) \
_( LEV = 64, qe, 0 ) /*sinister adverb*/\
_( CONJ = 128, qj, gettag(x)==ADVERB && ((verb)getptr(x))->dyad ) \
_( MARK = 256, qm, gettag(x)==MARKOBJ ) \
_( ASSN = 512, qc, gettag(x)==PCHAR && getval(x) == 0x2190 ) \
_( LPAR = 1024, ql, gettag(x)==LPAROBJ ) \
_( RPAR = 2048, qr, gettag(x)==RPAROBJ ) \
_( NUL = 4096, qu, gettag(x)==NULLOBJ ) \
/**/
// declare predicate functions
#define PRED_DECL(X,Y,...) int Y(int);
PREDTAB(PRED_DECL)
// declare predicate enums and composed patterns
#define PRED_ENUM(X,...) X,
enum predicate { PREDTAB(PRED_ENUM)
EDGE = MARK+ASSN+LPAR,
AVN = VRB+NOUN+ADV };
// execute an expression e with environment st
int execute_expression(array e, symtab st);
josh@LAPTOP-ILO10OOF ~/inca/olmec
$ cat ex_private.h
/* stack type
the size is generously pre-calculated
and so we can skip all bounds checking.
stkp->top is the size (index of next empty slot for next push)
stkp->top-1 is the topmost element
*/
typedef struct stack { int top; object a[1];} stack; /* top==0::empty */
#define stackpush(stkp,el) ((stkp)->a[(stkp)->top++]=(el))
#define stackpop(stkp) ((stkp)->a[--((stkp)->top)])
#define stacktop(stkp) ((stkp)->a[(stkp)->top-1])
int parse_and_lookup_name(stack *lstk, stack *rstk, object x, symtab st);
/* predicate functions are instantiated according to the table
defined in the header.
the q[] function array is used by classify to apply all
predicate functions yielding a sum of all applicable codes
defined in the table. Specific qualities or combinations
may then be determined easily by masking.
*/
#define PRED_FUNC(X,Y,...) int Y(object x){ return __VA_ARGS__; }
PREDTAB(PRED_FUNC)
#define PRED_ENT(X,Y,...) Y,
static int (*q[])(object) = { PREDTAB(PRED_ENT) };
/* encode predicate applications into a binary number
which can be compared to a pattern with a mask */
static inline int classify(object x){
int i,v,r;
for (i=0, v=1, r=0; i<sizeof q/sizeof*q; i++, v*=2)
if (q[i](x))
r |= v;
return r;
}
// the Parse Table defines the grammar of the language
// At each stack move, the top four elements of the right stack
// are checked against each of these patterns. A matching pattern
// returns element t[pre] from the temp area to the right stack
// then calls func(t[x],t[y],t[z]) and pushes the result to the
// right stack, then pushes t[post] and t[post2].
#define PARSETAB(_) \
/* p[0] p[1] p[2] p[3] func pre x y z post,2*/\
_(L0, EDGE, VRB, NOUN, ANY, monad, 3, 1,2,-1, 0,-1) \
_(L1, EDGE+AVN, VRB, VRB, NOUN, monad, -1, 2,3,-1, 1, 0) \
_(L2, ANY, NOUN, DEX, ANY, monad, 3, 2,1,-1, 0,-1) \
_(L3, EDGE+AVN, NOUN, VRB, NOUN, dyad, -1, 1,2,3, 0,-1) \
_(L4, EDGE+AVN, NOUN+VRB, ADV, ANY, adv, 3, 1,2,-1, 0,-1) \
_(L5, ANY, LEV, NOUN+VRB, ANY, adv, 3, 2,1,-1, 0,-1) \
_(L6, EDGE+AVN, NOUN+VRB, CONJ, NOUN+VRB, conj_, -1, 1,2,3, 0,-1) \
_(L7, VAR, ASSN, AVN, ANY, spec, 3, 0,2,-1, -1,-1) \
_(L8, LPAR, ANY, RPAR, ANY, punc, 3, 1,-1,-1, -1,-1) \
_(L9, MARK, ANY, RPAR, ANY, punc, 3, 1,-1,-1, 0,-1) \
_(L10,ANY, LPAR, ANY, NUL, punc, 3, 2,-1,-1, 0,-1) \
/**/
// generate labels to coordinate table and execution
#define PARSETAB_INDEX(label, ...) label,
enum { PARSETAB(PARSETAB_INDEX) };
// create parsetab array of structs containing the patterns
#define PARSETAB_PAT(label, pat1, pat2, pat3, pat4, ...) \
{pat1, pat2, pat3, pat4},
typedef struct parsetab { int c[4]; } parsetab;
static parsetab ptab[] = { PARSETAB(PARSETAB_PAT) };
int check_pattern(int *c, parsetab *ptab, int i);
// perform the grammar production, transforming the stack
#define PARSETAB_ACTION(label,p1,p2,p3,p4, func, pre,x,y,z,post,post2) \
case label: { \
if (pre>=0) stackpush(rstk,t[pre]); \
stackpush(rstk,func(x>=0?t[x]:0,y>=0?t[y]:0,z>=0?t[z]:0,st)); \
if (post>=0) stackpush(rstk,t[post]); \
if (post2>=0) stackpush(rstk,t[post2]); \
} break;
size_t sum_symbol_lengths(array e, int n);
int monad(int f, int y, int dummy, symtab st);
int dyad(int x, int f, int y, symtab st);
int adv(int f, int g, int dummy, symtab st);
int conj_(int f, int g, int h, symtab st);
int spec(int name, int v, int dummy, symtab st);
int punc(int x, int dummy, int dummy2, symtab st);
josh@LAPTOP-ILO10OOF ~/inca/olmec
$ cat ex.c
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "ar.h"
#include "en.h"
#include "st.h"
#include "wd.h"
#include "vb.h"
#include "ex.h"
typedef int object;
#include "ex_private.h"
int check_pattern(int *c, parsetab *ptab, int i){
return c[0] & ptab[i].c[0]
&& c[1] & ptab[i].c[1]
&& c[2] & ptab[i].c[2]
&& c[3] & ptab[i].c[3];
}
// execute expression e using environment st and yield result
int execute_expression(array e, symtab st){
int n = e->dims[0];
int i,j;
object x;
stack *lstk,*rstk;
int docheck;
j=sum_symbol_lengths(e,n);
lstk=malloc(sizeof*lstk + (n+j+1) * sizeof*lstk->a);
lstk->top=0;
stackpush(lstk,mark);
for (i=0; i<n; i++)
stackpush(lstk,e->data[i]); // push entire expression to left stack
rstk=malloc(sizeof*rstk + (n+j+1) * sizeof*rstk->a);
rstk->top=0;
stackpush(rstk,null);
while(lstk->top){ //left stack not empty
x=stackpop(lstk);
printf("->%d(%d,%x)\n", x, gettag(x), getval(x));
if (qp(x)){ // x is a pronoun?
if (parse_and_lookup_name(lstk, rstk, x, st) == null)
return null;
} else {
stackpush(rstk,x);
}
docheck = 1;
while (docheck){ //check rstk with patterns and reduce
docheck = 0;
if (rstk->top>=4){ //enough elements to check?
int c[4];
for (j=0; j<4; j++)
c[j] = classify(rstk->a[rstk->top-1-j]);
//printf("%d %d %d %d\n", c[0], c[1], c[2], c[3]);
for (i=0; i<sizeof ptab/sizeof*ptab; i++){
if (check_pattern(c, ptab, i)) {
object t[4];
printf("match %d\n", i);
t[0] = stackpop(rstk);
t[1] = stackpop(rstk);
t[2] = stackpop(rstk);
t[3] = stackpop(rstk);
switch(i){
PARSETAB(PARSETAB_ACTION)
}
docheck = 1; //stack changed: check again
break;
}
}
}
}
}
//assemble results and return
//TODO check/handle extra elements on stack
//(interpolate?, enclose and cat?)
stackpop(rstk); // mark
x = stackpop(rstk);
free(lstk);
free(rstk);
return x;
}
size_t sum_symbol_lengths(array e, int n){
int i,j;
for (i=j=0; i<n; i++) { // sum symbol lengths
if (gettag(e->data[i])==PROG) {
//printf("%p\n", getptr(e->data[i]));
j+=((array)getptr(e->data[i]))->dims[0];
}
}
return j;
}
/* Parser Actions,
each function is called with x y z parameters defined in PARSETAB
*/
int monad(int f, int y, int dummy, symtab st){
printf("monad\n");
verb v = getptr(f);
return v->monad(y,v);
}
int dyad(int x, int f, int y, symtab st){
printf("dyad\n");
verb v = getptr(f);
return v->dyad(x,y,v);
}
int adv(int f, int g, int dummy, symtab st){
printf("adverb\n");
verb v = getptr(g);
return v->monad(f,v);
}
int conj_(int f, int g, int h, symtab st){
printf("conj\n");
verb v = getptr(g);
return v->dyad(f,h,v);
}
//specification
int spec(int name, int v, int dummy, symtab st){
def(st, name, v);
return v;
}
int punc(int x, int dummy, int dummy2, symtab st){
return x;
}
// lookup name in environment unless to the left of assignment
// if the full name is not found, but a defined prefix is found,
// push the prefix back to the left stack and continue lookup
// with remainder. push value to right stack.
int parse_and_lookup_name(stack *lstk, stack *rstk, object x, symtab st){
if (rstk->top && qc(stacktop(rstk))){ //assignment: no lookup
stackpush(rstk,x);
} else {
printf("lookup\n");
int *s;
int n;
switch(gettag(x)){
case PCHAR: { // single char
s = &x;
n = 1;
} break;
case PROG: { // longer name
array a = getptr(x);
s = a->data;
n = a->dims[0];
} break;
}
int *p = s;
symtab tab = findsym(st,&p,&n,0);
if (tab->val == null) {
printf("error undefined prefix\n");
return null;
}
while (n){ //while name
printf("%d\n", n);
stackpush(lstk,tab->val); //pushback value
s = p;
tab = findsym(st,&p,&n,0); //lookup remaining name
if (tab->val == null) {
printf("error undefined internal\n");
return null;
}
}
//replace name with defined value