Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

any tricks to golf this code further?

14 views
Skip to first unread message

luser- -droog

unread,
Aug 3, 2011, 2:30:27 AM8/3/11
to
I've just wasted a week trying to solve this problem on code-golf:
http://codegolf.stackexchange.com/questions/284/write-an-interpreter-for-the-untyped-lambda-calculus

I think I've squeezed it dry, but I've got to know if anybody has any
*really* sneaky tricks to make it shorter.

Here is the last version before reducing function names and replacing
repeated sequences with macros. It copies the argument string into
scratch memory, and then manipulates (mutilates?) the expression using
cursors and a stripped down suite of McCarthy primitives.

The winning solution was 320 chars of Python. But C lacks a library
call for string replacement. Even with my cleverest hackery, I can't
get the block below 778 chars. Any comments
or howls of horror are appreciated.

#include<stdio.h>
#include<string.h>
char*m,*n;atom(x){return x?(islower(m[x])?m[x]:0):0;}
eq(x,y){return x&&y&&atom(x)==atom(y);}
cell(x){char*t=n,d=0;if(!x||!m[x])return 0;if(m[x]==' ')++x;
if(atom(x)){*n++=m[x];*n++=0;return(n-m)-2;}
if(m[x]=='\\'){memcpy(n,m+x,4);n+=4;*n++=0;return(n-m)-5;}
do{d=m[x]?(m[x]=='('?d+1:(m[x]==')'?d-1:d)):0;*n++=m[x++];}
while(d);*n++=0;return t-m;}
car(x){return x?cell(x+1):0;}
cdr(x){return car(x)?cell(x+strlen(m+car(x))+1):0;}
cons(x,y){char*t=n;return x?(sprintf(n,y?"(%s %s)":"(%s)",m+x,m
+y),n+=strlen(n)+1,t-m):0;}
subst(x,y,z){if(!x||!y||!z)return 0;return atom(z)?(eq(z,y)?x:z):
cons(m[z+1]=='\\'?car(z):subst(x,y,car(z)),cdr(z)?
subst(x,y,cdr(z)):0);}
eval(x){int a;return atom(x)?x:atom(a=car(x))?x:m[a]=='\\'?
cons(a,eval(cdr(x))):
m[car(a)]=='\\'?eval(subst(eval(cdr(x)),cell(a+3),cdr(a))):
eval( cons(eval(a),cdr(x)?eval(cdr(x)):0));}
try(char*s){char*t=strcpy(n,s);n+=strlen(n)+1;int x=t-m;
printf("input: %s\n", s);printf("eval => %s\n", m+eval(x));n=m+1;}

char*samp[]={
"a","a","b","b",
"((\\ a. a) (b))", "(b)",
"((\\ x. x) (\\ y. (\\ z. z)))", "(\\ y. (\\ z. z))",
"(\\ x. ((\\ y. y) x))", "(\\ x. x)",
"(((\\ x. (\\ y. x)) (\\ a. a)) (\\ b. b))", "(\\ a. a)",
"((\\ x. (\\ y. y)) (\\ a. a))", "(\\ y. y)",
"(((\\ x. (\\ y. y)) (\\ a. a)) (\\ b. b))", "(\\ b. b)",
"((\\x. (x x)) (\\x. (x x)))", "undef",
NULL};
#include<unistd.h>

unsigned sz;
#include<signal.h>
void fix(x){signal(SIGSEGV,fix);brk(m+(sz*=2));}
main(){
char**t;
signal(SIGSEGV,fix);
m=n=sbrk(sz=10*getpagesize());
*n++=0;
for(t=samp;*t;t+=2){
try(*t);
printf("s.b. => %s\n\n", t[1]);
}
return 0;
}

luser- -droog

unread,
Aug 3, 2011, 9:16:54 PM8/3/11
to
This should be usefull to anyone attempting to read the previous post:
a lambda calculus interpreter using cursors. This is the pointer
version I used as reference in writing the cursor version. It shows
some experiments in replacing statements with expressions (and
sometimes the other way, to debug it), and replacing functions with
macros (although implicit int allows functions to be still shorter).

Apologies for bad line breaks, but this is for reading, not running.

#include<stdio.h>
#include<string.h>
#include<unistd.h>
char*new,*br,*v="abcdefghijklmnopqrstuvwxyz";
char* atom(char*x) {return ( (x)? (strchr(v,*x)? (strchr(v,*x)): 0):
0);
}
char* eq(char*x,char*y) {return ( ((x)&&(y)&&(atom(x)==atom(y)))?
atom(x): 0);
}

char*cell(char*x,int d){
char*t=new;
if(!x||!*x/*||*x==')'*/)return 0;
if(*x==' ')++x;
if(!d&&atom(x)){ *new++=*x; *new++='\0'; return new-2; }
if(!d&&*x=='\\'){ memcpy(new,x,4); new[4]='\0'; new+=5; return
t; }
do{d=*x?(*x=='('?d+1:(*x==')'?d-1:d)):0;*new++=*x++;}while(d);
//do{ switch(*x){ case 0:d=0;break; case'(':d++;break;
case')':d--;break; } *new++=*x++; }while(d>0);
*new++='\0';
return t;
}

//char* car(char*x) {return (x? cell((x)+1,0): 0); }
#define car(X) (X?cell(X+1,0):0)
//char* cdr(char*x) {return (car(x)? cell((x)+(strlen(car(x))+1),0):
0); }
#define cdr(X) (car(X)?cell((X)+(strlen(car(X))+1),0):0)

char*cons(char*x,char*y){
char*t=new;
if(!x)return 0;
sprintf(new,y?"(%s %s)":"(%s)",x,y);
//if(y) sprintf(new,"(%s %s)",x,y); else sprintf(new,"(%s)",x);
new+=strlen(new)+1;
return t;
}

char*subst(char*x,char*y,char*z){
if(!x||!y||!z)return 0;
//printf("subst %s %s %s\n", x, y, z);
if(atom(z))
if(eq(z,y)) return x;
else return z;
else
if(cdr(z))
if(z[1]=='\\') return cons(car(z), subst(x,y,cdr(z)));
else return cons(subst(x,y,car(z)), subst(x,y,cdr(z)));
else
return cons(subst(x,y,car(z)), 0);
//return (atom(z)) ? (eq(z,y)?x:z) :
cons(subst(x,y,car(z)),cdr(z)? subst(x,y,cdr(z)): 0);
}

char*eval(char*e){
char*a,*r;
if(atom(e)) {r=e;goto ret;}
if(atom(a=car(e))) {r=e;goto ret;}
if(*a=='\\'){
r=cons(a,eval(cdr(e)));
//r=cons(car(a),eval(cdr(a)));
goto ret; }
if(*car(a)=='\\'){
char*p=new; //snag param
*new++=car(a)[2];
*new++='\0';
r=eval(subst(eval(cdr(e)),p,cdr(a)));
goto ret;
}
r=eval(cons(eval(a), cdr(e)?eval(cdr(e)):0));
ret:
//printf(":eval %s= %s\n", e, r?r:"null");
return r;
}

void try(char*s){
printf("try %s:\n", s);
#if 0
printf("atom %s= %s\n", s, atom(s));
printf("cell0 %s= %s\n", s, cell(s,0));
printf("cell1 %s= %s\n", s, cell(s,1));
++s;
printf("cell0 %s= %s\n", s, cell(s,0));
printf("cell1 %s= %s\n", s, cell(s,1));
--s;
printf("car %s= %s\n", s, car(s));
printf("cdr %s= %s\n", s, cdr(s));
printf("cons car cdr= %s\n", cons(car(s),cdr(s)));
printf("subst x a %s= %s\n", s, subst("x","a",s));
printf("subst x b %s= %s\n", s, subst("x","b",s));
#endif
printf("eval => %s\n", eval(s));
new=br;
}

char*in[]={
//"a","a","b","b",


"((\\ a. a) (b))", "(b)",
"((\\ x. x) (\\ y. (\\ z. z)))", "(\\ y. (\\ z. z))",
"(\\ x. ((\\ y. y) x))", "(\\ x. x)",
"(((\\ x. (\\ y. x)) (\\ a. a)) (\\ b. b))", "(\\ a. a)",
"((\\ x. (\\ y. y)) (\\ a. a))", "(\\ y. y)",
"(((\\ x. (\\ y. y)) (\\ a. a)) (\\ b. b))", "(\\ b. b)",
"((\\x. (x x)) (\\x. (x x)))", "undef",
NULL};

int main(){
new=br=sbrk(getpagesize()); //one page scratch space
char**t;
for(t=in;*t;t+=2){
try(*t);
printf("s.b. => %s\n",t[1]);
puts("");
}
#if 0
try("a");
try("(a)");
try("(a b)");
try("(a (b))");
#endif
return 0;
}

io_x

unread,
Aug 5, 2011, 2:06:11 AM8/5/11
to

"luser- -droog" <mij...@yahoo.com> ha scritto nel messaggio
news:27f55e9c-638f-4083-9069-

> #include<stdio.h>
> #include<string.h>
> char*m,*n;atom(x){return x?(islower(m[x])?m[x]:0):0;}
> eq(x,y){return x&&y&&atom(x)==atom(y);}
> cell(x){char*t=n,d=0;if(!x||!m[x])return 0;if(m[x]==' ')++x;
> if(atom(x)){*n++=m[x];*n++=0;return(n-m)-2;}
> if(m[x]=='\\'){memcpy(n,m+x,4);n+=4;*n++=0;return(n-m)-5;}
> do{d=m[x]?(m[x]=='('?d+1:(m[x]==')'?d-1:d)):0;*n++=m[x++];}
> while(d);*n++=0;return t-m;}
> car(x){return x?cell(x+1):0;}

if competition is for write less chars...
they have to count the letter different from ' ' and \n
so one can indent something

BartC

unread,
Aug 5, 2011, 7:13:26 AM8/5/11
to
luser- -droog" <mij...@yahoo.com> wrote in message
news:27f55e9c-638f-4083...@k8g2000yqk.googlegroups.com...

> The winning solution was 320 chars of Python. But C lacks a library
> call for string replacement. Even with my cleverest hackery, I can't
> get the block below 778 chars. Any comments
> or howls of horror are appreciated.

778? The code you posted is nearer to 2000 characters.

Since io_x has posted, I'm surprised he hasn't suggested his usual
techniques to reduce code size, such as:

#define R return

and replacing 'return' with 'R' everywhere.

The reduction will be slight however.

--
Bartc

luser- -droog

unread,
Aug 10, 2011, 7:27:03 PM8/10/11
to
On Aug 5, 6:13 am, "BartC" <b...@freeuk.com> wrote:
> luser- -droog" <mijo...@yahoo.com> wrote in message

The 778 is *after* reducing all identifiers to single characters and
using that R macro as well. But these are basically mechanical
transformations and reduce the semantic value of the identifiers. By
following the link, the 778 (reduced since) version can be seen.

What I was interested in here was reducing the size of the "logic";
as this has much more potential for radical gains.

luser- -droog

unread,
Aug 10, 2011, 7:35:21 PM8/10/11
to
On Aug 5, 1:06 am, "io_x" <a...@b.c.invalid> wrote:
> "luser- -droog" <mijo...@yahoo.com> ha scritto nel messaggio

The rules for counting are defined for each question. For this one the
count was source-set characters. If one chose to use utf8 to use the
real lambda symbol, it would count as one character despite being a
multi-byte sequence. But space and newline are bona-fide characters in
any charset. So they count. Part of the challenge is to remove "all"
unnecessary characters.

My latest version is down to 644 (omitting main and the test cases).

#include<stdio.h>
#include<string.h>
#define X m[x]
#define R return
char*n,*m;int u,w,d;C(x,y){w=n-m;n+=sprintf(n,y?"(%s %s)":"(%s)",&X,m
+y)+1;R w;}T(x){R X>>5==3;}
L(x){R X==92;}O(x,j){w=n-m;memcpy(n,&X,j);n+=j;*n++=0;R w;}E(x){X==' '?
++x:0;R
X==41?0:L(x)?O(x,4):P(x);}P(x){d=0,w=x;do{X==40?d++:X==41?d--:0;++x;}
while(d>0);R
O(w,x-w);}D(x){u=E(x+1);R u?E(x+1+strlen(m+u)):0;}V(x){int a=E(x
+1),b=D(x);R
T(x)|T(a)?x:L(a)?C(a,V(b)):L(E(a+1))?V(S(V(b),E(a+3),D(a))):V(C(V(a),b?
V(b):0));}S(w,y,x){R
T(x)?(X==m[y]?w:x):C(L(w+1)?E(x+1):S(w,y,E(x+1)),D(x)?S(w,y,D(x)):0);}
Y(char*s){n+=strlen(s=strcpy(n,s))+1;printf("%s\n%s\n\n",s,m+V(s-
m));n=m+1;}

char*s[]={


"((\\ a. a) (b))",

"((\\ x. x) (\\ y. (\\ z. z)))",

"(\\ x. ((\\ y. y) x))",

"(((\\ x. (\\ y. x)) (\\ a. a)) (\\ b. b))",

"((\\ x. (\\ y. y)) (\\ a. a))",

"(((\\ x. (\\ y. y)) (\\ a. a)) (\\ b. b))",

"((\\x. (x x)) (\\x. (x x)))",0};
#include<unistd.h>
main(){char**k;n=m=sbrk(4096);*n++=0;for(k=s;*k;k++)Y(*k);R 0;}

io_x

unread,
Aug 11, 2011, 2:34:23 AM8/11/11
to

"luser- -droog" <mij...@yahoo.com> ha scritto nel messaggio
news:7ea8f68e-6ead-4427...@s7g2000yqk.googlegroups.com...

On Aug 5, 1:06 am, "io_x" <a...@b.c.invalid> wrote:
> "luser- -droog" <mijo...@yahoo.com> ha scritto nel messaggio
> news:27f55e9c-638f-4083-9069-
>
> > #include<stdio.h>
> > #include<string.h>
> > char*m,*n;atom(x){return x?(islower(m[x])?m[x]:0):0;}
> > eq(x,y){return x&&y&&atom(x)==atom(y);}
> > cell(x){char*t=n,d=0;if(!x||!m[x])return 0;if(m[x]==' ')++x;
> > if(atom(x)){*n++=m[x];*n++=0;return(n-m)-2;}
> > if(m[x]=='\\'){memcpy(n,m+x,4);n+=4;*n++=0;return(n-m)-5;}
> > do{d=m[x]?(m[x]=='('?d+1:(m[x]==')'?d-1:d)):0;*n++=m[x++];}
> > while(d);*n++=0;return t-m;}
> > car(x){return x?cell(x+1):0;}
>
> if competition is for write less chars...
> they have to count the letter different from ' ' and \n
> so one can indent something

The rules for counting are defined for each question. For this one the
count was source-set characters. If one chose to use utf8 to use the
real lambda symbol, it would count as one character despite being a
multi-byte sequence. But space and newline are bona-fide characters in
any charset. So they count. Part of the challenge is to remove "all"
unnecessary characters.

#they are wrong on blank spaces and '\n'
#because there is not one reason to count blank spaces and '\n'
#infact if there is a list of code of name-size
# a1.c 1000, a2.c 2000, a3.c 3000 etc
#that not count the spaces
#it is enought to delete all them (spaces and '\n') for obtain
#the same list conserver the order of the list

luser- -droog

unread,
Aug 11, 2011, 7:33:32 PM8/11/11
to
On Aug 11, 1:34 am, "io_x" <a...@b.c.invalid> wrote:
> "luser- -droog" <mijo...@yahoo.com> ha scritto nel messaggionews:7ea8f68e-6ead-4427...@s7g2000yqk.googlegroups.com...

> On Aug 5, 1:06 am, "io_x" <a...@b.c.invalid> wrote:
>
>
>
> > "luser- -droog" <mijo...@yahoo.com> ha scritto nel messaggio
> > news:27f55e9c-638f-4083-9069-
>
> > > #include<stdio.h>
> > > #include<string.h>
> > > char*m,*n;atom(x){return x?(islower(m[x])?m[x]:0):0;}
> > > eq(x,y){return x&&y&&atom(x)==atom(y);}
> > > cell(x){char*t=n,d=0;if(!x||!m[x])return 0;if(m[x]==' ')++x;
> > > if(atom(x)){*n++=m[x];*n++=0;return(n-m)-2;}
> > > if(m[x]=='\\'){memcpy(n,m+x,4);n+=4;*n++=0;return(n-m)-5;}
> > > do{d=m[x]?(m[x]=='('?d+1:(m[x]==')'?d-1:d)):0;*n++=m[x++];}
> > > while(d);*n++=0;return t-m;}
> > > car(x){return x?cell(x+1):0;}
>
> > if competition is for write less chars...
> > they have to count the letter different from ' ' and \n
> > so one can indent something
>
> The rules for counting are defined for each question. For this one the
> count was source-set characters. If one chose to use utf8 to use the
> real lambda symbol, it would count as one character despite being a
> multi-byte sequence. But space and newline are bona-fide characters in
> any charset. So they count. Part of the challenge is to remove "all"
> unnecessary characters.
>
> #they are wrong on blank spaces and '\n'
> #because there is not one reason to count blank spaces and '\n'

It's up to the whim of the questoner!

> #infact if there is a list of code of name-size
> # a1.c 1000,  a2.c  2000,  a3.c  3000 etc
> #that not count the spaces
> #it is enought to delete all them (spaces and '\n') for obtain
> #the same list conserver the order of the list
>
> My latest version is down to 644 (omitting main and the test cases).

Take a closer look at this golfed code (now down to 642!). The only
whitespace
in there is *necessary* to separate the tokens. I suspect we're
arguing *the
same thing* back and forth at each other. I think we actually agree in
substance
and are disputing vacuous details.

#include<stdio.h>
#include<string.h>
#define X m[x]
#define R return

char*n,*m;Y(char*s){n+=strlen(s=strcpy(n,s))+1;printf("%s\n%s\n\n",s,m
+V(s-m));n=m+1;}int
u,w,d;C(x,y){w=n-m;n+=sprintf(n,y?"(%s %s)":"(%s)",&X,m+y)+1;R w;}T(x)


{R
X>>5==3;}L(x){R X==92;}O(x,j){w=n-m;memcpy(n,&X,j);n+=j;*n++=0;R

w;}E(x){X==' '?++x:0;R

Andrey Tarasevich

unread,
Sep 2, 2011, 2:02:39 PM9/2/11
to
On 8/2/2011 11:30 PM, luser- -droog wrote:
> ...

Instead of `return(n-m)-2` you can simply do `return n-m-2` even if `n`
and `m` are pointers.

luser- -droog

unread,
Sep 8, 2011, 2:24:14 AM9/8/11
to
On Sep 2, 1:02 pm, Andrey Tarasevich <andreytarasev...@hotmail.com>
wrote:

Yeah. I've got a bit of a blind spot about associativity.
I was ultimately able to factor all of those copying clauses to use a
helper function:

O(x,j){w=n-m;memcpy(n,&X,j);n+=j;*n++=0;R w;}

so return(n-m)-2 and return(n-m)-5 no longer occur, because the value
is stashed in w before n is modified.

Thanks, though. That would certainly squeeze 1 (or 2) vital
characters.

I've been thinking it should be possible to rewrite the whole thing as
a while-switch statement, implementing recursion with an explicit
stack and
continue. Every function would be an expression. That could factor out
all
the 'return's entirely.

Moshbear dot Net

unread,
Sep 11, 2011, 1:50:21 AM9/11/11
to
On Aug 3, 2:30 am, luser- -droog <mijo...@yahoo.com> wrote:
> I've just wasted a week trying to solve this problem on code-golf:http://codegolf.stackexchange.com/questions/284/write-an-interpreter-...

>
> I think I've squeezed it dry, but I've got to know if anybody has any
> *really* sneaky tricks to make it shorter.
>
> Here is the last version before reducing function names and replacing
> repeated sequences with macros. It copies the argument string into
> scratch memory, and then manipulates (mutilates?) the expression using
> cursors and a stripped down suite of McCarthy primitives.
>
> The winning solution was 320 chars of Python. But C lacks a library
> call for string replacement. Even with my cleverest hackery, I can't
> get the block below 778 chars. Any comments
> or howls of horror are appreciated.
>

Read the ISO spec three times, front to back. You will find ways to
further golf your code. It helped me a lot. Then again, I sold K&R
because I had no use for it anymore.

0 new messages