Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.

Dismiss

code review: int-code module

83 views

Skip to first unread message

luser droog

unread,

Feb 11, 2016, 1:12:40 AM2/11/16

I'm working on a new version of my APL interpreter.
And I really want to share this code and get some feedback.

I've devised an int-encoding for my new interpreter which
packs all data into integer handles. The benefit for array
operations is all data items will have a uniform size no
matter what the content actually is. This replaces the
intptr_t hackery (ab)used in earlier versions.

So this version explicitly uses 32bit data but does not
confuse/conflate int and ptr types.

Other modules and some bloggish description are at:
https://github.com/luser-dr00g/inca/tree/master/olmec

I think this "int-code" as distinct from "byte-code" is
really nifty. It originally included headers for the various
types to define typed cache tables. But then the code worked
out more simply to use `void **`. Consequently this module
currently does not depend upon any other code. But of course
the other code may be browsed at the above url.

BTW, this project is not being written in "codegolf-style".
Brevity is a goal as an indicator of simplicity, but ultra-
terseness is too detrimental to longer-term readability.

Thoughts?

josh@LAPTOP-ILO10OOF ~/inca/olmec
$ cat en.h
/* One data item split into its two fields: a 24-bit value and an 8-bit tag.
The position of each bit-field inside the 32-bit word is chosen by the
compiler; gettag() treats a negative word as a literal, so the tag's top
bit is expected to coincide with the sign bit. */
typedef struct datum {
unsigned int val:24; /* payload; its meaning depends on tag (see enum tag) */
unsigned int tag:8; /* one of enum tag */
} datum;

/* Two views of the same handle: the tag/val fields, or the plain 32-bit
integer that is actually stored in arrays and passed around. */
typedef union integer {
datum data; /* field view */
int32_t int32; /* handle view */
} integer;

/* Values of datum.tag; the tag selects how datum.val is interpreted.
LITERAL must stay first (value 0): gettag() reports 0 for negative handles. */
enum tag {
LITERAL, /* val is a 24-bit 2's comp integer */
CHAR, /* val is a 21-bit Unicode code point padded with zeros */
NUMBER, /* val is an index in the number table */
PROG, /* val is an executable code fragment */
ARRAY, /* val is a(n index to a) boxed array */
SYMTAB, /* val is a(n index to a) symbol table */
NULLOBJ, /* val is irrelevant (s.b. 0) */
VERB, /* NOTE(review): no dedicated case in cache()/getptr(); meaning of val to be confirmed */
ADV, /* NOTE(review): no dedicated case in cache()/getptr(); meaning of val to be confirmed */
MARK, /* val is irrelevant (s.b. 0) */
};

/* Handles for the NULLOBJ and MARK objects. Both are zero until init_en()
has been called. */
extern int null;
extern int mark;

/* Assign `null` and `mark`; call once before using them. */
void init_en();

/* Tag of handle d. Negative handles report tag 0 (LITERAL). */
int gettag(int d);
/* Value field of handle d. A negative handle is returned unchanged. */
int getval(int d);
/* Pack tag and val into a handle. */
int newdata(int tag, int val);

/* Store ptr in the table selected by tag (NUMBER, PROG, ARRAY or SYMTAB)
and return a handle for it. Returns `null` for LITERAL, CHAR and NULLOBJ. */
int cache(int tag, void *ptr);
/* Look up the table entry for handle d. Returns NULL for negative handles
and for LITERAL, CHAR and NULLOBJ. */
void *getptr(int d);

josh@LAPTOP-ILO10OOF ~/inca/olmec
$ cat en.c
/* the array data are always just straight 32bit integers.
but we treat as a 7bit tag and 24bit integer value.

this file defines the sub-typing of data items.
*/

#include <stdarg.h>
#include <stdint.h>
#include <stdlib.h>

#include "en.h"

int gettag(int d){
if (d<0) return 0; /* negatives are literals */
integer int32;
int32.int32 = d;
datum dat = int32.data;
return dat.tag;
}

int getval(int d){
if (d<0) return d;
integer int32;
int32.int32 = d;
datum dat = int32.data;
return dat.val;
}

/* newdata: pack a tag and a value into an integer handle.
 *
 * tag  one of enum tag
 * val  payload; only the low 24 bits are stored for packed handles
 *
 * Returns the handle. A negative LITERAL is returned as-is: gettag() and
 * getval() treat every negative int as an immediate literal, whereas packing
 * it into the unsigned 24-bit field would drop the sign and hand back a
 * large positive number.
 */
int newdata(int tag, int val){
    /* 0 is the LITERAL tag (first enumerator of enum tag), the same value
       gettag() reports for negative handles. */
    if (tag == 0 && val < 0) {
        return val;
    }
    integer packed = { .data = { .tag = tag, .val = val } };
    return packed.int32;
}

/* Bit patterns of the NULLOBJ and MARK handles, built through the union so
that the tag ends up wherever the datum bit-field layout places it. */
integer nulldata = { .data = { .tag = NULLOBJ, .val = 0 } };
/* Handle form of nulldata. Reading a union member is not a constant
expression, so the value is assigned in init_en() instead of here. */
int null /* = nulldata.int32 */;
integer markdata = { .data = { .tag = MARK, .val = 0 } };
/* Handle form of markdata; assigned in init_en(). */
int mark /* = markdata.int32 */;

/* init_en: give the global handles `null` and `mark` their values.
 *
 * The values come from the nulldata/markdata unions and cannot be written
 * as static initializers, so this must run before either handle is used.
 * Declared with (void) so that the definition is a real prototype and a call
 * with arguments is diagnosed.
 */
void init_en(void){
    null = nulldata.int32;
    mark = markdata.int32;
}

/* One growable pointer table per cached tag. For each table, xxxused is the
number of slots filled so far and xxxmax the number of slots allocated; all
three variables are maintained by addnewtocache(). */

/* NUMBER objects */
size_t numused, nummax;
void **numtab;

/* PROG objects */
size_t progused, progmax;
void **progtab;

/* ARRAY objects */
size_t arrused, arrmax;
void **arrtab;

/* SYMTAB objects */
size_t symused, symmax;
void **symtabtab;

/* addnewtocache: append ptr to a growable pointer table.
 *
 * used  in/out: number of slots already filled
 * max   in/out: number of slots allocated
 * data  in/out: the table itself; may be moved by realloc
 * ptr   the pointer to store
 *
 * Returns the index of the new slot, or `null` if the table could not be
 * grown. On failure the table, *used and *max are left untouched, so the
 * table is still consistent and a later call may retry.
 * NOTE(review): `null` is zero until init_en() has run, so before that a
 * failure cannot be told apart from index 0.
 */
int addnewtocache(size_t *used, size_t *max, void ***data, void *ptr){
    if (*used == *max){
        /* Refuse to grow if the new byte count would overflow size_t. */
        if (*max > (SIZE_MAX / sizeof(void*) - 11) / 7) {
            return null;
        }
        /* Commit the new capacity only after realloc succeeds; updating
           *max first would make the next call skip the grow and write past
           the end of the old buffer. */
        size_t newmax = *max * 7 + 11;
        void **grown = realloc(*data, newmax * sizeof *grown);
        if (!grown) {
            return null;
        }
        *data = grown;
        *max = newmax;
    }
    size_t slot = (*used)++;
    (*data)[slot] = ptr;
    return (int)slot;
}

/* cache_in: append ptr to one table and wrap the new index in a handle.
 *
 * Returns `null` when the index cannot be stored in the 24-bit val field.
 * addnewtocache() reports failure by returning `null`; with the tag in the
 * high byte that value is above 0xFFFFFF and is rejected here instead of
 * being truncated to index 0.
 * NOTE(review): relies on the tag occupying the high byte and on init_en()
 * having run; otherwise behavior is the same as before this check.
 */
static int cache_in(int tag, size_t *used, size_t *max, void ***tab, void *ptr){
    int idx = addnewtocache(used, max, tab, ptr);
    if (idx < 0 || idx > 0xFFFFFF) {
        return null;
    }
    return newdata(tag, idx);
}

/* cache: store ptr in the table belonging to tag and return its handle.
 *
 * tag  NUMBER, PROG, ARRAY or SYMTAB
 * ptr  the object to remember; ownership is not examined here
 *
 * Returns the new handle, or `null` for every other tag (immediates such as
 * LITERAL and CHAR have no table) and when the object could not be stored.
 */
int cache(int tag, void *ptr){
    switch(tag){
    case NUMBER:
        return cache_in(tag, &numused, &nummax, &numtab, ptr);
    case PROG:
        return cache_in(tag, &progused, &progmax, &progtab, ptr);
    case ARRAY:
        return cache_in(tag, &arrused, &arrmax, &arrtab, ptr);
    case SYMTAB:
        return cache_in(tag, &symused, &symmax, &symtabtab, ptr);
    default:
        /* Without this, unlisted tags fell off the end of the function. */
        return null;
    }
}

/* getptr: return the pointer that cache() stored for handle d.
 *
 * Returns NULL for negative handles (immediate integers) and for every tag
 * that has no table.
 *
 * The stored pointer itself is returned, not the address of the table slot:
 * a slot address becomes invalid as soon as addnewtocache() reallocates the
 * table.
 */
void *getptr(int d){
    if (d < 0) {
        return NULL;
    }
    switch(gettag(d)){
    case NUMBER: return numtab[getval(d)];
    case PROG: return progtab[getval(d)];
    case ARRAY: return arrtab[getval(d)];
    case SYMTAB: return symtabtab[getval(d)];
    default:
        /* LITERAL, CHAR, NULLOBJ and any tag without a table. */
        return NULL;
    }
}

josh@LAPTOP-ILO10OOF ~/inca/olmec
$

luser droog

unread,

Feb 12, 2016, 6:34:16 AM2/12/16

On Thursday, February 11, 2016 at 12:12:40 AM UTC-6, luser droog wrote:
> I'm working on a new version of my APL interpreter.
> And I really want to share this code and get some feedback.
>
> I've devised an int-encoding for my new interpreter which
> packs all data into integer handles. The benefit for array
> operations is all data items will have a uniform size no
> matter what the content actually is. This replaces the
> intptr_t hackery (ab)used in earlier versions.
>
> So this version explicitly uses 32bit data but does not
> confuse/conflate int and ptr types.
>
> Other modules and some bloggish description are at:
> https://github.com/luser-dr00g/inca/tree/master/olmec
>

> void *getptr(int d){
> if (d<0) return NULL;
> switch(gettag(d)){
> case LITERAL: return NULL;
> case CHAR: return NULL;
> case NUMBER: return &numtab[getval(d)];
> case PROG: return &progtab[getval(d)];
> case ARRAY: return &arrtab[getval(d)];
> case SYMTAB: return &symtabtab[getval(d)];
> case NULLOBJ: return NULL;
> }
> }
>

Remove all those '&'s. Return the pointer from the
table, not a pointer into the table.

luser droog

unread,

Feb 29, 2016, 3:45:09 AM2/29/16

On Friday, February 12, 2016 at 5:34:16 AM UTC-6, luser droog wrote:
> On Thursday, February 11, 2016 at 12:12:40 AM UTC-6, luser droog wrote:
> > I'm working on a new version of my APL interpreter.
> > And I really want to share this code and get some feedback.
> >
> > I've devised an int-encoding for my new interpreter which
> > packs all data into integer handles. The benefit for array
> > operations is all data items will have a uniform size no
> > matter what the content actually is. This replaces the
> > intptr_t hackery (ab)used in earlier versions.
> >
> > So this version explicitly uses 32bit data but does not
> > confuse/conflate int and ptr types.
> >
> > Other modules and some bloggish description are at:
> > https://github.com/luser-dr00g/inca/tree/master/olmec
> >
>

Following the model from the other thread, this revision adds
a long explanatory comment at the top.

josh@LAPTOP-ILO10OOF ~/inca/olmec
$ cat en.h

/* One data item split into its two fields: a 24-bit value and an 8-bit tag.
Where each bit-field lands inside the 32-bit word is up to the compiler,
hence the ordering notes below. */
typedef struct datum { // these two should be reversed for Big-Endian
unsigned int val:24; // payload; its meaning depends on tag (see enum tag)
unsigned int tag:8; // hi-bit of tag should overlay the sign bit

} datum;

/* Two views of the same handle: the tag/val fields, or the plain 32-bit
integer that is actually stored in arrays and passed around. */
typedef union integer {
datum data; /* field view */
int32_t int32; /* handle view */
} integer;

/* Values of datum.tag; the tag selects how datum.val is interpreted.
LITERAL must stay first (value 0): gettag() reports 0 for negative handles. */
enum tag {
LITERAL, /* val is a 24-bit 2's comp integer */

NUMBER, /* val is an index in the number table */
CHAR, /* val is a 21-bit Unicode code point padded with zeros */

PCHAR, /* val is an executable char */
PROG, /* val is an (index to an) executable code fragment (ARRAY of PCHAR) */

ARRAY, /* val is a(n index to a) boxed array */
SYMTAB, /* val is a(n index to a) symbol table */
NULLOBJ, /* val is irrelevant (s.b. 0) */

VERB, /* val is an index into the verb table (verbtab) */
ADVERB, /* val is an index into the adverb table (advtab) */
MARKOBJ, /* val is irrelevant (s.b. 0) */
LPAROBJ, /* NOTE(review): presumably a left-parenthesis marker; not used in en.c, confirm */
RPAROBJ, /* NOTE(review): presumably a right-parenthesis marker; not used in en.c, confirm */

};

/* Handles for the NULLOBJ and MARKOBJ objects. Both are zero until
init_en() has been called. */
extern int null;
extern int mark;

/* Assign `null` and `mark`; call once before using them. */
void init_en();

/* Tag of handle d. Negative handles report tag 0 (LITERAL). */
int gettag(int d);
/* Value field of handle d. A negative handle is returned unchanged. */
int getval(int d);
/* Pack tag and val into a handle. A negative val with tag LITERAL is
returned as-is. */
int newdata(int tag, int val);

/* Store ptr in the table selected by tag (NUMBER, PROG, ARRAY, SYMTAB, VERB
or ADVERB) and return a handle for it. Returns `null` for any other tag. */
int cache(int tag, void *ptr);
/* Return the pointer stored for handle d. Returns NULL for negative handles
and for tags that have no table. */
void *getptr(int d);

/* Fill element matching the type of d: newdata(CHAR, 0) when d is a CHAR,
otherwise newdata(LITERAL, (1<<24)-1). */
int getfill(int d);

josh@LAPTOP-ILO10OOF ~/inca/olmec
$ cat en.c

/* Encoding
*
* this file defines the sub-typing of data atoms.
* All data are packed into integer handles. The benefit for
* array operations is all data atoms will have a uniform
* size no matter what the content actually is. This replaces
* the intptr_t hackery (ab)used in earlier versions
* (not portable to 64bit build).

* the array data are always just straight 32bit integers.

* but we treat as a 7bit tag and 24bit integer value.
* An immediate integer value is indicated by a negative
* sign-bit or all-zero tag.

* Composite objects (boxed or reference objects) have
* an associated pointer stored in an array associated
* with the tag. Thus an array object can be enclosed
* into a scalar (integer handle) with

int x;
x = cache(ARRAY, array_new(3,3)); //3x3 matrix

* the array data structure (which is implicitly a pointer
* to its struct) can be retrieved from the handle
* with

array a;
a = getptr(x);

*/

#include <stdarg.h>
#include <stdint.h>

#include <stdio.h>

#include <stdlib.h>

#include "en.h"

int gettag(int d){
if (d<0) return 0; /* negatives are literals */
integer int32;
int32.int32 = d;
datum dat = int32.data;
return dat.tag;
}

int getval(int d){
if (d<0) return d;
integer int32;
int32.int32 = d;
datum dat = int32.data;
return dat.val;
}

/* newdata: pack a tag and a value into an integer handle.
 *
 * A negative LITERAL is already a valid handle (gettag()/getval() treat all
 * negative ints as immediates) and is returned untouched. Everything else is
 * assembled through the datum bit-fields, which keep only the low 24 bits
 * of val.
 */
int newdata(int tag, int val){
    if (val < 0 && tag == LITERAL) {
        return val;
    }
    integer packed = { .data = { .tag = tag, .val = val } };
    return packed.int32;
}

/* Bit patterns of the NULLOBJ and MARKOBJ handles, built through the union
so that the tag ends up wherever the datum bit-field layout places it. */
integer nulldata = { .data = { .tag = NULLOBJ, .val = 0 } };
/* Handle form of nulldata. Reading a union member is not a constant
expression, so the value is assigned in init_en() instead of here. */
int null /* = nulldata.int32 */;

integer markdata = { .data = { .tag = MARKOBJ, .val = 0 } };

/* Handle form of markdata; assigned in init_en(). */
int mark /* = markdata.int32 */;

/* init_en: give the global handles `null` and `mark` their values.
 *
 * The values come from the nulldata/markdata unions and cannot be written
 * as static initializers, so this must run before either handle is used.
 * Declared with (void) so that the definition is a real prototype and a call
 * with arguments is diagnosed.
 */
void init_en(void){
    null = nulldata.int32;
    mark = markdata.int32;
}

/* addnewtocache: append ptr to a growable pointer table.
 *
 * used  in/out: number of slots already filled
 * max   in/out: number of slots allocated
 * data  in/out: the table itself; may be moved by realloc
 * ptr   the pointer to store
 *
 * Returns the index of the new slot, or `null` if the table could not be
 * grown. On failure the table, *used and *max are left untouched, so the
 * table is still consistent and a later call may retry.
 * NOTE(review): `null` is zero until init_en() has run, so before that a
 * failure cannot be told apart from index 0.
 */
int addnewtocache(size_t *used, size_t *max, void ***data, void *ptr){
    if (*used == *max){
        /* Refuse to grow if the new byte count would overflow size_t. */
        if (*max > (SIZE_MAX / sizeof(void*) - 11) / 7) {
            return null;
        }
        /* Commit the new capacity only after realloc succeeds; updating
           *max first would make the next call skip the grow and write past
           the end of the old buffer. */
        size_t newmax = *max * 7 + 11;
        void **grown = realloc(*data, newmax * sizeof *grown);
        if (!grown) {
            return null;
        }
        *data = grown;
        *max = newmax;
    }
    size_t slot = (*used)++;
    (*data)[slot] = ptr;
    return (int)slot;
}

/* One growable pointer table per cached tag. For each table, xxxused is the
number of slots filled so far and xxxmax the number of slots allocated; all
three variables are maintained by addnewtocache(). */

/* NUMBER objects */
size_t numused, nummax;
void **numtab;

/* PROG objects */
size_t progused, progmax;
void **progtab;

/* ARRAY objects */
size_t arrused, arrmax;
void **arrtab;

/* SYMTAB objects */
size_t symused, symmax;
void **symtabtab;

/* VERB objects */
size_t verbused, verbmax;
void **verbtab;

/* ADVERB objects */
size_t advused, advmax;
void **advtab;

/* cache_in: append ptr to one table and wrap the new index in a handle.
 *
 * Returns `null` when the index cannot be stored in the 24-bit val field.
 * addnewtocache() reports failure by returning `null`; with the tag in the
 * high byte that value is above 0xFFFFFF and is rejected here instead of
 * being truncated to index 0.
 * NOTE(review): relies on the tag occupying the high byte and on init_en()
 * having run; otherwise behavior is the same as before this check.
 */
static int cache_in(int tag, size_t *used, size_t *max, void ***tab, void *ptr){
    int idx = addnewtocache(used, max, tab, ptr);
    if (idx < 0 || idx > 0xFFFFFF) {
        return null;
    }
    return newdata(tag, idx);
}

/* cache: store ptr in the table belonging to tag and return its handle.
 *
 * tag  NUMBER, PROG, ARRAY, SYMTAB, VERB or ADVERB
 * ptr  the object to remember; ownership is not examined here
 *
 * Returns the new handle, or `null` for every other tag (immediates such as
 * LITERAL and CHAR have no table) and when the object could not be stored.
 */
int cache(int tag, void *ptr){
    switch(tag){
    case NUMBER:
        return cache_in(tag, &numused, &nummax, &numtab, ptr);
    case PROG:
        return cache_in(tag, &progused, &progmax, &progtab, ptr);
    case ARRAY:
        return cache_in(tag, &arrused, &arrmax, &arrtab, ptr);
    case SYMTAB:
        return cache_in(tag, &symused, &symmax, &symtabtab, ptr);
    case VERB:
        return cache_in(tag, &verbused, &verbmax, &verbtab, ptr);
    case ADVERB:
        return cache_in(tag, &advused, &advmax, &advtab, ptr);
    default:
        /* LITERAL, CHAR, NULLOBJ and every other tag without a table. */
        return null;
    }
}

/* getptr: return the pointer that cache() stored for handle d.
 *
 * Negative handles (immediate integers) and tags without a table yield
 * NULL. For the table-backed tags the handle's value is used as the index
 * into the matching table; no range check is performed.
 */
void *getptr(int d){
    void **table;

    if (d < 0) {
        return NULL;
    }
    switch(gettag(d)){
    case NUMBER: table = numtab; break;
    case PROG: table = progtab; break;
    case ARRAY: table = arrtab; break;
    case SYMTAB: table = symtabtab; break;
    case VERB: table = verbtab; break;
    case ADVERB: table = advtab; break;
    default:
        /* LITERAL, CHAR, NULLOBJ and anything else have no table. */
        return NULL;
    }
    return table[getval(d)];
}

/* getfill: return the fill element that matches the type of d.
 *
 * A CHAR gets the CHAR handle with value 0; every other tag gets the
 * LITERAL handle whose value is (1<<24)-1, i.e. all 24 value bits set.
 */
int getfill(int d){
    if (gettag(d) == CHAR) {
        return newdata(CHAR, 0);
    }
    return newdata(LITERAL, (1<<24)-1);
}

josh@LAPTOP-ILO10OOF ~/inca/olmec
$

0 new messages