I'm trying to write a little virtual machine interpreter
based on the minimal instruction set described in:
http://www.jucs.org/jucs_2_11/conditional_branching_is_not/Rojas_R.html
And it's going swimmingly until I need to branch
in the "assembly" code.
1736(2)12:55 AM:~ 0> cat minvm.c
#include <stdio.h>
#include <stdlib.h>
/* Print a diagnostic message on the standard error stream.
   Does not terminate the program; the caller decides whether
   execution can continue.
   msg: NUL-terminated message text (read-only -- callers pass
   string literals, hence the const qualifier). */
void error(const char *msg) {
fprintf(stderr, "Error: %s\n", msg);
}
typedef unsigned addr;
typedef unsigned word;
extern addr ip;
extern word acc;
extern word mem[];
/* Primitive operations of the minimal machine.  Each one reads or
   writes the accumulator, a memory cell, or the instruction pointer. */

/* Reset the accumulator to zero. */
void clear (void)
{
acc = 0;
}

/* Add one to the accumulator (unsigned, so overflow wraps). */
void inc (void)
{
acc = acc + 1;
}

/* Copy memory cell A into the accumulator. */
void load (addr A)
{
acc = mem[A];
}

/* Copy the accumulator into memory cell A. */
void store (addr A)
{
mem[A] = acc;
}

/* Branch: redirect the instruction pointer to X when the
   accumulator is zero. */
void brz (addr X)
{
if (acc == 0)
ip = X;
}
/* Dispatch table, indexed by the opcode values below (CLR..BRZ).
   NOTE(review): the handlers do not share a prototype -- clear/inc take
   no argument, load/store/brz take an addr -- yet all are stored as the
   old-style unprototyped type void (*)().  Calling a function through a
   pointer of an incompatible type is undefined behavior in C; either
   give every handler the same signature or call them directly from the
   interpreter's switch. */
void (*optab[])() = { clear, inc, load, store, brz };
/* Opcodes.  HALT has no optab entry; the fetch loop tests for it directly. */
enum opcode { CLR, INC, LOAD, STORE, BRZ, HALT };
/* "Assembler" macros: each expands to a comma-separated instruction
   sequence for the mem[] initializer.  The names deliberately reuse the
   opcode names above -- a function-like macro only expands when its name
   is followed by '(', so a bare CLR/INC/BRZ still denotes the enum
   constant. */
#define CLR(A) CLR, STORE, A /* zero the accumulator, then cell A */
#define INC(A) LOAD, A, INC, STORE, A /* cell A += 1 (via the accumulator) */
#define MOV(A,B) LOAD, A, STORE, B /* copy cell A to cell B */
#define BRZ(A,X) LOAD, A, BRZ, X /* branch to X when cell A is zero */
#define GOTO(X) CLR, BRZ, X /* unconditional branch; clobbers the accumulator */
/* Scratch cells, placed high in mem[] well away from the program text. */
enum tempvar { T0 = 900, T1 = 901, T2 = 902, T3 = 903, T4 = 904, T5 = 905 };
/* NOTE(review): this macro is broken as written, in two independent
   ways.  (1) The first line has no trailing backslash, so the macro
   definition ends there and the second line is a stray token sequence
   at file scope -- a syntax error.  (2) 'loop:' and 'end:' are
   statement labels; they cannot appear inside an array initializer at
   all.  Branch targets must be numeric mem[] indices, e.g. generated
   with the enum/X-macro label technique discussed further down. */
#define CMPL(A) CLR(T1), loop: INC(A), BRZ(A,end), INC(T1),
GOTO(loop), end: MOV(T1,A)
addr ip = 0; /* instruction pointer: index of the next mem[] cell to fetch */
word acc; /* the accumulator */
/* Program text and data share mem[]: the program occupies the low
   cells, and the stores below write into addresses 0 and 1 -- i.e.
   over cells the program itself occupies.  Harmless in this demo
   (only already-executed cells are clobbered), but worth noting. */
word mem[1000] = {
CLR(0), /* clear location zero */
MOV(0,1), /* copy into one */
HALT };
int exec (void) {
word inst;
while ((inst = mem[ip++]) != HALT) {
switch (inst) {
case CLR: case INC: optab[inst](); break;
case LOAD: case STORE: case BRZ: optab[inst](mem[ip++]);
break;
default: error("Illegal Instruction");
}
}
return 0;
}
/* Entry point: run the virtual machine and report its status. */
int main (void)
{
int status = exec();
return status;
}
Without being specific, this kind of stuff is usually possible to
achieve by preprocessor tricks. Basically, you define the actual array
contents (as well as the 'labels') via macros, and put them in a
separate file. Then you include the file twice: the first time, the
macros are defined such that they expand to enumeration constants; the
second time, they expand to the actual array initializers. Voilà, you
have a set of enums corresponding to array indices.
Sort of like this:
--- foo.c ---
/* First inclusion pass: each FOO row contributes only its label, so
   the labels become consecutive enumeration constants, i.e. array
   indices; foo_count ends up as the number of rows. */
enum {
#define FOO(e1, e2) e1,
#include "foo.inc"
#undef FOO
foo_count
};
/* Second inclusion pass: the same rows now contribute the cell
   values, which may themselves refer to the labels defined above. */
int foo_array[foo_count] = {
#define FOO(e1, e2) e2,
#include "foo.inc"
#undef FOO
};
--- foo.inc ---
FOO(first, second)
FOO(second, fourth)
FOO(third, -1)
FOO(fourth, first)
..............................
The first parameter of FOO() defines a label as an enumeration constant;
the second defines a value of foo_array[label], and that value can refer
to other labels. You should be able to adapt that technique to your
interpreter code.
--
Marcin Grzegorczyk
Yes. I like it. Thank you.
As a first round of adaptation, I've translated to
an X-macro version rather than multiple inclusion.
/* X-macro selectors: pick out the label or the value of a row. */
#define Labels(A, B) A ,
#define Actual(A, B) B ,
/* X-macro table: one row per cell, (label, value); values may refer to
   labels.  Fix: no trailing backslash after the last row -- the
   original had one, which spliced the following 'enum {' line into the
   macro body and broke both expansions. */
#define data(_) \
_(L0, 0) \
_(L1, 1) \
_(L2, L0)
/* First pass: labels become consecutive enum constants / indices. */
enum {
data(Labels)
foo_count
};
/* Second pass: the same table supplies the initializer values. */
unsigned char arr[foo_count] = {
data(Actual)
};
Next I'll need to generate the label names
automatically, make aliases, maybe add them
for multiple levels.
And then, perhaps I'll finish reading the article ;)