My partly-written "forth in C"

luser droog

unread,

Jan 1, 2019, 4:05:36 PM1/1/19

to

I found this incomplete code on my disk that I worked on several months
ago but didn't finish. But I still think it's pretty cool, and with all
the other "meta" threads, I thought I might get some useful feedback or
at the very least some entertaining insults.

First is a header file which implements a macro assembler for 8086
machine code in C preprocessor macros.

$ cat asm8086.h
/*
 * asm8086.h -- a tiny 8086 "macro assembler" made of C preprocessor macros.
 *
 * Every instruction macro expands to a comma-separated list of byte values
 * meant to be dropped into an array initializer, for example
 *     unsigned char code[] = { MOV(,R,BX,SP), PUSH(AX) };
 * Displacement / immediate bytes are NOT generated here; the caller appends
 * them after the instruction macro.
 */

/*
 * Two-operand instructions: opcode byte followed by the ModR/M byte.
 *   to   leave empty for the "reg <- r/m" form, or pass F to lower the
 *        opcode by 2, which selects the "r/m <- reg" (store) form
 *   m    addressing mode: Z, B, W or R (see below)
 *   r    register operand (AX..DI)
 *   r_m  register (when m is R) or memory operand (BX_SI..BX_)
 */
#define LEA(to,m,r,r_m) to+0x8d,MRM(m,r,r_m)
#define MOV(to,m,r,r_m) to+0x8b,MRM(m,r,r_m)
#define ADD(to,m,r,r_m) to+0x03,MRM(m,r,r_m)
#define SUB(to,m,r,r_m) to+0x2b,MRM(m,r,r_m)
#define F -2

/*
 * ModR/M byte written as a three-digit octal literal 0<m><r><r_m>
 * (2-bit mode, 3-bit reg, 3-bit r/m).  The extra MRM_PASTE level makes
 * sure the arguments are macro-expanded before they are pasted, so that
 * MRM(R,AX,BX) works when used directly and not only via MOV() and friends.
 */
#define MRM(m,r,r_m) MRM_PASTE(m,r,r_m)
#define MRM_PASTE(m,r,r_m) 0##m##r##r_m

/* Addressing modes (the "mod" field of ModR/M). */
#define Z 0   /* memory operand, no displacement      */
#define B 1   /* memory operand, 8-bit displacement   */
#define W 2   /* memory operand, 16-bit displacement  */
#define R 3   /* register operand                     */

/* 16-bit register numbers. */
#define AX 0
#define CX 1
#define DX 2
#define BX 3
#define SP 4
#define BP 5
#define SI 6
#define DI 7

/* Memory operand (r/m field) encodings for modes Z, B and W. */
#define BX_SI 0
#define BX_DI 1
#define BP_SI 2
#define BP_DI 3
#define SI_ 4
#define DI_ 5
#define BP_ 6
#define BX_ 7

#define TEST(m,r,r_m) 0x85,MRM(m,r,r_m)

/* Single-operand group instructions: the reg field is an opcode extension. */
#define IMUL(m, r_m) 0xf7,MRM(m,5,r_m)
#define INC_(m, r_m) 0xff,MRM(m,0,r_m)
#define DEC_(m, r_m) 0xff,MRM(m,1,r_m)
/* Near indirect jump is FF /4; /5 is the far form that needs a memory
 * operand, so it cannot be used for "jmp ax". */
#define JMP_(m, r_m) 0xff,MRM(m,4,r_m)

/* One-byte register forms. */
#define POP(r) (0x58+(r))
#define PUSH(r) (0x50+(r))

/* Fixed opcodes; any operand bytes are supplied by the caller. */
#define ADDAX 0x05   /* add ax, imm16 : follow with two immediate bytes */
#define LODS 0xAD    /* lodsw                                           */
#define JZ 0x74      /* jz rel8 : follow with one displacement byte     */
#define CALL 0xE8    /* call rel16 : follow with two displacement bytes */
#define HALT 0xF4    /* hlt                                             */
//eof

Next are the macros which build the dictionary in memory.

$ cat fdict.h
/* Short aliases for the unsigned byte and unsigned short types used
 * throughout the dictionary image. */
typedef unsigned char UC;
typedef unsigned short US;

/* Fixed capacities of the variable-looking fields below.  They have to be
 * kept large enough by hand for the longest name / longest definition. */
enum {
    MAX_NAME = 8,
    MAX_CODE_PARAM = 20,
    MAX_WORD_PARAM = 10
};

/* Dictionary entry for a primitive: header followed by raw machine code. */
struct code_entry {
    US link;                      /* link field, filled in by CODE()       */
    UC name_len;                  /* number of characters used in name[]   */
    UC name[ MAX_NAME ];          /* word name, not NUL-terminated if full */
    US code;                      /* code field                            */
    UC param[ MAX_CODE_PARAM ];   /* machine-code bytes                    */
};

/* Dictionary entry for a colon-style word: header followed by a list of
 * US-sized cells. */
struct word_entry {
    US link;                      /* link field, filled in by WORD()       */
    UC name_len;                  /* number of characters used in name[]   */
    UC name[ MAX_NAME ];          /* word name, not NUL-terminated if full */
    US code;                      /* code field                            */
    US param[ MAX_WORD_PARAM ];   /* cells making up the definition        */
};

/*
 * CODE(n, e, ...) -- append one primitive (machine-code) entry to the
 * dictionary image being built.
 *
 *   n    the word's name as it should appear in the dictionary; it is only
 *        stringified, so it does not have to be a C identifier ( +, 1-, ! )
 *   e    a C-identifier-safe spelling of the name; the macro declares
 *        `const US c_<e>` holding the offset of this entry's param field
 *   ...  the machine-code bytes; NEXT is appended automatically
 *
 * Expects `char *p` (current write position) and `char *start` (base of the
 * image) to be in scope, and advances p past the new entry.  Needs
 * <string.h> for memcpy and <stddef.h> for offsetof at the point of use.
 *
 * NOTE(review): link is stored as minus the size of THIS entry, which only
 * reaches the previous entry when that entry has the same struct type --
 * confirm what the dictionary search is supposed to do at a CODE/WORD
 * boundary and at the very first entry.
 */
#define CODE(n, e, ...) \
    const US c_ ## e = P_PARAM_PTR; \
    { \
        struct code_entry x = { \
            .link = (US)( 0 - sizeof x ), \
            .name_len = sizeof( # n ) - 1, \
            .name = # n , \
            .code = P_PARAM_PTR, \
            .param = { __VA_ARGS__ , NEXT } \
        }; \
        memcpy( p, &x, sizeof x ); \
        p += sizeof x; \
    } \
    /*end CODE()*/

/* Offset, relative to start, of the param field of the code_entry that is
 * about to be written at p. */
#define P_PARAM_PTR ( (US)( ( p - start ) + offsetof( struct code_entry, param ) ) )

/*
 * WORD(n, e, ...) -- append one colon-style entry (a list of cells) to the
 * dictionary image being built.
 *
 *   n    the word's name as it should appear in the dictionary (stringified)
 *   e    a C-identifier-safe spelling of the name; the macro declares
 *        `const US c_<e>` holding the offset of this entry's code field
 *   ...  the cells of the definition, normally c_<name> constants declared
 *        by earlier CODE()/WORD() invocations; c_exit is appended
 *
 * The code field is set to c_enter, so CODE(enter, enter, ...) and
 * CODE(exit, exit, ...) must have been expanded earlier in the same scope.
 * Expects `char *p` and `char *start` in scope and advances p past the new
 * entry.  Needs <string.h> for memcpy and <stddef.h> for offsetof at the
 * point of use.
 *
 * NOTE(review): link is stored as minus the size of THIS entry, which only
 * reaches the previous entry when that entry has the same struct type --
 * confirm the intended behavior at a CODE/WORD boundary.
 */
#define WORD(n, e, ...) \
    const US c_ ## e = P_CODE_PTR; \
    { \
        struct word_entry x = { \
            .link = (US)( 0 - sizeof x ), \
            .name_len = sizeof( # n ) - 1, \
            .name = # n , \
            .code = c_enter, \
            .param = { __VA_ARGS__ , c_exit } \
        }; \
        memcpy( p, &x, sizeof x ); \
        p += sizeof x; \
    } \
    /*end WORD()*/

/* Offset, relative to start, of the code field of the word_entry that is
 * about to be written at p. */
#define P_CODE_PTR ( (US)( ( p - start ) + offsetof( struct word_entry, code ) ) )
//eof

It's not quite as transparent as I'd ideally like: there are the fixed MAX
sizes that I have to maintain manually. But it's close.

Next is the function which uses these macros to build the
memory image. Missing lots of higher functionality, especially
the inner interpreter loop and text interpreter routines.

$ cat forth3.h

#include <stddef.h>
#include <string.h>

#include "asm8086.h"
#include "fdict.h"
/* W = BX
IP = SI
PSP = SP
RSP = BP
X = AX
TOS_in_memory */
/* NEXT: fetch the next cell through SI into AX (LODS) and jump to it. */
#define NEXT LODS, JMP_(R,AX)
/* PUSHRSP(r): lea bp,[bp-4] then mov [bp+0],r -- push r on the return stack. */
#define PUSHRSP(r) LEA(,B,BP,BP_),minus(4), MOV(F,B,r,BP_),0
/* POPRSP(r): mov r,[bp+0] then lea bp,[bp+4] -- pop the return stack into r. */
#define POPRSP(r) MOV(,B,r,BP_),0, LEA(,B,BP,BP_),4
/* minus(x): the byte that encodes -x as an 8-bit two's-complement
 * displacement.  Fully parenthesized and masked so that it is safe inside
 * larger expressions and always fits in one byte. */
#define minus(x) ( ( 0x100 - (x) ) & 0xff )

/*
 * forth -- build the Forth dictionary image in the memory at `start`.
 *
 * Writes a two-byte stub at the very beginning, reserves the first 16 bytes,
 * and then appends one fixed-size dictionary entry per CODE()/WORD() line
 * below.  Each line also declares a `c_<name>` constant that later lines may
 * refer to, so the order of the lines matters.
 *
 * start   base of the image; no bounds checking is done, so the buffer must
 *         be large enough for the 16 reserved bytes plus every entry
 * returns 0 always
 *
 * NOTE(review): CALL is expected to come from asm8086.h.  Only one operand
 * byte is written after it; a near CALL takes a 16-bit displacement, so the
 * second byte is whatever the buffer already holds -- confirm the intent.
 */
static inline int
forth(char *start){
    char *p = start;
    { UC x[] = { CALL, 00 }; memcpy( p, x, sizeof x ); p += 16; }

    /* threading support */
    CODE(enter, enter, PUSHRSP(SI), ADDAX,4,0, MOV(,R,DI,AX))
    CODE(exit, exit, POPRSP(SI))
    CODE(lit, lit, LODS, PUSH(AX))

    /* stack shuffling */
    CODE(drop, drop, POP(AX))
    CODE(swap, swap, POP(AX), POP(BX), PUSH(AX), PUSH(BX))
    CODE(dup, dup, MOV(,R,BX,SP), MOV(,B,AX,BX_),0, PUSH(AX))
    CODE(over, over, MOV(,R,BX,SP), MOV(,B,AX,BX_),2, PUSH(AX))
    CODE(rot, rot, POP(AX), POP(BX), POP(CX), PUSH(BX), PUSH(AX), PUSH(CX))
    CODE(-rot, nrot, POP(AX), POP(BX), POP(CX), PUSH(AX), PUSH(CX), PUSH(BX))
    CODE(2drop, 2drop, POP(AX), POP(AX))
    /* AX = top item, CX = second item.  The copy of the second item has to
     * be pushed first so that the result is ( a b a b ), not ( a b b a ). */
    CODE(2dup, 2dup, MOV(,R,BX,SP), MOV(,B,AX,BX_),0, MOV(,B,CX,BX_),2, PUSH(CX), PUSH(AX))

    /* arithmetic, operating on the top of stack in memory */
    CODE(1+, oneplus, MOV(,R,BX,SP), INC_(B,BX_),0)
    CODE(1-, oneminus, MOV(,R,BX,SP), DEC_(B,BX_),0)
    CODE(+, plus, POP(AX), MOV(,R,BX,SP), ADD(F,B,AX,BX_),0)
    CODE(-, minus, POP(AX), MOV(,R,BX,SP), SUB(F,B,AX,BX_),0)
    CODE(*, star, POP(AX), POP(BX), IMUL(R,BX), PUSH(AX))

    /* memory access: address is popped into BX, value (if any) into AX */
    CODE(!, bang, POP(BX), POP(AX), MOV(F,Z,AX,BX_))
    CODE(@, at, POP(BX), MOV(,Z,AX,BX_), PUSH(AX))
    CODE(+!, plusbang, POP(BX), POP(AX), ADD(F,Z,AX,BX_))
    CODE(-!, minusbang, POP(BX), POP(AX), SUB(F,Z,AX,BX_))

    CODE(bye, bye, HALT)

    /* words defined in terms of other words */
    WORD(double, double, c_dup, c_plus)
    WORD(dubdub, dubdub, c_double, c_double)
    return 0;
}
//eof

These are all intended to work with my 8086 emulator, also in C.
https://github.com/luser-dr00g/8086

Materials consulted in writing this so far:
http://www.forth.org/fig-forth/fig-forth_8086-8088_ver_10.pdf
http://forthfiles.net/ting/sysguidefig.pdf
ftp://ftp.oldskool.org/pub/misc/temp/8086_family_Users_Manual.pdf

Any comments welcome. What should I work on next, or fix, or add?

minf...@arcor.de

unread,

Jan 1, 2019, 6:09:43 PM1/1/19

to

With due respect: it seems a proof-of-concept study to me to spill 8086 opcodes.
So far so good.

If your target is your own CPU emulator, build a slow prototype emulator next,
perhaps in some "slow" Forth, or in "normal" C with its inline assembler.

If your target is a Forth system, you could reconstruct eforth by your method.

Albert van der Horst

unread,

Jan 2, 2019, 6:16:06 AM1/2/19

to

In article <fd6a7eee-d0dc-40e8...@googlegroups.com>,

luser droog <mij...@yahoo.com> wrote:
>I found this incomplete code on my disk that I worked on several months
>ago but didn't finish. But I still think it's pretty cool, and with all
>the other "meta" threads, I thought I might get some useful feedback or
>at the very least some entertaining insults.
>
>First is a header file which implements a macro assembler for 8086
>machine code in C preprocessor macros.
>
>$ cat asm8086.h

<SNIP>

>//eof
>
>
>These are all intended to work with my 8086 emulator, also in C.
>https://github.com/luser-dr00g/8086
>
>Materials consulted in writing this so far:
>http://www.forth.org/fig-forth/fig-forth_8086-8088_ver_10.pdf
>http://forthfiles.net/ting/sysguidefig.pdf
>ftp://ftp.oldskool.org/pub/misc/temp/8086_family_Users_Manual.pdf
>
>Any comments welcome. What should I work on next, or fix, or add?

This reminds me of "FORTRAN programmers can write FORTRAN In
any language."
You'll end up with about the same source as my generic Forth ciforth.
(Now 5.129 1.1 was the fig-Forth you refer to.)
However, I use the right languages and tools for the job.

1. If you do macros, use a real macro processor: m4
2. If you use assembler, take advantage of the most important
thing an assembler can do for you, system-dependent executable
layout. This replaces fighting the link scripts through a
c-interface. (This is more significant than having the
mnemonics available; those can be had using macros.)

The mechanism is explained in
http://home.hccnet.nl/a.w.m.van.der.horst/cifgenps.zip

An overview of all that has been done using this mechanism.
http://home.hccnet.nl/a.w.m.van.der.horst/ciforth.html

Once I used a real macro processor I integrated tests and docs in
the source.

If this is an exercise, by all means continue.
I saved the message because it is interesting, but
I would be more interested in having your lisp in c working.

Groetjes Albert
--
Albert van der Horst, UTRECHT,THE NETHERLANDS
Economic growth -- being exponential -- ultimately falters.
albert@spe&ar&c.xs4all.nl &=n http://home.hccnet.nl/a.w.m.van.der.horst

luser droog

unread,

Jan 5, 2019, 1:38:15 AM1/5/19

to

Thanks for looking. I have done some stuff with m4, but it frightens
me somewhat. It certainly frightens everyone I've told about it.

And I remembered why this whole project foundered. It's because the
emulator doesn't have any means of getting input. So I switched
gears to try to add that and went down a further rabbit hole after
discovering "The 80x86 is an Octal Machine". And I set about trying
to rewrite the whole emulator. And somewhere in that struggle the
whole impetus petered out.

http://www.dabo.de/ccc99/www.camp.ccc.de/radio/help.txt

So, I've turned back to the lisp interpreter. I fixed a bug,
but it was in code that I think was not active so it shouldn't
affect your bug. But I don't understand what's going wrong.
Can you compile it with -g and get gdb to get a backtrace?
(the latest commit of course)

I'm slowly replacing the reader functions with more functional
functions. Local variables become arguments. Initialization
statements become expressions in the caller. Loops become
recursion.

Next big step will be replacing the custom character encoding
with plain old utf8, or maybe utf7 so I can use the bit as
a terminator. Maybe it needs a STRING object type, too.

Albert van der Horst

unread,

Jan 5, 2019, 4:51:14 AM1/5/19

to

In article <848e7338-ca01-4548...@googlegroups.com>,

It is much less frightening than what Anton et al. try to do,
get a c-compiler to do your bidding. Sometimes m4 does unexpected
things, but it always makes sense if you look closer.
Really, do look at cifgen.ps.
If you use a vi that finds
corresponding brackets, it is quite easy.

>
>And I remembered why this whole project foundered. It's because the

<SNIP>

luser droog

unread,

Jan 15, 2019, 5:30:11 PM1/15/19

to

On Wednesday, January 2, 2019 at 5:16:06 AM UTC-6, Albert van der Horst wrote:

> I would be more interested in having your lisp in c working.
>

It's working much better now. Atoms are limited to 10 characters but it's
a parameter now. Internally used atoms are built as enum symbols so they
can be used directly in the source.

The old 6-bit code is gone.

Rod Pemberton

unread,

Jun 2, 2019, 11:24:49 PM6/2/19

to

On Tue, 1 Jan 2019 13:05:35 -0800 (PST)
luser droog <mij...@yahoo.com> wrote:

> I found this incomplete code on my disk that I worked on several
> months ago but didn't finish. But I still think it's pretty cool, and
> with all the other "meta" threads, I thought I might get some useful
> feedback or at the very least some entertaining insults.
>

> [snip]

I'm not really sure why you need the CODE words, as you can apparently
call C equivalent functions, e.g., dup versus c_dup. In other words,
you could replace the CODE words with WORD words calling the C function
for your primitives. Then, your Forth isn't dependent on a particular
assembly language, i.e., it'd be as "portable" as C is portable.

Why did you choose to limit the lengths of the code and word params?
e.g., apparent C implementation limitations.

Rod Pemberton
--
Once upon a time, many decades ago in a place far away, humble people
sought their freedom, and lost. "Ideas are bulletproof."

luser droog

unread,

Jun 3, 2019, 1:15:08 PM6/3/19

to

On Sunday, June 2, 2019 at 10:24:49 PM UTC-5, Rod Pemberton wrote:
> On Tue, 1 Jan 2019 13:05:35 -0800 (PST)
> luser droog <mij...@yahoo.com> wrote:
>
> > I found this incomplete code on my disk that I worked on several
> > months ago but didn't finish. But I still think it's pretty cool, and
> > with all the other "meta" threads, I thought I might get some useful
> > feedback or at the very least some entertaining insults.
> >
> > [snip]
>
> I'm not really sure why you need the CODE words, as you can apparently
> call C equivalent functions, e.g., dup versus c_dup. In other words,
> you could replace the CODE words with WORD words calling the C function
> for your primitives. Then, your Forth isn't dependent on a particular
> assembly language, i.e., it'd be as "portable" as C is portable.

Well, damn, you're probably right. I could eliminate 90% of the code.
But I also had another purpose which was to exercise and extend my
8086 interpreter. Since I haven't done much more with any of it,
perhaps none of my purposes was very important to me, sigh.

> Why did you choose to limit the lengths of the code and word params?
> e.g., apparent C implementation limitations.
>

Yes, rereading it, I couldn't see (and still don't) an easy way of
declaring a structure with a variable length array in the middle.
Of course it's still possible to accomplish the same effect in C,
but I didn't see a nice transparent way of declaring it. But also
I think I can't use 'sizeof' to compute the link fields as naively
if the structure sizes are variable.

Of course the very code and word params that you point out ARE at the
end of the structs and so perfectly viable for a variable length member.
I think I chose not to do this purely for consistency with the handling
of names. That way I could treat all such counted things uniformly.

It would certainly improve memory efficiency to lose the padding.
Good catch.

Rod Pemberton

unread,

Jul 22, 2019, 4:36:11 AM7/22/19

to

On Mon, 3 Jun 2019 10:15:05 -0700 (PDT)
luser droog <mij...@yahoo.com> wrote:

> > Why did you choose to limit the lengths of the code and word params?
> > e.g., apparent C implementation limitations.
> >
>
> Yes, rereading it, I couldn't see (and still don't) an easy way of
> declaring a structure with a variable length array in the middle.
> Of course it's still possible to do accomplish the same effect in C,
> but I didn't see a nice transparent way of declaring it. But also

Are you still here?

Did you see the code near the bottom of my post in reply to "minforth"
a while ago? "minforth" is now posting as "A. K." (For some reason, I
was mistakenly thinking that prior conversation was with you.)

https://groups.google.com/d/msg/comp.lang.forth/NUqpHQsrODE/EymClikhAQAJ
Usenet msg-id peb6ml$1f7s$1...@gioia.aioe.org

It shows one way to represent the word's header and CFA/PFA in C for
ITC via two C objects, a struct and an array. The struct links to
other structs. The array is the compiled address list (CFA/PFA).

Look for "func1" or "func2" under "First output file (interpretable
Forth in C):" section, as there is another section with different file
output.

HTH,

Rod Pemberton

P.S. Let me know if you see this reply, as I haven't noticed a post
from you to c.l.f. in a while.

luser droog

unread,

Jul 22, 2019, 4:36:16 PM7/22/19

to

Got it. Thanks. Been busy with a geometric problem over in
comp.lang.postscript.