ps scanner - finite state machines

luser- -droog

unread,

Mar 25, 2013, 2:08:41 AM3/25/13

to

Rough draft translation of xpost's scanner into postscript. I'm thinking that I can "compile" this into the systemdict data structure to bootstrap the next interpreter. Or extend it into that golf-library scanner I was talking about.

%!

/digit (0123456789) def
/alpha (abcdefghijklmnopqrstuvwxyz\
ABCDEFGHIJKLMNOPQRSTUVWXYZ) def
/lower alpha 0 26 getinterval def
/upper alpha 26 26 getinterval def
/alnum 36 string def
digit alnum copy pop
alnum 10 upper putinterval
/l-u lower 0 get upper 0 get sub def
%alnum =

/indexof {
exch 1 string dup 0 4 3 roll put % str (c)
search { % post match pre
length exch pop exch pop
}{ % str
pop -1
} ifelse
} def

/within { % char str . bool
exch 1 string dup 0 4 3 roll put % str (c)
search { % post match pre
pop pop pop true
}{ % str
pop false
} ifelse
} def
/israd { (#) 0 get eq } def
/isalpha { alpha within } def
/isdigit { digit within } def
/isupper { upper within } def
/isalnum { dup digit within { pop true }{ alpha within } ifelse } def
/israddig { isalnum } def
/isdot { (.) 0 get eq } def
/ise { (eE) within } def
/issign { (+-) within } def
/isdel { (()<>[]{}/%) within } def

% the automaton type
% [ predicate yes-transition no-transition ]

% automaton to match a simple decimal number
% /^[+-]?[0-9]+$/
/fsm_dec [
[ /issign cvx 1 1 ] % 0
[ /isdigit cvx 2 -1 ] % 1
[ /isdigit cvx 2 -1 ] % 2
] def
/accept_dec { 2 eq } def

% automaton to match a radix number
% /^[0-9]+[#][a-Z0-9]+$/
/fsm_rad [
[ /isdigit cvx 1 -1 ] % 0
[ /isdigit cvx 1 2 ] % 1
[ /israd cvx 3 -1 ] % 2
[ /israddig cvx 4 -1 ] % 3
[ /israddig cvx 4 -1 ] % 4
] def
/accept_rad { 4 eq } def

% automaton to match a real number
% /^[+-]?(d+(.d*)?)|(d*.d+)([eE][+-]?d+)?$/
% where d = [0-9]
/fsm_real [
[ /issign cvx 1 1 ] % 0
[ /isdigit cvx 2 4 ] % 1
[ /isdigit cvx 2 3 ] % 2
[ /isdot cvx 6 7 ] % 3
[ /isdot cvx 5 -1 ] % 4
[ /isdigit cvx 6 -1 ] % 5
[ /isdigit cvx 6 7 ] % 6
[ /ise cvx 8 -1 ] % 7
[ /issign cvx 9 9 ] % 8
[ /isdigit cvx 10 -1 ] % 9
[ /isdigit cvx 10 -1 ] % 10
] def
/accept_real {
dup 2 eq % integer
1 index 6 eq % real
2 index 10 eq % exponent
or or exch pop
} def

/check { 5 dict begin {/accept/fsm/str}{exch def}forall
/sta 0 def
/i 0 def
/e str length 1 sub def
{
%str length 0 eq { exit } if
i e gt { exit } if
sta -1 eq { exit } if
str i get
fsm sta get 0 get exec {
/sta fsm sta get 1 get def % sta=fsm[sta].y
%/str str 1 str length 1 sub getinterval def % s++
/i i 1 add def
}{
/sta fsm sta get 2 get def % sta=fsm[sta].n
} ifelse
} loop
sta accept
end } def

/radix {
0 exch
(#) search pop %dup =
cvi exch pop exch % 0 base (digits)
0 1 2 index length 1 sub
%pstack()=
{ % sum base (str) i
2 copy get
dup lower within { l-u sub } if
alnum indexof % sum base (str) i digit
%dup =
5 4 roll 4 index mul % base (str) i digit sum*base
add % base (str) i sum=sum*base+digit
4 1 roll pop % sum base (str)
} for
pop pop
} def

%
/grok { 1 dict begin /s exch def
{
s fsm_dec /accept_dec load check {
s cvi exit
} if
s fsm_rad /accept_rad load check {
%s =
s radix exit
} if
s fsm_real /accept_real load check {
s cvr exit
} if

exit } loop
end } def

/toke {
} def

(-012345) fsm_dec /accept_dec load check
(16#ABCDEF012) fsm_rad /accept_rad load check
(-23.5e10) fsm_real /accept_real load check
(37) grok
%clear
(16#30) grok
(-1234.5) grok
pstack

luser- -droog

unread,

Mar 30, 2013, 12:40:38 PM3/30/13

to

A little more fleshed-out.

%!

/digit (0123456789) def
/alpha (ABCDEFGHIJKLMNOPQRSTUVWXYZ\
abcdefghijklmnopqrstuvwxyz) def
/upper alpha 0 26 getinterval def
/lower alpha 26 26 getinterval def
/alnum 62 string def
digit alnum copy pop
alnum 10 alpha putinterval
/u-l upper 0 get lower 0 get sub def

%alnum =

/indexof {
exch 1 string dup 0 4 3 roll put % str (c)
search { % post match pre
length exch pop exch pop
}{ % str
pop -1
} ifelse
} def

/within { % char str . bool
exch 1 string dup 0 4 3 roll put % str (c)
search { % post match pre
pop pop pop true
}{ % str
pop false
} ifelse
} def
/israd { (#) 0 get eq } def
/isalpha { alpha within } def
/isdigit { digit within } def
/isupper { upper within } def

/isalnum { dup isdigit { pop true }{ isalpha } ifelse } def

/israddig { isalnum } def
/isdot { (.) 0 get eq } def
/ise { (eE) within } def
/issign { (+-) within } def
/isdel { (()<>[]{}/%) within } def

/isspace { ( \t\n) within } def
/isreg { dup isspace { pop false }{ isdel not } ifelse } def

% str fsm accept-proc . bool
% execute fsm-acceptor against string yielding true/false

/check { 5 dict begin {/accept/fsm/str}{exch def}forall
/sta 0 def
/i 0 def
/e str length 1 sub def
{
%str length 0 eq { exit } if
i e gt { exit } if
sta -1 eq { exit } if
str i get
fsm sta get 0 get exec {
/sta fsm sta get 1 get def % sta=fsm[sta].y
%/str str 1 str length 1 sub getinterval def % s++
/i i 1 add def
}{
/sta fsm sta get 2 get def % sta=fsm[sta].n
} ifelse
} loop
sta accept
end } def

% convert a string to integer using radix
/cvri { % string radix . num
0 3 1 roll exch % 0 base str
dup 0 get issign {
dup 0 get (-) 0 get eq 4 1 roll
1 1 index length 1 sub getinterval
}{ false 4 1 roll } ifelse % bool sum base str i
0 1 2 index length 1 sub { % bool sum base str i
2 copy get % bool sum base str i s_i
dup lower within { u-l add } if
%dup =()=
alnum indexof % bool sum base str i digit
5 4 roll 4 index mul % bool base str i digit sum*base
add % bool base str i sum=sum*base+digit
4 1 roll pop % bool sum base str
} for
pop pop % bool sum
exch { neg } if % num
} def

% interpret a string containing a radix number
/radix {
(#) search pop exch pop % (digits) (radix) % split
10 cvri % (digits) radix
cvri % num
} def

% postscript "switch" statement
/grok-dict <<
(\() 0 get {
}

(\<) 0 get {
}

({) 0 get {
}

(/) 0 get {
}

/default {
}
>> def

% interpret a string using fsm-acceptors and convertors

/grok { 1 dict begin /s exch def

%s ==

{
s fsm_dec /accept_dec load check {

s 10 cvri exit } if

s fsm_rad /accept_rad load check {

s radix exit } if
s fsm_real /accept_real load check {
s cvr exit } if

grok-dict s 0 get
2 copy known not {
pop /default } if
get exec

exit } loop
end } def

/puff {
{
si src length ge { exit } if
src si get isreg {
buf bi src si get put
/bi bi 1 add def
/si si 1 add def
}{
exit
} ifelse
} loop
} def

% string . (remainder) token bool
/toke {
<< exch
/src exch
/si 0
/buf 500 string
/bi 0
>> begin
{
src si get isspace not
{ exit } if
/si si 1 add def
} loop
pstack()=
si src length ge { src null }{
buf bi src si get put
/bi bi 1 add def
/si si 1 add def
si src length lt {
src si get isdel not {
puff
pstack()=
} if
} if
src si 1 index length 1 index sub getinterval
buf 0 bi getinterval grok
} ifelse
end } def

{
(-012345) fsm_dec /accept_dec load check
(16#ABCDEF012) fsm_rad /accept_rad load check
(-23.5e10) fsm_real /accept_real load check

(-37) grok
%clear
(16#30) grok
(16#0a) grok
(16#0B) grok
(16#-0A) radix % scanner won't accept this, but `radix` converts it
(16#Ff) grok
(-1234.5) grok
pstack
} pop

( 5#01234321 ) toke
pstack

luser- -droog

unread,

Mar 31, 2013, 3:59:54 AM3/31/13

to

On Saturday, March 30, 2013 11:40:38 AM UTC-5, luser- -droog wrote:
> A little more fleshed-out.

>
>
>
> % postscript "switch" statement
>
> /grok-dict <<
>
> (\() 0 get {
>
> }
>
>
>
> (\<) 0 get {
>
> }
>
>
>
> ({) 0 get {
>
> }
>
>
>
> (/) 0 get {
>
> }
>
>
>
> /default {
>
> }
>
> >> def
>
>
>
> % interpret a string using fsm-acceptors and convertors
>
> /grok { 1 dict begin /s exch def
>
> %s ==
>
> {
>
> s fsm_dec /accept_dec load check {
>
> s 10 cvri exit } if
>
> s fsm_rad /accept_rad load check {
>
> s radix exit } if
>
> s fsm_real /accept_real load check {
>
> s cvr exit } if
>
>
>
> grok-dict s 0 get
>
> 2 copy known not {
>
> pop /default } if
>
> get exec
>
>
>
> exit } loop
>
> end } def
>
>

This raises an aspect of PostScript that I've never quite understood.

How exactly are executable procedures constructed by the /real/ postscript interpreter? I've always been confused by the part that says execution is "deferred". My very first draft of a ps-interpreter (it was called `flips`. I never showed it.) actually had a flag called `bool deferred` and procedures were built by the main interpreter loop by not performing any execution actions, but merely allowing objects to accumulate until the scanner hit the '}' and it would set the flag to false for the inner loop to act upon after the scanner returned.

But that version didn't have a `token` operator at all. In fact discovering the need for the whole scanner to be *in* the `token` operator led directly to the re-write `podvig` which used malloc'd linked-lists to accumulate objects, then count them and copy into an array. `xpost` does the same thing, although I made the code a lot prettier. It's still dirty work.

But `token` just needs to call itself recursively, right? `mark` ... `counttomark ...` that should present any surprises right?

'Deferred'-execution is really about *not executing* executable procedures hot off the scanner, right? No-need to honkey-rig the main loop.

--
honkey rig
honkey rig
honkey rig

luser- -droog

unread,

Apr 23, 2013, 2:33:15 AM4/23/13

to

The hexstrings part is really hairy. I'm second-guessing my idea to "compile"
this for xpost3.

Still need to do regular strings (all those escapes!),
procedures, and immediate names.

It's curious that immediate names are only supposed to work
inside of procedures. I think xpost2 allows them at the top-
level. Ooops.

%!

/digit (0123456789) def
/alpha (ABCDEFGHIJKLMNOPQRSTUVWXYZ\
abcdefghijklmnopqrstuvwxyz) def
/upper alpha 0 26 getinterval def
/lower alpha 26 26 getinterval def
/alnum 62 string def
digit alnum copy pop
alnum 10 alpha putinterval
/u-l upper 0 get lower 0 get sub def
%alnum =

/indexof { % char str . index-of-char-in-string

exch 1 string dup 0 4 3 roll put % str (c)
search { % post match pre

length 3 1 roll pop pop

}{ % str
pop -1
} ifelse
} def

/within { % char str . bool

indexof 0 ge

} def

/israd { (#) 0 get eq } def
/isalpha { alpha within } def
/isdigit { digit within } def

/isxdigit { %dup lower within { u-l add } if
alnum indexof 16 lt } def

/isupper { upper within } def
/isalnum { dup isdigit { pop true }{ isalpha } ifelse } def
/israddig { isalnum } def
/isdot { (.) 0 get eq } def
/ise { (eE) within } def
/issign { (+-) within } def
/isdel { (()<>[]{}/%) within } def
/isspace { ( \t\n) within } def
/isreg { dup isspace { pop false }{ isdel not } ifelse } def

% the automaton type
% [ predicate yes-transition no-transition ]

% automaton to match a simple decimal number
% /^[+-]?[0-9]+$/
/fsm_dec [

[ /issign 1 1 ] % 0
[ /isdigit 2 -1 ] % 1
[ /isdigit 2 -1 ] % 2

] def
/accept_dec { 2 eq } def

% automaton to match a radix number
% /^[0-9]+[#][a-Z0-9]+$/
/fsm_rad [

[ /isdigit 1 -1 ] % 0
[ /isdigit 1 2 ] % 1
[ /israd 3 -1 ] % 2
[ /israddig 4 -1 ] % 3
[ /israddig 4 -1 ] % 4

] def
/accept_rad { 4 eq } def

% automaton to match a real number
% /^[+-]?(d+(.d*)?)|(d*.d+)([eE][+-]?d+)?$/
% where d = [0-9]
/fsm_real [

[ /issign 1 1 ] % 0
[ /isdigit 2 4 ] % 1
[ /isdigit 2 3 ] % 2
[ /isdot 6 7 ] % 3
[ /isdot 5 -1 ] % 4
[ /isdigit 6 -1 ] % 5
[ /isdigit 6 7 ] % 6
[ /ise 8 -1 ] % 7
[ /issign 9 9 ] % 8
[ /isdigit 10 -1 ] % 9
[ /isdigit 10 -1 ] % 10

] def
/accept_real {
dup 2 eq % integer
1 index 6 eq % real
2 index 10 eq % exponent
or or exch pop
} def

% str fsm accept-proc . bool
% execute fsm-acceptor against string yielding true/false
/check { 5 dict begin {/accept/fsm/str}{exch def}forall
/sta 0 def
/i 0 def
/e str length 1 sub def
{
%str length 0 eq { exit } if
i e gt { exit } if
sta -1 eq { exit } if
str i get

fsm sta get 0 get cvx exec {

} def

% interpret a string containing a radix number
/radix {
(#) search pop exch pop % (digits) (radix) % split
10 cvri % (digits) radix
cvri % num
} def

% postscript "switch" statement
/grok-dict <<

(\() 0 get { %string
}

(<) 0 get { %hex string
/dst src length 2 idiv 1 add string def
/di 0 def
%/bi 0 def
/si si s length 1 sub sub def % un-puff
src si get %dup =

/si si 1 add def

dup (\<) 0 get eq {
pop %mark
(<<) cvn cvx
}{ %else
{ % c
%dup =
dup (\>) 0 get eq { pop exit } if

dup isspace { pop }{

dup lower within { u-l add } if

dup isxdigit {
alnum indexof
%dup =
4 bitshift

{ % eat whitespace until 2nd digit
si src length ge { (0) 0 get exit } if
src si get

/si si 1 add def

dup isspace not { exit } if
pop
} loop

dup lower within { u-l add } if

dup isxdigit {
alnum indexof
%dup =
or
}{
/toke cvx /syntaxerror signalerror
} ifelse

}{
/toke cvx /syntaxerror signalerror
} ifelse

%buf exch bi exch put
%/bi bi 1 add def
dst exch di exch put
/di di 1 add def

} ifelse

si src length ge { exit } if
src si get

/si si 1 add def
} loop

%buf 0 bi getinterval
pop src si 1 index length 1 index sub getinterval
dst 0 di getinterval
} ifelse
}

(>) 0 get { %dict
src si get
(\>) 0 get eq {

/si si 1 add def

pop src si 1 index length 1 index sub getinterval
(>>) cvn cvx
%counttomark 2 idiv dup dict begin { def } repeat currentdict end
}{
/toke cvx /syntaxerror signalerror
} ifelse
}

({) 0 get { %procedure
}

(/) 0 get { %literal name
pop src si 1 index length 1 index sub getinterval
s 1 s length 1 sub getinterval
cvn
}

/default { %bareword: executable name
s cvn cvx

}
>> def

% interpret a string using fsm-acceptors and convertors
/grok { 1 dict begin /s exch def
%s ==
{

s fsm_dec //accept_dec check {

s 10 cvri exit } if

s fsm_rad //accept_rad check {
s radix exit } if
s fsm_real //accept_real check {

s cvr exit } if

grok-dict s 0 get
2 copy known not {
pop /default } if
get exec

exit } loop
end } def

/puff {
{
si src length ge { exit } if
src si get isreg {
buf bi src si get put
/bi bi 1 add def
/si si 1 add def
}{
exit
} ifelse
} loop
} def

% string . (remainder) token bool
/toke {
<< exch
/src exch

/si 0 % src-index
/buf 500 string
/bi 0 % buf-index

>> begin
{
src si get isspace not
{ exit } if
/si si 1 add def
} loop

%pstack()=

si src length ge { src null }{
buf bi src si get put
/bi bi 1 add def
/si si 1 add def
si src length lt {
src si get isdel not {
puff

%pstack()=

} if
} if
src si 1 index length 1 index sub getinterval
buf 0 bi getinterval grok
} ifelse
end } def

{
(-012345) fsm_dec /accept_dec load check
(16#ABCDEF012) fsm_rad /accept_rad load check
(-23.5e10) fsm_real /accept_real load check
(-37) grok

(16#30) grok
(16#0a) grok
(16#0B) grok
(16#-0A) radix % scanner won't accept this, but `radix` converts it
(16#Ff) grok
(-1234.5) grok

pstack clear
( 2#10000001 ) toke pstack clear ()=
(<30 31 32 33 34 35 36 37 38 39 >) toke
(<<) toke exch pop /this (this) /that (that) (>>) toke
(/name)toke
(exec)toke
pstack clear
} exec%pop

pstack clear

luser- -droog

unread,

Jun 2, 2013, 6:21:04 AM6/2/13

to

Started work on the "compiled" version I've been talking about. Thought you all might like to see it. It's just some stupid macros to wrap the constructor functions.

#include <stdbool.h> /* ob.h:bool */
#include <stdlib.h> /* NULL */

#include "m.h"
#include "ob.h"
#include "s.h"
#include "itp.h"
#include "st.h"
#include "ar.h"
#include "di.h"
#include "op.h"
#include "nm.h"

void initoptok(context *ctx, object sd) {
oper *optab = (void *)(ctx->gl->base + adrent(ctx->gl, OPTAB));
object n,op;
object ar;
int i;

object alnum = cvlit(consbst(ctx, 62,
"0123456789"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"));
object digit = arrgetinterval(alnum, 0, 10);
object alpha = arrgetinterval(alnum, 10, 52);
object upper = arrgetinterval(alnum, 10, 26);
object lower = arrgetinterval(alnum, 36, 26);
object u_l = consint('A' - 'a');

#define N(t) cvx(consname(ctx, #t))
#define L(t) consint(t)

#define ARR(n) ar = cvx(consbar(ctx, n)); i = 0
#define ADD(x) barput(ctx, ar, i++, x)
#define DEF(name) bdcput(ctx, sd, N(name), ar)
#define ADDSUB(n) { object sar = consbar(ctx, n); { object ar = sar; i = 0
#define ENDSUB } ADD(sar); }

/*
ARR(13);
ADD(N(exch)); ADD(L(1)); ADD(N(string)); ADD(N(dup));
ADD(L(0)); ADD(L(4)); ADD(L(3)); ADD(N(roll)); ADD(N(put));
ADD(N(search));
ADDSUB(6);
ADD(N(length)); ADD(L(3)); ADD(L(1)); ADD(N(roll));
ADD(N(pop)); ADD(N(pop)); ENDSUB
ADDSUB(2); ADD(N(pop)); ADD(L(-1)); ENDSUB
ADD(N(ifelse));
DEF(indexof);
*/
ARR(10);
ADD(L(-1)); ADD(L(3)); ADD(L(1)); ADD(N(roll)); ADD(L(0)); ADD(N(exch));
ADDSUB(7);
ADD(L(2)); ADD(N(index)); ADD(N(eq));
ADDSUB(4); ADD(L(3)); ADD(L(1)); ADD(N(roll)); ADD(N(exit)); ENDSUB;
ADD(N(if));
ADD(L(1)); ADD(N(add)); ENDSUB;
ADD(N(forall)); ADD(N(pop)); ADD(N(pop));
DEF(indexof);

ARR(3); ADD(N(indexof)); ADD(L(0)); ADD(N(ge)); DEF(within);
ARR(2); ADD(L('#')); ADD(N(eq)); DEF(israd);
ARR(2); ADD(alpha); ADD(N(within)); DEF(isalpha);
ARR(2); ADD(digit); ADD(N(within)); DEF(isdigit);
ARR(4); ADD(alnum); ADD(N(indexof)); ADD(L(16)); ADD(N(lt)); DEF(isxdigit);
ARR(2); ADD(upper); ADD(N(within)); DEF(isupper);
ARR(2); ADD(alnum); ADD(N(within)); DEF(isalnum);
ARR(1); ADD(N(isalnum)); DEF(israddig);
ARR(2); ADD(L('.')); ADD(N(eq)); DEF(isdot);
ARR(2); ADD(consbst(ctx, 2, "eE")); ADD(N(within)); DEF(ise);
ARR(2); ADD(consbst(ctx, 2, "+-")); ADD(N(within)); DEF(issign);
ARR(2); ADD(consbst(ctx, 10, "()<>[]{}/%")); ADD(N(within)); DEF(isdel);
ARR(2); ADD(consbst(ctx, 3, " \t\n")); ADD(N(within)); DEF(isspace);
ARR(5); ADD(N(dup)); ADD(N(isspace));
ADDSUB(2); ADD(N(pop)); ADD(N(false)); ENDSUB;
ADDSUB(2); ADD(N(isdel)); ADD(N(not)); ENDSUB;
ADD(N(ifelse)); DEF(isreg);

ARR(3); ar = cvlit(ar);
ADDSUB(3); ADD(N(issign)); ADD(L(1)); ADD(L(1)); ENDSUB; % 0
ADDSUB(3); ADD(N(isdigit)); ADD(L(2)); ADD(L(-1)); ENDSUB; % 1
ADDSUB(3); ADD(N(isdigit)); ADD(L(2)); ADD(L(-1)); ENDSUB; % 2
DEF(fsm_dec);
ARR(2); ADD(L(2)); ADD(N(eq)); DEF(accept_dec);

ARR(5); ar = cvlit(ar);
ADDSUB(3); ADD(N(isdigit)); ADD(L(1)); ADD(L(-1)); ENDSUB; % 0
ADDSUB(3); ADD(N(isdigit)); ADD(L(1)); ADD(L(2)); ENDSUB; % 1
ADDSUB(3); ADD(N(israd)); ADD(L(3)); ADD(L(-1)); ENDSUB; % 2
ADDSUB(3); ADD(N(israddig)); ADD(L(4)); ADD(L(-1)); ENDSUB; % 3
ADDSUB(3); ADD(N(israddig)); ADD(L(4)); ADD(L(-1)); ENDSUB; % 4
DEF(fsm_rad);
ARR(2); ADD(L(4)); ADD(N(eq)); DEF(accept_rad);

ARR(11); ar = cvlit(ar);
ADDSUB(3); ADD(N(issign)); ADD(L(1)); ADD(L(1)); ENDSUB; % 0
ADDSUB(3); ADD(N(isdigit)); ADD(L(2)); ADD(L(4)); ENDSUB; % 1
ADDSUB(3); ADD(N(isdigit)); ADD(L(2)); ADD(L(3)); ENDSUB; % 2
ADDSUB(3); ADD(N(isdot)); ADD(L(6)); ADD(L(7)); ENDSUB; % 3
ADDSUB(3); ADD(N(isdot)); ADD(L(5)); ADD(L(-1)); ENDSUB; % 4
ADDSUB(3); ADD(N(isdigit)); ADD(L(6)); ADD(L(-1)); ENDSUB; % 5
ADDSUB(3); ADD(N(isdigit)); ADD(L(6)); ADD(L(7)); ENDSUB; % 6
ADDSUB(3); ADD(N(ise)); ADD(L(8)); ADD(L(-1)); ENDSUB; % 7
ADDSUB(3); ADD(N(issign)); ADD(L(9)); ADD(L(9)); ENDSUB; % 8
ADDSUB(3); ADD(N(isdigit)); ADD(L(10)); ADD(L(-1)); ENDSUB; % 9
ADDSUB(3); ADD(N(isdigit)); ADD(L(10)); ADD(L(-1)); ENDSUB; % 10
DEF(fsm_real);
ARR(15);
ADD(N(dup)); ADD(L(2)); ADD(N(eq)); %integer
ADD(L(1)); ADD(N(index)); ADD(L(6)); ADD(N(eq)); %real
ADD(L(2)); ADD(N(index)); ADD(L(10)); ADD(N(eq)); %exponent
ADD(N(or)); ADD(N(or)); ADD(N(exch)); ADD(N(pop));
DEF(accept_real);

//op = consoper(ctx, "string", Istring, 1, 1, integertype); INSTALL;
}

luser- -droog

unread,

Jun 9, 2013, 3:16:41 PM6/9/13

to

On Sunday, June 2, 2013 5:21:04 AM UTC-5, luser- -droog wrote:
> Started work on the "compiled" version I've been talking about. Thought you all might like to see it. It's just some stupid macros to wrap the constructor functions.
>

Got it to read an integer. But radix is returning wrong results, and real is crashing.

Now shows a "combined" view with the relevant postscript code as comments to the array construction blocks.

http://code.google.com/p/xpost/source/browse/optok.c

luser- -droog

unread,

Jun 12, 2013, 2:40:19 AM6/12/13

to

Fixed my scanner problem. It was failing on these inputs:
" 16#ff " (hexadecimal (radix 16) number 255)
" -.48 " (real number)
Problem arose because of that initial space in the input. The scanner successfully grabbed substrings (offset 1, length 5, for the first example; offset 1, length 4, for the second). But then while extracting the C-string from the string-object, the offset was being ignored. So the actual strings being converted were:
" 16#f"
" -.4"
Which were the bad results.
Of course, it took two days of reading a 20,000-line trace dump to figure it out. And the problem didn't happen till the very end. Seriously.

19340
19341 eval
19342 Stack: 0:<string 13 2 3 5>1:<string 13 4 3 1>
19343 Executing: <name 10 0 56>
19344 <name "cvr">
19345 load:<name 10 0 56>0:<dict 262 10 10 0>1:<dict 262 20 82 0>0:<name 10 0 78>1:<na me 10 0 70>2:<name 10 0 62>3:4:5:6:<name 10 0 71>7:<name 10 0 63>8:9:10:11:<name 10 0 72>12:<name 10 0 64>13:14:15:<name 10 0 81>16:<name 10 0 73>17:<name 10 0 65>18:19:20:<name 10 0 82>21:22:<name 10 0 66>23:<name 10 0 58>24:25:<name 10 0 83>26:27:<name 10 0 67>28:<name 10 0 59>29:30:<name 10 0 84>31:32:<name 10 0 68> 33:<name 10 0 60>34:35:36:<name 10 0 77>37:<name 10 0 69>38:<name 10 0 61>39:40:
19346 0:<name 10 0 53>1:<name 10 0 22>2:3:<name 10 0 41>4:<name 10 0 10>5:6:<name 10 0 29>7:8:<name 10 0 48>9:<name 10 0 17>10:11:<name 10 0 36>12:<name 10 0 5>13:<na me 10 0 55>14:<name 10 0 24>15:<name 10 0 74>16:<name 10 0 43>17:<name 10 0 12>1 8:19:<name 10 0 31>20:21:<name 10 0 50>22:<name 10 0 19>23:24:<name 10 0 38>25:< name 10 0 7>26:<name 10 0 57>27:<name 10 0 26>28:29:<name 10 0 45>30:<name 10 0 14>31:32:<name 10 0 33>33:<name 10 0 2>34:<name 10 0 52>35:<name 10 0 21>36:37:< name 10 0 40>38:<name 10 0 9>39:40:<name 10 0 28>41:42:<name 10 0 47>43:<name 10 0 16>44:45:<name 10 0 35>46:<name 10 0 4>47:<name 10 0 54>48:<name 10 0 23>49:5 0:<name 10 0 42>51:<name 10 0 11>52:53:<name 10 0 30>54:55:<name 10 0 49>56:<nam e 10 0 18>57:58:<name 10 0 37>59:<name 10 0 6>60:<name 10 0 56>61:<name 10 0 25> 62:<name 10 0 75>63:<name 10 0 44>64:<name 10 0 13>65:66:<name 10 0 32>67:<name 10 0 1>68:<name 10 0 51>69:<name 10 0 20>70:71:<name 10 0 39>72:<name 10 0 8>73: 74:<name 10 0 27>75:76:<name 10 0 46>77:<name 10 0 15>78:79:<name 10 0 34>80:<na me 10 0 84>
19347
19348 eval
19349 Stack: 0:<string 13 2 3 5>1:<string 13 4 3 1>
19350 Executing: <operator 49>
19351 <operator 49 cvr>cvr -.4
19352
19353 eval
19354 Stack: 0:<string 13 2 3 5>1:<real -0.400000>
19355 Executing: <array 261 1 176 1>
19356
19357 eval
19358 Stack: 0:<string 13 2 3 5>1:<real -0.400000>
19359 Executing: <name 10 0 53>
19360 <name "exit">

luser- -droog

unread,

Jun 21, 2013, 3:56:17 AM6/21/13

to

The scanner is working and mostly complete. Re-wrote most
of the postscript to remove local dictionaries. This should make the scanner re-entrant.
http://code.google.com/p/xpost/source/browse/optok.c

Remaining issues: //immediate names. A naïve approach would be to add a variable to toke_dict to track whether we're inside a procedure (so immediate names are allowed) or outside (where immediate names are disallowed). But grok_dict might be shared by several contexts. Another option is to push a new dictionary with this name. Perhaps toke_dict defines /in_executable_array to false, but when scanning a proc, do `<</in_executable_true>>begin` ... `end` around the recursive calls. This approach seems viable.

Another piece is ASCII85. I don't have the energy right now. :)

Another piece (that I'm putting off still, but the day looms closer) is Binary encodings.

But the piece standong in the way of running the interpreter interactively, is dealing with the file/string polymorphism in the token operator. I've kept this scanner code under the name `toke` as it's clearly not a full implementation of `token`, since it only accepts strings. But files can easily be read-off into strings. It's called buffering and happens all the time. So my thought is, to use the function that implements the %statementedit special file to read-off a statement into a string, then call `toke` on that string, and stash the remainder (the returned substring) into the file object's payload structure (the shared part), modifying all file-reading operators to read first from this lookahead_buffer, if present.

This appears to let me re-use a lot of existing code to get this working. But something about it leaves me uneasy. Adding a buffer to the file may have unforeseen consequences. Or so I fear.

Helge Blischke

unread,

Jun 21, 2013, 5:22:40 AM6/21/13

to

luser- -droog wrote:

> The scanner is working and mostly complete. Re-wrote most
> of the postscript to remove local dictionaries. This should make the
> scanner re-entrant. http://code.google.com/p/xpost/source/browse/optok.c
>
> Remaining issues: //immediate names. A naïve approach would be to add a
> variable to toke_dict to track whether we're inside a procedure (so
> immediate names are allowed) or outside (where immediate names are
> disallowed).

Where does the PLRM state that immediate evaluated names are disaloowed
outside of procedures????

> But grok_dict might be shared by several contexts. Another
> option is to push a new dictionary with this name. Perhaps toke_dict
> defines /in_executable_array to false, but when scanning a proc, do
> `<</in_executable_true>>begin` ... `end` around the recursive calls. This
> approach seems viable.
>
> Another piece is ASCII85. I don't have the energy right now. :)
>
> Another piece (that I'm putting off still, but the day looms closer) is
> Binary encodings.
>
> But the piece standong in the way of running the interpreter
> interactively, is dealing with the file/string polymorphism in the token
> operator. I've kept this scanner code under the name `toke` as it's
> clearly not a full implementation of `token`, since it only accepts
> strings. But files can easily be read-off into strings. It's called
> buffering and happens all the time. So my thought is, to use the function
> that implements the %statementedit special file to read-off a statement
> into a string, then call `toke` on that string, and stash the remainder
> (the returned substring) into the file object's payload structure (the
> shared part), modifying all file-reading operators to read first from this
> lookahead_buffer, if present.
>
> This appears to let me re-use a lot of existing code to get this working.
> But something about it leaves me uneasy. Adding a buffer to the file may
> have unforeseen consequences. Or so I fear.

Helge

Helge Blischke

unread,

Jun 21, 2013, 5:22:40 AM6/21/13

to

luser- -droog wrote:

> The scanner is working and mostly complete. Re-wrote most
> of the postscript to remove local dictionaries. This should make the
> scanner re-entrant. http://code.google.com/p/xpost/source/browse/optok.c
>
> Remaining issues: //immediate names. A naïve approach would be to add a
> variable to toke_dict to track whether we're inside a procedure (so
> immediate names are allowed) or outside (where immediate names are
> disallowed).

Where does the PLRM state that immediate evaluated names are disaloowed
outside of procedures????

> But grok_dict might be shared by several contexts. Another
> option is to push a new dictionary with this name. Perhaps toke_dict
> defines /in_executable_array to false, but when scanning a proc, do
> `<</in_executable_true>>begin` ... `end` around the recursive calls. This
> approach seems viable.
>
> Another piece is ASCII85. I don't have the energy right now. :)
>
> Another piece (that I'm putting off still, but the day looms closer) is
> Binary encodings.
>
> But the piece standong in the way of running the interpreter
> interactively, is dealing with the file/string polymorphism in the token
> operator. I've kept this scanner code under the name `toke` as it's
> clearly not a full implementation of `token`, since it only accepts
> strings. But files can easily be read-off into strings. It's called
> buffering and happens all the time. So my thought is, to use the function
> that implements the %statementedit special file to read-off a statement
> into a string, then call `toke` on that string, and stash the remainder
> (the returned substring) into the file object's payload structure (the
> shared part), modifying all file-reading operators to read first from this
> lookahead_buffer, if present.
>
> This appears to let me re-use a lot of existing code to get this working.
> But something about it leaves me uneasy. Adding a buffer to the file may
> have unforeseen consequences. Or so I fear.

Helge

luser- -droog

unread,

Jun 21, 2013, 12:20:03 PM6/21/13

to

On Friday, June 21, 2013 4:22:40 AM UTC-5, Helge Blischke wrote:
> luser- -droog wrote:
>
>
>
> > The scanner is working and mostly complete. Re-wrote most
>
> > of the postscript to remove local dictionaries. This should make the
>
> > scanner re-entrant. http://code.google.com/p/xpost/source/browse/optok.c
>
> >
>
> > Remaining issues: //immediate names. A naïve approach would be to add a
>
> > variable to toke_dict to track whether we're inside a procedure (so
>
> > immediate names are allowed) or outside (where immediate names are
>
> > disallowed).
>
> Where does the PLRM state that immediate evaluated names are disaloowed
>
> outside of procedures????

Woah. It doesn't! Thank you. It's so much simpler not to do this.

luser- -droog

unread,

Jul 27, 2013, 3:56:07 AM7/27/13

to

On Friday, June 21, 2013 2:56:17 AM UTC-5, luser- -droog wrote:
>
> http://code.google.com/p/xpost/source/browse/optok.c
>
[snip]

>
> Another piece is ASCII85. I don't have the energy right now. :)
>
> Another piece (that I'm putting off still, but the day looms closer) is Binary encodings.
>
> But the piece standong in the way of running the interpreter interactively, is dealing with the file/string polymorphism in the token operator. I've kept this scanner code under the name `toke` as it's clearly not a full implementation of `token`, since it only accepts strings. But files can easily be read-off into strings. It's called buffering and happens all the time. So my thought is, to use the function that implements the %statementedit special file to read-off a statement into a string, then call `toke` on that string, and stash the remainder (the returned substring) into the file object's payload structure (the shared part), modifying all file-reading operators to read first from this lookahead_buffer, if present.
>
> This appears to let me re-use a lot of existing code to get this working. But something about it leaves me uneasy. Adding a buffer to the file may have unforeseen consequences. Or so I fear.

I did all that. And it was a rewarding experience. I got (was forced to)
test most of the other operators in the process of getting the scanner
(in postscript) to run. Hundreds of lines of trace-output just to follow
a procedure executing a loop. Thousands of lines of trace to follow
a whole token. Dozens of thousands for a line!

But the file->%lineedit->string->toke(ps) jury-rig did provide an
interactive mode of sorts. With a "prompt" consisting of several seconds
of scrolling unreadable text, leaving a screen-full of stack-dumps
following by a blank line and a blinking cursor. When 'quit' /quit/!, it
was marvelous.

But screw that. I hemmed-and-hawed and rewrote it in C. And it's
better than it used to be. A little less fancy-macro, and little
more sensible-naming-conventions.

I'm pretty sure I've already reported this to the group elsewhere,
but this message is to button-up the thread for posterity.
That's the end of the 'postscript-scanner' adventure.

-luserdroog [josh]

Appendix:

The final version of the code. (Also available in the old revisions
of optok.c, link above ^.]

The language is C, constructing postscript procedures in memory
using macros which (somewhat) hide the constructor-calls.
The postscript code for each block is in the comment preceding it.
This postscript code was tested with ghostscript, then copied
into the comments to guide the array-construction. So, there's
very little need to *read* all the C. Just read the postscript part.

The postscript code gets very stack-heavy to avoid making allocations.

/* systemdict<<token
definition of the scanner procedures.

For better or worse, many functions were named
according to the metaphor of the evil vice *smoking*.

token (considered as germanized verb infinitive of "toke")
1. snip (the tip of the cigar)
2. puff (and light of course)
3. grok (analyze and interpret)
3.a,b,c. if (check (fsm_dec,fsm_rad,fsm_real) interpretation
3.d. lookup first char in grok_dict for interpretation
3.d.1. ( string )
3.d.2. < hexstring > | <<
3.d.3. >>
3.d.4. /literal-name
3.d.5. default: executable-name (including [])

This is a postscript translation of the C-version from xpost2.
*/

#include <stdbool.h> /* ob.h:bool */

#include <stdio.h>

#include <stdlib.h> /* NULL */

#include "m.h"
#include "ob.h"
#include "s.h"
#include "itp.h"
#include "st.h"
#include "ar.h"
#include "di.h"
#include "op.h"
#include "nm.h"

#include "optok.h"

/*
This was part of the baggage necessary to call the
`toke` function from the interpreter's main loop.
This is an internal operator that does not get installed
in systemdict. It is called only by the arrstrhandler
array which contains a postscript procedure.
*/
object arrstrhandler;
void strhandler (context *ctx) {
object post, any;
any = pop(ctx->lo, ctx->os);
post = pop(ctx->lo, ctx->os);
push(ctx->lo, ctx->es, post);
if (type(any) == arraytype)
push(ctx->lo, ctx->os, any);
else
push(ctx->lo, ctx->es, any);
printf("strhandler: os: ");
dumpstack(ctx->lo, ctx->os);
printf("es: ");
dumpstack(ctx->lo, ctx->es);
puts("");
}

/*
Initialize the 'tok' operators
*/

void initoptok(context *ctx, object sd) {
oper *optab = (void *)(ctx->gl->base + adrent(ctx->gl, OPTAB));
object n,op;
object ar;
int i;

object td;
td = consbdc(ctx, 20); // % tokedict, as in `/toke{//tokedict begin ...`

/* "Alphabets" of the scanner */

object alnum = cvlit(consbst(ctx, 62,
"0123456789"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"));

alnum = cvlit(alnum);
object digit = cvlit(arrgetinterval(alnum, 0, 10));
object alpha = cvlit(arrgetinterval(alnum, 10, 52));
object upper = cvlit(arrgetinterval(alnum, 10, 26));
object lower = cvlit(arrgetinterval(alnum, 36, 26));
object u_l = consint('A' - 'a'); // ie. 'a' + u_l == 'A'

/* constructor shortcuts */
#define N(t) cvx(consname(ctx, #t))
#define L(t) consint(t) // because I() looked weird, read: "Literal"

/* proc (array) constructor macros */

#define ARR(n) ar = cvx(consbar(ctx, n)); i = 0
#define ADD(x) barput(ctx, ar, i++, x)

#define DEF(name) bdcput(ctx, td, N(name), ar)
#define ADDSUB(n) { object sar = consbar(ctx, n); { object ar = sar; int i = 0
#define ENDSUB } ADD(sar); }

/* /toupper { dup lower within { u-l add } if } */
ARR(5);
ADD(N(dup)); ADD(lower); ADD(N(within));
ADDSUB(2); ADD(u_l); ADD(N(add)); ENDSUB;
ADD(N(if));
DEF(toupper);

/* char str -indexof- idx
return index of char in string
/indexof { % char str . idx
-1 3 1 roll 0 exch { % -1 c n s_n
2 index eq { % -1 c n
3 1 roll exit % n -1 c
} if % -1 c n
1 add % -1 c n+1
} forall pop pop % n|-1
} def */

ARR(10);
ADD(L(-1)); ADD(L(3)); ADD(L(1)); ADD(N(roll)); ADD(L(0)); ADD(N(exch));
ADDSUB(7);
ADD(L(2)); ADD(N(index)); ADD(N(eq));
ADDSUB(4); ADD(L(3)); ADD(L(1)); ADD(N(roll)); ADD(N(exit)); ENDSUB;
ADD(N(if));
ADD(L(1)); ADD(N(add)); ENDSUB;
ADD(N(forall)); ADD(N(pop)); ADD(N(pop));
DEF(indexof);

dumpdic(ctx->gl, td);

/* char str -within- bool
test char is in string

/within { % char str . bool
indexof 0 ge
} def

*/

ARR(3); ADD(N(indexof)); ADD(L(0)); ADD(N(ge)); DEF(within);

/* predicates for the finite state machines (automata)

/israd { (#) 0 get eq } def
/isalpha { alpha within } def
/isdigit { digit within } def
/isxdigit { %dup lower within { u-l add } if
alnum indexof 16 lt } def
/isupper { upper within } def
/isalnum { dup isdigit { pop true }{ isalpha } ifelse } def
/israddig { isalnum } def
/isdot { (.) 0 get eq } def
/ise { (eE) within } def
/issign { (+-) within } def
/isdel { (()<>[]{}/%) within } def
/isspace { ( \t\n) within } def

/isreg { dup isspace { pop false }{ isdel not } ifelse } def */

ARR(2); ADD(L('#')); ADD(N(eq)); DEF(israd);
ARR(2); ADD(alpha); ADD(N(within)); DEF(isalpha);
ARR(2); ADD(digit); ADD(N(within)); DEF(isdigit);
ARR(4); ADD(alnum); ADD(N(indexof)); ADD(L(16)); ADD(N(lt)); DEF(isxdigit);
ARR(2); ADD(upper); ADD(N(within)); DEF(isupper);
ARR(2); ADD(alnum); ADD(N(within)); DEF(isalnum);
ARR(1); ADD(N(isalnum)); DEF(israddig);
ARR(2); ADD(L('.')); ADD(N(eq)); DEF(isdot);

ARR(2); ADD(cvlit(consbst(ctx, 2, "eE"))); ADD(N(within)); DEF(ise);
ARR(2); ADD(cvlit(consbst(ctx, 2, "+-"))); ADD(N(within)); DEF(issign);
ARR(2); ADD(cvlit(consbst(ctx, 10, "()<>[]{}/%"))); ADD(N(within)); DEF(isdel);
ARR(2); ADD(cvlit(consbst(ctx, 3, " \t\n"))); ADD(N(within)); DEF(isspace);

ARR(5); ADD(N(dup)); ADD(N(isspace));
ADDSUB(2); ADD(N(pop)); ADD(N(false)); ENDSUB;
ADDSUB(2); ADD(N(isdel)); ADD(N(not)); ENDSUB;
ADD(N(ifelse)); DEF(isreg);

/* automata states

% the automaton type
% [ predicate yes-transition no-transition ]

% automaton to match a simple decimal number
% /^[+-]?[0-9]+$/
/fsm_dec [
[ /issign 1 1 ] % 0
[ /isdigit 2 -1 ] % 1
[ /isdigit 2 -1 ] % 2
] def

/accept_dec { 2 eq } def */
ARR(3); ar = cvlit(ar);
ADDSUB(3); ADD(N(issign)); ADD(L(1)); ADD(L(1)); ENDSUB; // 0
ADDSUB(3); ADD(N(isdigit)); ADD(L(2)); ADD(L(-1)); ENDSUB; // 1
ADDSUB(3); ADD(N(isdigit)); ADD(L(2)); ADD(L(-1)); ENDSUB; // 2
object fsm_dec = ar;
//DEF(fsm_dec);
ARR(2); ADD(L(2)); ADD(N(eq));
object accept_dec = ar;
//DEF(accept_dec); //Don't define, just embed

/* % automaton to match a radix number

% /^[0-9]+[#][a-Z0-9]+$/
/fsm_rad [
[ /isdigit 1 -1 ] % 0
[ /isdigit 1 2 ] % 1
[ /israd 3 -1 ] % 2
[ /israddig 4 -1 ] % 3
[ /israddig 4 -1 ] % 4
] def

/accept_rad { 4 eq } def */
ARR(5); ar = cvlit(ar);
ADDSUB(3); ADD(N(isdigit)); ADD(L(1)); ADD(L(-1)); ENDSUB; // 0
ADDSUB(3); ADD(N(isdigit)); ADD(L(1)); ADD(L(2)); ENDSUB; // 1
ADDSUB(3); ADD(N(israd)); ADD(L(3)); ADD(L(-1)); ENDSUB; // 2
ADDSUB(3); ADD(N(israddig)); ADD(L(4)); ADD(L(-1)); ENDSUB; // 3
ADDSUB(3); ADD(N(israddig)); ADD(L(4)); ADD(L(-1)); ENDSUB; // 4
object fsm_rad = ar;
//DEF(fsm_rad);
ARR(2); ADD(L(4)); ADD(N(eq));
object accept_rad = ar;
//DEF(accept_rad);

/* % automaton to match a real number

% /^[+-]?(d+(.d*)?)|(d*.d+)([eE][+-]?d+)?$/
% where d = [0-9]
/fsm_real [
[ /issign 1 1 ] % 0
[ /isdigit 2 4 ] % 1
[ /isdigit 2 3 ] % 2
[ /isdot 6 7 ] % 3
[ /isdot 5 -1 ] % 4
[ /isdigit 6 -1 ] % 5
[ /isdigit 6 7 ] % 6
[ /ise 8 -1 ] % 7
[ /issign 9 9 ] % 8
[ /isdigit 10 -1 ] % 9
[ /isdigit 10 -1 ] % 10
] def
/accept_real {
dup 2 eq % integer
1 index 6 eq % real
2 index 10 eq % exponent
or or exch pop

} def */
ARR(11); ar = cvlit(ar);
ADDSUB(3); ADD(N(issign)); ADD(L(1)); ADD(L(1)); ENDSUB; // 0
ADDSUB(3); ADD(N(isdigit)); ADD(L(2)); ADD(L(4)); ENDSUB; // 1
ADDSUB(3); ADD(N(isdigit)); ADD(L(2)); ADD(L(3)); ENDSUB; // 2
ADDSUB(3); ADD(N(isdot)); ADD(L(6)); ADD(L(7)); ENDSUB; // 3
ADDSUB(3); ADD(N(isdot)); ADD(L(5)); ADD(L(-1)); ENDSUB; // 4
ADDSUB(3); ADD(N(isdigit)); ADD(L(6)); ADD(L(-1)); ENDSUB; // 5
ADDSUB(3); ADD(N(isdigit)); ADD(L(6)); ADD(L(7)); ENDSUB; // 6
ADDSUB(3); ADD(N(ise)); ADD(L(8)); ADD(L(-1)); ENDSUB; // 7
ADDSUB(3); ADD(N(issign)); ADD(L(9)); ADD(L(9)); ENDSUB; // 8
ADDSUB(3); ADD(N(isdigit)); ADD(L(10)); ADD(L(-1)); ENDSUB; // 9
ADDSUB(3); ADD(N(isdigit)); ADD(L(10)); ADD(L(-1)); ENDSUB; // 10
object fsm_real = ar;
//DEF(fsm_real);
ARR(15);
ADD(N(dup)); ADD(L(2)); ADD(N(eq)); //integer
ADD(L(1)); ADD(N(index)); ADD(L(6)); ADD(N(eq)); //real
ADD(L(2)); ADD(N(index)); ADD(L(10)); ADD(N(eq)); //exponent
ADD(N(or)); ADD(N(or)); ADD(N(exch)); ADD(N(pop));
object accept_real = ar;
//DEF(accept_real);

/* str fsm accept-proc -check- bool
execute the state machine against the string
using accept-proc to interpret the final state
/check { % str fsm accept . bool
3 1 roll % acc str fsm
0 exch 0 { % acc str n fsm sta
2 copy get % acc str n fsm sta fsm[sta]
5 3 roll % acc fsm sta fsm[sta] str n
2 copy get % acc fsm sta fsm[sta] str n str[n]
3 index 0 get % acc fsm sta fsm[sta] str n str[n] fsm[sta][0]
cvx exec % acc fsm sta fsm[sta] str n bool
{
1 add 5 2 roll % acc str n+1 fsm sta fsm[sta]
1 get exch pop % acc str n+1 fsm sta'
}{
5 2 roll % acc str n fsm sta fsm[sta]
2 get exch pop % acc str n fsm sta'
} ifelse
dup -1 eq {exit} if % acc str n(+1) fsm sta'
2 index 4 index length ge {exit} if % acc str n(+1) fsm sta'
} loop
5 1 roll pop pop pop % sta accept
exec % bool
} def */
ARR(15);
ADD(L(3)); ADD(L(1)); ADD(N(roll));
ADD(L(0)); ADD(N(exch)); ADD(L(0));
ADDSUB(31);
ADD(L(2)); ADD(N(copy)); ADD(N(get));
ADD(L(5)); ADD(L(3)); ADD(N(roll));
ADD(L(2)); ADD(N(copy)); ADD(N(get));
ADD(L(3)); ADD(N(index)); ADD(L(0)); ADD(N(get));
ADD(N(cvx)); ADD(N(exec));
ADDSUB(9);
ADD(L(1)); ADD(N(add)); ADD(L(5)); ADD(L(2)); ADD(N(roll));
ADD(L(1)); ADD(N(get)); ADD(N(exch)); ADD(N(pop));
ENDSUB;
ADDSUB(7);
ADD(L(5)); ADD(L(2)); ADD(N(roll));
ADD(L(2)); ADD(N(get)); ADD(N(exch)); ADD(N(pop));
ENDSUB;
ADD(N(ifelse));
ADD(N(dup)); ADD(L(-1)); ADD(N(eq));
ADDSUB(1); ADD(N(exit)); ENDSUB; ADD(N(if));
ADD(L(2)); ADD(N(index)); ADD(L(4)); ADD(N(index));
ADD(N(length)); ADD(N(ge));
ADDSUB(1); ADD(N(exit)); ENDSUB; ADD(N(if));
ENDSUB;
ADD(N(loop));
ADD(L(5)); ADD(L(1)); ADD(N(roll));
ADD(N(pop)); ADD(N(pop)); ADD(N(pop));
ADD(N(exec));
DEF(check);

/* string radix -cvri- integer

convert a string to integer using radix
/cvri { % string radix . num
0 3 1 roll exch % 0 base str
dup 0 get issign {
dup 0 get
(-) 0 get eq 4 1 roll
1 1 index length
1 sub getinterval
}{ false 4 1 roll } ifelse % bool sum base str i
0 1 2 index length 1 sub { % bool sum base str i
2 copy get % bool sum base str i s_i
dup lower within { u-l add } if
%dup =()=
alnum indexof % bool sum base str i digit
5 4 roll 4 index mul % bool base str i digit sum*base
add % bool base str i sum=sum*base+digit
4 1 roll pop % bool sum base str
} for
pop pop % bool sum
exch { neg } if % num

} def */
ARR(26);
ADD(L(0)); ADD(L(3)); ADD(L(1)); ADD(N(roll)); ADD(N(exch));
ADD(N(dup)); ADD(L(0)); ADD(N(get)); ADD(N(issign));
ADDSUB(15);
ADD(N(dup)); ADD(L(0)); ADD(N(get));
ADD(L('-')); ADD(N(eq)); ADD(L(4)); ADD(L(1)); ADD(N(roll));
ADD(L(1)); ADD(L(1)); ADD(N(index)); ADD(N(length));
ADD(L(1)); ADD(N(sub)); ADD(N(getinterval));
ENDSUB;
ADDSUB(4);
ADD(N(false)); ADD(L(4)); ADD(L(1)); ADD(N(roll));
ENDSUB;
ADD(N(ifelse));
ADD(L(0)); ADD(L(1)); ADD(L(2)); ADD(N(index));
ADD(N(length)); ADD(L(1)); ADD(N(sub));
ADDSUB(21);
ADD(L(2)); ADD(N(copy)); ADD(N(get));
ADD(N(dup)); ADD(lower); ADD(N(within));
ADDSUB(2); ADD(u_l); ADD(N(add)); ENDSUB; ADD(N(if));
ADD(alnum); ADD(N(indexof));
ADD(L(5)); ADD(L(4)); ADD(N(roll));
ADD(L(4)); ADD(N(index)); ADD(N(mul)); ADD(N(add));
ADD(L(4)); ADD(L(1)); ADD(N(roll)); ADD(N(pop));
ENDSUB;
ADD(N(for));
ADD(N(pop)); ADD(N(pop));
ADD(N(exch));
ADDSUB(1); ADD(N(neg)); ENDSUB; ADD(N(if));
DEF(cvri);

/* string -radix- number

interpret a string containing a radix number
/radix {
(#) search pop exch pop % (digits) (radix) % split
10 cvri % (digits) radix
cvri % num

} def */
ARR(8);
ADD(cvlit(consbst(ctx, 1, "#"))); ADD(N(search));
ADD(N(pop)); ADD(N(exch)); ADD(N(pop));
ADD(L(10)); ADD(N(cvri));
ADD(N(cvri));
DEF(radix);

/* Postscript "switch statement" dictionaries */

/* esc_dict - contains values associated with escape seqences */
object esc_dict;
{
object td = consbdc(ctx, 5); esc_dict = td;

/* /default {} */
ARR(0);
DEF(default);

/* (\n)0 get { pop false } */
ARR(2); ADD(N(pop)); ADD(N(false));
bdcput(ctx, td, L('\n'), ar);

/* (a)0 get (\a)0 get
(b)0 get (\b)0 get
(f)0 get (\f)0 get
(n)0 get (\n)0 get
(r)0 get (\r)0 get
(t)0 get (\t)0 get
(v)0 get (\v)0 get */
bdcput(ctx, td, L('a'), L('\a'));
bdcput(ctx, td, L('b'), L('\b'));
bdcput(ctx, td, L('f'), L('\f'));
bdcput(ctx, td, L('n'), L('\n'));
bdcput(ctx, td, L('r'), L('\r'));
bdcput(ctx, td, L('t'), L('\t'));
bdcput(ctx, td, L('v'), L('\v'));
}

/* str_dict - contains actions for
parens and slashes in (strings) */
object str_dict;
{
object td = consbdc(ctx, 5); str_dict = td;

/* /default{true} */
ARR(1);
ADD(N(true));
DEF(default);

/* ($)0 get{6 -1 roll 1 add 6 1 roll} */
ARR(9);
ADD(L(6)); ADD(L(-1)); ADD(N(roll));
ADD(L(1)); ADD(N(add));
ADD(L(6)); ADD(L(1)); ADD(N(roll));
ADD(N(true));
bdcput(ctx, td, L('('), ar);

/* ($)0 get{
6 -1 roll 1 sub
6 1 roll
5 index 0 ne
} */
ARR(12);
ADD(L(6)); ADD(L(-1)); ADD(N(roll));
ADD(L(1)); ADD(N(sub));
ADD(L(6)); ADD(L(1)); ADD(N(roll));
ADD(L(5)); ADD(N(index)); ADD(L(0)); ADD(N(ne));
bdcput(ctx, td, L(')'), ar);

/* (\\)0 get{pop
dup 2 index length ge{ 4 2 roll exit }if
2 copy get exch 1 add exch
dup esc-dict exch
2 copy knwon not{ pop/default }if
get exec dup false ne
} */
ARR(29);
ADD(N(pop));
ADD(N(dup)); ADD(L(2)); ADD(N(index)); ADD(N(length));
ADD(N(ge));
ADDSUB(4);
ADD(L(4)); ADD(L(2)); ADD(N(roll)); ADD(N(exit));
ENDSUB;
ADD(N(if));
ADD(L(2)); ADD(N(copy)); ADD(N(get));
ADD(N(exch)); ADD(L(1)); ADD(N(add)); ADD(N(exch));
ADD(N(dup)); ADD(esc_dict); ADD(N(exch));
ADD(L(2)); ADD(N(copy)); ADD(N(known)); ADD(N(not));
ADDSUB(2); ADD(N(pop)); ADD(cvlit(N(default))); ENDSUB;
ADD(N(if));
ADD(N(get)); ADD(N(exec)); ADD(N(dup)); ADD(N(false)); ADD(N(ne));
bdcput(ctx, td, L('\\'), ar);

}

/* grok_dict - contains scanner actions for
strings, hexstrings, array and dict names [ ] << >>,
procs, and executable names. */
object grok_dict;
{
object td = consbdc(ctx, 5); grok_dict = td;

/* /default { cvn cvx } */
ARR(2); // bareword: executable name
ADD(N(cvn)); ADD(N(cvx));
DEF(default);

/* (/) 0 get { pop
dup 0 get (/) 0 get eq {

1 1 index length 1 sub getinterval

puff cvn load
}{
puff cvn cvlit
} ifelse
}*/
ARR(9); // slash: literal name
ADD(N(pop));
ADD(N(dup)); ADD(L(0)); ADD(N(get)); ADD(L('/')); ADD(N(eq));
ADDSUB(10);
ADD(L(1)); ADD(L(1)); ADD(N(index)); ADD(N(length));
ADD(L(1)); ADD(N(sub)); ADD(N(getinterval));
ADD(N(puff)); ADD(N(cvn)); ADD(N(load));
ENDSUB;
ADDSUB(3);
ADD(N(puff)); ADD(N(cvn)); ADD(N(cvlit));
ENDSUB;
ADD(N(ifelse));
bdcput(ctx, td, L('/'), ar);

/* ($) 0 get { pop % (...$)
1 exch dup length string 0 exch 0 % defer src si buf bi
{
4 index 0 eq {exit} if % d s si b bi
4 2 roll % d b bi s si
dup 2 index length ge {4 2 roll exit} if
2 copy get exch 1 add exch % d b bi s si+1 s(si)
dup str-dict exch
2 copy known not{ pop/default }if
get exec % d b bi s si s(si)'
{
5 3 roll % d s si s(si)' b bi
3 2 roll % d s si b bi s(si)'
3 copy put pop 1 add % d s si b bi+1
}{
pop
4 2 roll
} ifelse
} loop
0 exch getinterval % d s si b'
4 1 roll % b' d s si
1 index length 1 index sub getinterval % b' d s'
exch pop exch % s' b'
cvlit
} */
ARR(28);
ADD(N(pop));
ADD(L(1)); ADD(N(exch)); ADD(N(dup)); ADD(N(length)); ADD(N(string));
ADD(L(0)); ADD(N(exch)); ADD(L(0));
ADDSUB(37);
ADD(L(4)); ADD(N(index)); ADD(L(0)); ADD(N(eq));
ADDSUB(1); ADD(N(exit)); ENDSUB; ADD(N(if));
ADD(L(4)); ADD(L(2)); ADD(N(roll));
ADD(N(dup)); ADD(L(2)); ADD(N(index));
ADD(N(length)); ADD(N(ge));
ADDSUB(4);
ADD(L(4)); ADD(L(2)); ADD(N(roll));
ADD(N(exit)); ENDSUB;
ADD(N(if));
ADD(L(2)); ADD(N(copy)); ADD(N(get));
ADD(N(exch)); ADD(L(1)); ADD(N(add)); ADD(N(exch));
ADD(N(dup)); ADD(str_dict); ADD(N(exch));
ADD(L(2)); ADD(N(copy)); ADD(N(known)); ADD(N(not));
ADDSUB(2); ADD(N(pop)); ADD(cvlit(N(default))); ENDSUB;
ADD(N(if));
ADD(N(get)); ADD(N(exec));
ADDSUB(12);
ADD(L(5)); ADD(L(3)); ADD(N(roll));
ADD(L(3)); ADD(L(2)); ADD(N(roll));
ADD(L(3)); ADD(N(copy)); ADD(N(put));
ADD(N(pop)); ADD(L(1)); ADD(N(add));
ENDSUB;
ADDSUB(4);
ADD(N(pop));
ADD(L(4)); ADD(L(2));
ADD(N(roll));
ENDSUB;
ADD(N(ifelse));
ENDSUB;
ADD(N(loop));
ADD(L(0)); ADD(N(exch)); ADD(N(getinterval));
ADD(L(4)); ADD(L(1)); ADD(N(roll));
ADD(L(1)); ADD(N(index)); ADD(N(length));
ADD(L(1)); ADD(N(index)); ADD(N(sub)); ADD(N(getinterval));
ADD(N(exch)); ADD(N(pop)); ADD(N(exch));
ADD(N(cvlit));
bdcput(ctx, td, L('('), ar);

/* (<)0 get { pop % (...>...) | (<...)
dup 0 get (<) 0 get eq { % (<...)
1 1 index length 1 sub getinterval % (...)
(<<) cvn cvx
}{ % (...>)
dup length 2 idiv 1 add string % src buf
0 exch 0 { %loop % s si b bi
4 2 roll dup 2 index length ge % b bi s si bool
{ 4 2 roll exit } if % b bi s si
2 copy get exch 1 add exch % b bi s si+1 s(si)

dup (>) 0 get eq { pop 4 2 roll exit } if % b bi s si s(si)

dup isspace { pop 4 2 roll }{ % b bi s si s(si)
dup lower within { u-l add } if % b bi s si s(si)
dup isxdigit not { /toke cvx /syntaxerror signalerror
}{
alnum indexof % b bi s si hi
4 bitshift % b bi s si hi
5 1 roll % hi b bi s si
{ % eat whitespace until next dig
dup 2 index length ge % hi b bi s si bool

{ (0) 0 get exit } if

2 copy get exch 1 add exch % hi b bi s si+1 s(si)

dup isspace not { exit } if

pop % hi b bi s si+1
} loop % hi b bi s si' s(si'-1)

dup lower within { u-l add } if % hi b bi s si char
dup isxdigit not { /toke cvx /syntaxerror signalerror
}{
alnum indexof % hi b bi s si lo
6 -1 roll or % b bi s si int
} ifelse
} ifelse
5 3 roll 3 2 roll % s si b bi int
3 copy put pop 1 add % s si b bi+1
} ifelse
} loop
0 exch getinterval % s si b'
3 1 roll % b' s si
1 index length 1 index sub getinterval % b' s'
exch % src' buf'
cvlit
} ifelse
} */
ARR(9); ADD(N(pop));
ADD(N(dup)); ADD(L(0)); ADD(N(get)); ADD(L('<')); ADD(N(eq));
ADDSUB(9);
ADD(L(1)); ADD(L(1)); ADD(N(index)); ADD(N(length));
ADD(L(1)); ADD(N(sub)); ADD(N(getinterval));
ADD(cvlit(consname(ctx, "<<"))); ADD(N(cvx)); ENDSUB;
ADDSUB(27);
ADD(N(dup)); ADD(N(length)); ADD(L(2)); ADD(N(idiv));
ADD(L(1)); ADD(N(add)); ADD(N(string));
ADD(L(0)); ADD(N(exch)); ADD(L(0));
ADDSUB(27);
ADD(L(4)); ADD(L(2)); ADD(N(roll)); ADD(N(dup));
ADD(L(2)); ADD(N(index)); ADD(N(length)); ADD(N(ge));
ADDSUB(4);
ADD(L(4)); ADD(L(2)); ADD(N(roll)); ADD(N(exit)); ENDSUB;
ADD(N(if));
ADD(L(2)); ADD(N(copy)); ADD(N(get));
ADD(N(exch)); ADD(L(1)); ADD(N(add)); ADD(N(exch));
ADD(N(dup)); ADD(L('>')); ADD(N(eq));
ADDSUB(5); ADD(N(pop));
ADD(L(4)); ADD(L(2)); ADD(N(roll)); ADD(N(exit)); ENDSUB;
ADD(N(if));
ADD(N(dup)); ADD(N(isspace));
ADDSUB(4);
ADD(N(pop)); ADD(L(4)); ADD(L(2)); ADD(N(roll)); ENDSUB;
ADDSUB(23);
ADD(N(dup)); ADD(lower); ADD(N(within));
ADDSUB(2); ADD(u_l); ADD(N(add)); ENDSUB;
ADD(N(if));
ADD(N(dup)); ADD(N(isxdigit)); ADD(N(not));
ADDSUB(1); ADD(N(syntaxerror)); ENDSUB;
ADDSUB(20);
ADD(alnum); ADD(N(indexof));
ADD(L(4)); ADD(N(bitshift));
ADD(L(5)); ADD(L(1)); ADD(N(roll));
ADDSUB(20);
ADD(N(dup)); ADD(L(2)); ADD(N(index));
ADD(N(length)); ADD(N(ge));
ADDSUB(2); ADD(L('0')); ADD(N(exit)); ENDSUB;
ADD(N(if));
ADD(L(2)); ADD(N(copy)); ADD(N(get));
ADD(N(exch)); ADD(L(1)); ADD(N(add)); ADD(N(exch));
ADD(N(dup)); ADD(N(isspace)); ADD(N(not));
ADDSUB(1); ADD(N(exit)); ENDSUB;
ADD(N(if));
ADD(N(pop));
ENDSUB;
ADD(N(loop));
ADD(N(dup)); ADD(lower); ADD(N(within));
ADDSUB(2); ADD(u_l); ADD(N(add)); ENDSUB;
ADD(N(if));
ADD(N(dup)); ADD(N(isxdigit)); ADD(N(not));
ADDSUB(1); ADD(N(syntaxerror)); ENDSUB;
ADDSUB(6);
ADD(alnum); ADD(N(indexof));
ADD(L(6)); ADD(L(-1)); ADD(N(roll));
ADD(N(or));
ENDSUB;
ADD(N(ifelse));
ENDSUB;
ADD(N(ifelse));

ADD(L(5)); ADD(L(3)); ADD(N(roll));
ADD(L(3)); ADD(L(2)); ADD(N(roll));
ADD(L(3)); ADD(N(copy)); ADD(N(put));
ADD(N(pop)); ADD(L(1)); ADD(N(add));
ENDSUB;
ADD(N(ifelse));

ENDSUB;
ADD(N(loop));
ADD(L(0)); ADD(N(exch)); ADD(N(getinterval));
ADD(L(3)); ADD(L(1)); ADD(N(roll));
ADD(L(1)); ADD(N(index)); ADD(N(length));
ADD(L(1)); ADD(N(index)); ADD(N(sub)); ADD(N(getinterval));
ADD(N(exch));
ADD(N(cvlit));
ENDSUB;
ADD(N(ifelse));
bdcput(ctx, td, L('<'), ar);

/* (>)0 get {
pop
dup 0 get (>) 0 get eq {

1 1 index length 1 sub getinterval

(>>) cvn cvx

}{
/toke cvx /syntaxerror signalerror
} ifelse

} */
ARR(9);
ADD(N(pop));
ADD(N(dup)); ADD(L(0)); ADD(N(get)); ADD(L('>')); ADD(N(eq));
ADDSUB(9);
ADD(L(1)); ADD(L(1)); ADD(N(index)); ADD(N(length));
ADD(L(1)); ADD(N(sub)); ADD(N(getinterval));
ADD(cvlit(consname(ctx, ">>"))); ADD(N(cvx)); ENDSUB;
ADDSUB(1);
ADD(N(syntaxerror)); ENDSUB;
ADD(N(ifelse));
bdcput(ctx, td, L('>'), ar);

/* ({)0 get {
pop
mark exch % [ s'
{ % [ ... s'
toke % [ ... s' t b
not { syntaxerror } if % [ ... s' t
dup (}) cvn eq { % [ ... s' t
pop % [ ... s'
counttomark 1 add 1 roll % s' [ ...
] cvx exit
} if % [ ... s' t
exch % [ ... t s'
} loop % s' {}
%true % s' {} true
} */
ARR(5);
ADD(N(pop));
ADD(mark); ADD(N(exch));
ADDSUB(10);
ADD(N(toke));
ADD(N(not));
ADDSUB(1); ADD(N(syntaxerror)); ENDSUB;
ADD(N(if));
ADD(N(dup)); ADD(cvlit(consname(ctx, "}"))); ADD(N(eq));
ADDSUB(9);
ADD(N(pop));
ADD(N(counttomark)); ADD(L(1)); ADD(N(add));
ADD(L(1)); ADD(N(roll));
ADD(cvx(consname(ctx, "]"))); ADD(N(cvx)); ADD(N(exit));
ENDSUB;
ADD(N(if));
ADD(N(exch));
ENDSUB;
ADD(N(loop));
//ADD(N(true));
bdcput(ctx, td, L('{'), ar);

}

/* string -grok- token

interpret a string using fsm-acceptors and convertors
/grok {

{
dup fsm_dec //accept_dec check { 10 cvri exit } if
dup fsm_rad //accept_rad check { radix exit } if
dup fsm_real //accept_real check { cvr exit } if
grok-dict 1 index 0 get

2 copy known not { pop /default } if
get exec
exit } loop

} def */
ARR(2);
ADDSUB(32);
ADD(N(dup)); ADD(fsm_dec); ADD(accept_dec); ADD(N(check));
ADDSUB(3); ADD(L(10)); ADD(N(cvri)); ADD(N(exit)); ENDSUB;
ADD(N(if));
ADD(N(dup)); ADD(fsm_rad); ADD(accept_rad); ADD(N(check));
ADDSUB(2); ADD(N(radix)); ADD(N(exit)); ENDSUB;
ADD(N(if));
ADD(N(dup)); ADD(fsm_real); ADD(accept_real); ADD(N(check));
ADDSUB(2); ADD(N(cvr)); ADD(N(exit)); ENDSUB;
ADD(N(if));

/* fallback. lookup first char in grok_dict */
ADD(grok_dict); ADD(L(1)); ADD(N(index)); ADD(L(0)); ADD(N(get));
ADD(L(2)); ADD(N(copy)); ADD(N(known)); ADD(N(not));
ADDSUB(2); ADD(N(pop)); ADD(cvlit(N(default))); ENDSUB;
ADD(N(if));
ADD(N(get)); ADD(N(exec));

ADD(N(exit));
ENDSUB;
ADD(N(loop));
DEF(grok);

/* str -snip- str'
trim initial whitespace from string
/snip { % str . str'
{
dup length 0 eq
{ exit } if
dup 0 get isspace not
{ exit } if

1 1 index length
1 sub getinterval

} loop
} def */
ARR(2);
ADDSUB(20);
ADD(N(dup)); ADD(N(length)); ADD(L(0)); ADD(N(eq));
ADDSUB(1); ADD(N(exit)); ENDSUB; ADD(N(if));
ADD(N(dup)); ADD(L(0)); ADD(N(get)); ADD(N(isspace));
//n.b. #isspace is not used in the C namespace.
//so, no conflict with ctypes.h
ADD(N(not));
ADDSUB(1); ADD(N(exit)); ENDSUB; ADD(N(if));
ADD(L(1)); ADD(L(1)); ADD(N(index)); ADD(N(length));
ADD(L(1)); ADD(N(sub)); ADD(N(getinterval));
ENDSUB;
ADD(N(loop));
DEF(snip);

/* puff
read in a token up to delimiter
/puff { % str . post pre
1 1 index % s 1 s
1 1 index length 1 sub % s 1 s 1 sN-1
getinterval { % s n s[n]
isreg not { exit } if
1 add % s n+1
} forall % s n
%1 add
2 copy 0 exch getinterval % s n s[0..n]
3 1 roll % s[0..n] s n
1 index length % s[0..n] s n sN
1 index sub getinterval % s[0..n) s[n..$)
exch
} def */
ARR(28);
ADD(L(1)); ADD(L(1)); ADD(N(index));
ADD(L(1)); ADD(L(1)); ADD(N(index)); ADD(N(length)); ADD(L(1)); ADD(N(sub));
ADD(N(getinterval));
ADDSUB(6);
ADD(N(isreg)); ADD(N(not));
ADDSUB(1); ADD(N(exit)); ENDSUB;

ADD(N(if));
ADD(L(1)); ADD(N(add));
ENDSUB;
ADD(N(forall));

//ADD(L(1)); ADD(N(add));
ADD(L(2)); ADD(N(copy)); ADD(L(0)); ADD(N(exch)); ADD(N(getinterval));
ADD(L(3)); ADD(L(1)); ADD(N(roll));
ADD(L(1)); ADD(N(index)); ADD(N(length));
ADD(L(1)); ADD(N(index)); ADD(N(sub)); ADD(N(getinterval));
ADD(N(exch));
DEF(puff);

/*
/toke {
tokedict begin
snip
dup length 0 eq { false }{
dup 0 get
isdel not {
puff
}{
dup 1 1 index length 1 sub getinterval
exch 0 1 getinterval
} ifelse
grok true
} ifelse
end
} def */
ARR(11);
ADD(td); ADD(N(begin));
ADD(N(snip));
ADD(N(dup)); ADD(N(length)); ADD(L(0)); ADD(N(eq));
ADDSUB(1); ADD(N(false)); ENDSUB;
ADDSUB(10);
ADD(N(dup)); ADD(L(0)); ADD(N(get));
ADD(N(isdel)); ADD(N(not));
ADDSUB(1); ADD(N(puff)); ENDSUB;
ADDSUB(12);
ADD(N(dup)); ADD(L(1)); ADD(L(1)); ADD(N(index));
ADD(N(length)); ADD(L(1)); ADD(N(sub));
ADD(N(getinterval)); ADD(N(exch));
ADD(L(0)); ADD(L(1)); ADD(N(getinterval));
ENDSUB;
ADD(N(ifelse));
ADD(N(grok)); ADD(N(true));
ENDSUB;
ADD(N(ifelse));
ADD(N(end));
DEF(toke);
bdcput(ctx, sd, N(toke), ar);
dumpdic(ctx->gl, td);

/*
/tokeloop { % string
/wstr exch def
{
wstr toke {
exch /wstr exch def
dup type /arraytype ne {
exec
} if
}{
exit
} ifelse
} loop
} def */
ARR(5);
ADD(cvlit(N(wstr))); ADD(N(exch)); ADD(N(def));
ADDSUB(5);
ADD(N(wstr)); ADD(N(toke));
ADDSUB(10);
ADD(N(exch)); ADD(cvlit(N(wstr))); ADD(N(exch)); ADD(N(def));
ADD(N(dup)); ADD(N(type)); ADD(cvlit(N(arraytype))); ADD(N(ne));
ADDSUB(1);
ADD(N(exec));
ENDSUB;
ADD(N(if));
ENDSUB;
ADDSUB(1);
ADD(N(exit));
ENDSUB;
ADD(N(ifelse));
ENDSUB;
ADD(N(loop));
bdcput(ctx, sd, N(tokeloop), ar);

ARR(1);
ADD(cvx(consoper(ctx, "strhandler", strhandler, 0, 0)));
bdcput(ctx, sd, N(strhandler), ar);
arrstrhandler = ar;
}

luser- -droog

unread,

Jan 26, 2014, 2:32:19 AM1/26/14

to

I was reading back over this thread, and all the links are dead. :( And the file mentioned doesn't even exist in the current. So I dug up the revision where the C-encoded PS scanner is replaced by pure C code. The difference highlighter gets amusingly confused.

http://code.google.com/p/xpost/source/detail?r=d2a0aad28b370a5751a229dabf72c9e22f2adfac&path=/optok.c

luser droog

unread,

Nov 28, 2016, 11:12:42 AM11/28/16

to

On Sunday, January 26, 2014 at 1:32:19 AM UTC-6, luser droog wrote:
> I was reading back over this thread, and all the links are dead. :( And the file mentioned doesn't even exist in the current. So I dug up the revision where the C-encoded PS scanner is replaced by pure C code. The difference highlighter gets amusingly confused.
>
> http://code.google.com/p/xpost/source/detail?r=d2a0aad28b370a5751a229dabf72c9e22f2adfac&path=/optok.c

Reading back over this thread again, and /that/ link is now dead.

Here's the (current) working one:

https://github.com/luser-dr00g/xpost/commit/52f971e5194b101d675ba87d3fafde499c538251