embarrassing spaghetti code needs stylistic advice

91 views
Skip to first unread message

luser-ex-troll

unread,
Mar 19, 2009, 11:12:04 PM3/19/09
to
Hello all.
I have a problem of a somewhat different kind than the usual post. My
code works! It's just appallingly ugly. With my attention focused
sharply on clear and consistent data structures, the most important
function in my nascent postscript interpreter, the lexical scanner,
has degenerated into spaghetti.

It happened incrementally so I didn't really worry
about it until it became overwhelmingly obvious
that what I've got is terribly, horribly ugly.

I realize that this is a large post, but I couldn't
trim it any shorter without making it either
incomplete (and non-functional) or no longer
representative of the problem.

Specifically the problem is the toke function
which scans a string or file to create an object
(tag-union, variant-record). It's constructed
as a series of tests and loops within a big loop,
but uses goto to change its mind about what
type of object it has found (eg. '+' followed
by a digit is a noise character introducing the
number, but followed by anything else, it's an
executable name).

I can't seem to think of a control structure to replace it with that
affords the same flexibility.

tia.
lxt
ps. feel free to trim the entire code from any
responses. I realize it's quite long for this
medium.
/* tokentest.c
the scanner playpen
*/

#include <ctype.h>
#include <stdbool.h> //true false
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//#include "object.h"
/* object.h
global constants
object structures and typedefs
*/

//limits
#define MAXNAMES 1000
#define MAXTOKEN 256
#define OSSIZE 500
#define ESSIZE 250
#define DSSIZE 20

/* Objects */

#define Types \
X(null, int dummy) \
X(mark, int dummy2) \
X(boolean, bool b) \
X(integer, int i) \
X(real, float f) \
X(name, int n) \
X(string, String *s) \
X(array, Array *a) \
X(dict, Dict *d) \
X(operator, Operator op) \
X(file, FILE *file) \
X(font, void *font) \
X(packedarray, void *pa) \
X(save, void *save) \


struct s_operator {
char *name;
void (*fp)();
};

typedef struct s_object Object;
typedef struct s_string String;
typedef struct s_array Array;
typedef struct s_dict Dict;
typedef struct s_operator Operator;
struct s_object {
#define X(a, b) a ## type ,
enum e_type { Types } type;
#undef X
unsigned char flags;
#define READ 1
#define WRITE 2
#define EXEC 4
#define COMP 8
#define X(a, b) b;
union { Types } u;
#undef X
};


struct s_string {
int ref;
size_t length;
struct s_string *copyof;
char *s; };

struct s_array {
int ref;
size_t length;
struct s_array *copyof;
Object *a; };

struct s_pair { Object key, value; };
struct s_dict {
int ref;
size_t length;
size_t maxlength;
struct s_pair *p; };

// Singular Objects
Object null;
Object mark;

// exported functions
int error (char *fmt, ...);
Object boolean (char b);
Object integer (int i);
Object real (float f);

char *names[MAXNAMES];
//int nameslen;
Object name (char *s);

Object stringn (int n);
Object string (char *s);
void dec_string (String *s);
void inc_string (String *s);
String * substring (String *s, size_t offset, size_t length);

Object array (int n);
void dec_array (Array *a);
void inc_array (Array *a);
Object car (Array *a);
Array * cdr (Array *a);
Array * subarray (Array *a, size_t offset, size_t length);

Object dict (int n);
int eq (Object a, Object b);
struct
s_pair * lookup (Dict *d, Object key);
bool define (Dict *d, Object key, Object value);
void dec_dict (Dict *d);
void inc_dict (Dict *d);

void dec (Object *o);
void inc (Object *o);

Object executable (Object o);
Object operator (char *name, void (*fp)());

/* eof: object.h */

//#include "system.h"
/* system.h
stacks and operators
*/

#define X(a, b) #a "type",
char *typestring[] = { Types }; //names for enum e_type type member of
Object
#undef X
int defer_exec;
int defer_paren;
int quitflag;

Object os[OSSIZE];
Object *tos = os;
#define push(obj) \
(tos != os+OSSIZE)? *(tos++) = obj: (error("stackoverflow"),null)
#define pop ( (tos!=os)? (*(--tos)): (error("stackunderflow"),null) )

Object es[ESSIZE];
Object *tes = es;
#define pushe(obj) \
(tes != es+ESSIZE)? *(tes++) = obj: (error("execstackoverflow"),null)
#define pope ( (tes!=es)? (*(--tes)): (error
("execstackunderflow"),null) )

Object ds[DSSIZE];
Object *tds = ds;
#define pushd(obj) \
(tds != ds+DSSIZE)? *(tds++) = obj: (error("dictstackoverflow"),null)
#define popd ( (tds!=ds)? (*(--tds)): (error
("dictstackunderflow"),null) )

/* operator helpers */

#define stackunder(n,op) ( (tos-os >= n)?: error("stackunderflow in "
#op) )
#define typecheck(ob,tp,op) \
( (ob.type == tp ## type)?: error("typecheck in " #op) )
#define xcheck(ob,op) \
(ob.flags & EXEC)? 0: error("typecheck in " #op)


/* Operators */

/* Miscellaneous Operators */
void Oprompt ();

/* eof system.h */

int sgetc(String *s) {
if (s->length == 0) return EOF;
s->length--;
return *(s->s++);
//s->s++;
//return s->s[-1];
}

int Snext(Object s) {
return sgetc(s.u.s);
}

void Sback(int c, Object s) {
s.u.s->length++;
*(--(s.u.s->s)) = c; //back it up, follow the pointer, store
}

int Fnext(Object f) {
return fgetc(f.u.file);
}

void Fback(int c, Object f) {
ungetc(c, f.u.file);
}


// called by Otoken, below
Object toke(Object src, int (*next)(Object), void (*back)(int,
Object)) {
int i;
int d = 0;
bool negate = false;
char *punct = "()<>[]{}/%";
char s[MAXTOKEN];
char *sp = s;
#define NEXT if ((i=next(src)) == EOF) goto fail
#define NEXTor if ((i=next(src)) == EOF)
#define BACK back(i,src)

while ( (i = next(src)) != EOF ) {
top:
if(i == '\n') { Oprompt(); } //newline
if(isspace(i)) continue; //whitespace _/comments
if(i == '%') { do { NEXT; } while(i != '\n'); goto top; }
if(i == '+') { //optional +
NEXTor goto single;
if(!isdigit(i)) { BACK; i = '+'; goto aname; }
i -= '0';
goto digit; }

if(i == '-') { //optional -
NEXTor goto single;
if(!isdigit(i)) { BACK; i = '-'; goto aname; }
i -= '0'; negate = true;
goto digit; }

if(isdigit(i)) { //digits
do {
i -= '0';
d *= 10;
digit: d += i;
NEXTor goto digitskipback;
if (i == '.') goto real;
if (i == '#') goto radix;
//TODO E notation
} while (isdigit(i));
BACK;
digitskipback:
if (negate) d *= -1;
return integer(d); }

goto after_real;
real: { float f; //b/c f is a FILE *
int e;
f = (float)d; //the positive integer so far
d = 0;
e = 1;
NEXTor goto floatskipback;
while(isdigit(i)) {
i -= '0';
d *= 10;
e *= 10;
d += i;
NEXTor goto floatskipback;
}
//TODO E notation
BACK;
floatskipback:
f += (float)d/(float)e;
if (negate) f *= -1;
return real(f); }
after_real:

goto after_radix;
radix: { int r = d;
if (r > 36) error("badradix syntaxerror in token");
if (r < 2) error("badradix syntaxerror in token");
NEXTor goto radixskipback;
d = 0;
do {
if (isdigit(i)) i -= '0';
else if (islower(i)) i -= 'a'+10;
else if (isupper(i)) i -= 'A'+10;
else error("badradixdigit syntaxerror in token");
d *= r;
d += i;
NEXTor goto radixskipback;
} while(isalnum(i));
BACK;
radixskipback:
return integer(d); }
after_radix:

if(i == '(') { // string
defer_paren = 1;
NEXTor goto syntaxerror;
if (i == ')') defer_paren--;
while (defer_paren) {
if (i == '\n') Oprompt();
if (i == '(') defer_paren++;
//TODO octal and hex
if (i == '\\') {
NEXTor goto syntaxerror;
switch(i) {
case '\n': Oprompt(); goto skip;
case 'a': i = '\a'; break;
case 'b': i = '\b'; break;
case 'f': i = '\f'; break;
case 'n': i = '\n'; break;
case 'r': i = '\r'; break;
case 't': i = '\t'; break;
case 'v': i = '\v'; break;
case '(': case ')':
case '\'': case '\"':
case '?': case '\\': break;
default: error("syntaxerror (string\\escape) in token");
}
}
*sp++ = (char)i;
if (sp-s > MAXTOKEN) error("limitcheck in token");
skip: NEXTor goto syntaxerror;
if (i == ')') defer_paren--;
}
*sp++ = 0;
//no BACK! eat the paren
return string(s); }

if(i == '/') { // literal name
NEXTor goto litnameskipback;
do {
*sp++ = (char)i;
NEXTor goto litnameskipback;
} while(isgraph(i) && strchr(punct,i)==NULL );
BACK;
litnameskipback:
*sp = 0;
return name(s); }

if(strchr("[]", i)) { // array
single: s[0] = (char)i; s[1] = 0;
return executable(name(s)); }

if(i == '{') { //procedures
typedef struct s_cord Fish;
struct s_cord { Object o; struct s_cord *link; };
Fish *head, *tail;
Object o, fin;
size_t i, len = 0;

fin = name("}"); /* make a list */
(void)((head=malloc(sizeof *head)) ||error("VMerror in token"));
tail = head;
do { tail->o = toke(src,next,back);
if ( eq(tail->o,fin) ) break;
len++;
(void)((tail->link=malloc(sizeof *tail)) ||error("VMerror in
token"));
tail = tail->link;
tail->link = NULL; /* possibly unnecessary */
} while(1);

o = array((int)len); /* turn list into array */
tail = head; /* fish becomes worm which eats itself */
for(i=0;i<len;i++) {
o.u.a->a[i] = tail->o;
head = tail->link;
free(tail);
tail = head;
}
free(head); //"}" equiv to free(tail), but this looks more
symmetrical
return executable(o);
}

if(i == '}') {
return executable(name("}"));
}

if(isgraph(i)) { //executable names
do {
aname: *sp++ = (char)i;
NEXTor goto nameskipback;
} while(isgraph(i) && !isspace(i) && strchr(punct,i)==NULL );
BACK;
nameskipback:
*sp = 0;
return executable(name(s)); }

syntaxerror:
error("syntaxerror in token");
} //while

fail:
return null;
}

void Otoken() {
Object o;
Object src;
stackunder(1,token);
src = pop;
switch(src.type) {
case stringtype: push(src);
o = toke(src, Snext, Sback);
dec(&src);
break;
case filetype:
o = toke(src, Fnext, Fback);
break;
default: error("typecheck in token");
}

if (o.type == nulltype) { push(boolean(false)); }
else {
if(eq(o,name("}"))) { error("unmatchedmark in token"); }
else { push(o); push(boolean(true)); }
}
}

int main() {
bool done = false;

push(string("this is a string"));
while(!done) {
Otoken(); //executable names
if (pop.u.b) { //check boolean return value
Object o;
o = pop;
if (o.type == nametype) {
printf("!grOK: name, %s\n", names[o.u.n]);
}
} else {
printf("!grNAK: failed to read a token");
done = true;
}
}

return 0;
}

/* eof token.c */

/* object.c
error function (to avoid a main.h or misc.c)
object allocators
and storage for singular objects null and mark
*/

#include <float.h> //FLT_EPSILON
#include <math.h> //fabsf
#include <stdarg.h> //...
#include <stdbool.h> //true false
#include <stdio.h> //vfprintf
#include <stdlib.h> //exit malloc free
#include <string.h> //strcmp strdup
//#include "object.h"

int error(char *fmt, ...) {
va_list argptr;
va_start( argptr, fmt );
(void)vfprintf(stderr, fmt, argptr);
(void)fputc('\n',stderr);
va_end(argptr);
exit(EXIT_FAILURE);
}


/* Singular objects */

Object null = { .type = nulltype, .flags = 0, .u.dummy = 0};
Object mark = { .type = marktype, .flags = 0, .u.dummy2 = 0};

/* Object Allocators and Convenience Functions */

Object boolean (char b) {
Object o = { .type = booleantype, .flags = 0, .u.b = b };
return o;
}

Object integer (int i) {
Object o = { .type = integertype, .flags = 0, .u.i = i };
return o;
}

Object real (float f) {
Object o = { .type = realtype, .flags = 0, .u.f = f };
return o;
}


char *names[MAXNAMES];
int nameslen = 0;
Object name (char *s) {
Object o = { .type = nametype, .flags = 0, .u.dummy = 0 };
int i;
for (i=0; i<nameslen; i++) { //look
if (strcmp(s, names[i]) == 0) { //found
o.u.n = i;
return o;
}
}
o.u.n = i; //new
names[i] = strdup(s);
nameslen++;
return o;
}


Object stringn (int n) {
Object o = { .type = stringtype, .flags = COMP, .u.dummy = 0 };
(void)((o.u.s = (String *)malloc(sizeof *o.u.s))
|| error("VMerror in stringn"));
o.u.s->ref = 1;
o.u.s->length = (size_t)n;
o.u.s->copyof = NULL;
(void)((o.u.s->s = malloc((size_t)n+1))
|| error("VMerror in stringn"));
return o; }

String *substring(String *s, size_t offset, size_t length) {
String *new;
if (offset+length > s->length)
error("rangecheck in substring");
(void)((new = malloc(sizeof *new))
|| error("VMerror in substring"));
new->ref = 1;
new->length = length;
new->copyof = s;
new->s = s->s + offset;
return new;
}

Object string (char *s) {
Object o;
size_t n;
n = strlen(s);
o = stringn((int)n);
strcpy(o.u.s->s, s);
//make substring so you can play with the pointer
//and dec can still free it later.
o.u.s = substring(o.u.s, 0, o.u.s->length);
return o; }

void dec_string(String *s) {
if (--s->ref == 0) {
if (s->copyof) dec_string(s->copyof);
else free(s->s);
free(s);
}
}

void inc_string(String *s) { s->ref++; }


Object array (int n) {
Object o = { .type = arraytype, .flags = COMP, .u.dummy = 0 };
(void)((o.u.a = (Array *)malloc(sizeof *o.u.a))
|| error("VMerror in array"));
o.u.a->ref = 1;
o.u.a->length = (size_t)n;
o.u.a->copyof = NULL;
(void)((o.u.a->a = (Object *)calloc((size_t)n, sizeof o))
|| error("VMerror in array"));
return o; }

void dec_array(Array *a) {
if (--a->ref == 0) {
int i;
for (i=0; i < (int)a->length; i++) {
//kill elements
dec(a->a + i);
}
if(a->copyof) dec_array(a->copyof);
else free(a->a);
free(a);
}
}

void inc_array(Array *a) { a->ref++; }


Array *subarray(Array *a, size_t offset, size_t length) {
Array *new;
if (offset+length > a->length)
error("rangecheck in subarray");
(void)((new = malloc(sizeof *new))
|| error("VMerror in subarray"));
new->ref = 1;
new->length = length;
new->copyof = a;
inc_array(a);
new->a = a->a + offset;
return new;
}

Object car(Array *a) { return a->a[0]; }

Array *cdr(Array *a) { return subarray(a, 1, a->length-1); }


Object dict (int n) {
Object o = { .type = dicttype, .flags = COMP, .u.dummy = 0 };
(void)((o.u.d = (Dict *)malloc(sizeof *o.u.d))
|| error("VMerror in dict"));
o.u.d->ref = 1;
o.u.d->maxlength = (size_t)n;
o.u.d->length = 0;
(void)((o.u.d->p = (struct s_pair *)calloc((size_t)n,sizeof *o.u.d-
>p))
|| error("VMerror in dict"));
return o; }

int eq (Object a, Object b) {
if (a.type != b.type) { return false; }
switch(a.type) {
case nulltype:
case marktype: return true;
case booleantype: return a.u.b == b.u.b;
case nametype: //ints
case integertype: return a.u.i == b.u.i;
case realtype: return (fabsf(a.u.f - b.u.f) > FLT_EPSILON);
case stringtype: return (strcmp(a.u.s->s, b.u.s->s) == 0);
case arraytype: //composites (pointers)
case filetype:
case dicttype: return a.u.d == b.u.d;
case operatortype: return a.u.op.fp == b.u.op.fp;
default:
return false;
}
}

struct s_pair *lookup (Dict *d, Object key) {
struct s_pair *p = NULL;
int i;
for (i=0; i < (int)d->length; i++) {
if (eq(d->p[i].key,key)) {
p = &d->p[i];
break;
}
}
return p;
}

bool define(Dict *d, Object key, Object value) {
struct s_pair *p;
p = lookup(d, key);
if (p) {
dec(&p->value);
p->value = value;
return true;
} else {
if (d->length >= d->maxlength) {
//error("dictfull in define");
return false;
}
p = &d->p[d->length++];
inc(&key);
p->key = key;
inc(&value);
p->value = value;
return true;
}
}

void dec_dict(Dict *d) {
if (--d->ref == 0) {
int i;
for (i=0; i < (int)d->length; i++) {
//kill elements
dec(&d->p[i].key);
dec(&d->p[i].value);
}
free(d->p);
free(d);
}
}

void inc_dict(Dict *d) { d->ref++; }


void dec(Object *o) {
if (o->flags & COMP ) { //if Composite
switch(o->type) { //decrement the ref
case stringtype: dec_string(o->u.s); break;
case arraytype: dec_array(o->u.a); break;
case dicttype: dec_dict(o->u.d); break;
default: break;
}
}
}

void inc(Object *o) {
if (o->flags & COMP) {
switch(o->type) {
case stringtype: inc_string(o->u.s); break;
case arraytype: inc_array(o->u.a); break;
case dicttype: inc_dict(o->u.d); break;
default: break;
}
}
}

Object executable (Object o) { o.flags |= EXEC; return o; }

Object operator (char *name, void (*fp)()) {
Object o = { .type = operatortype, .flags = EXEC,
.u.op = { .name = name, .fp = fp } };
return o;
}

/* eof: object.c */

/* from system.c */
void Oprompt() {
printf("> "); fflush(stdout);
}
/* end excerpt from system.c */

This ends the obnoxiously long message.

Message has been deleted

Mark Wooding

unread,
Mar 20, 2009, 6:04:12 AM3/20/09
to
Tetsuya <i...@noi.com> writes:

> luser-ex-troll wrote:
> > This ends the obnoxiously long message.
>

> Next time use pastebin.com please.

Disagree strongly. The message is asking an interesting question, and
will be archived forever. The archive becomes much less valuable
without the actual code in question.

Also, including the original code makes quoting portions of it in
criticism easier.

-- [mdw]

Richard Heathfield

unread,
Mar 20, 2009, 6:15:40 AM3/20/09
to
Tetsuya said:

> luser-ex-troll wrote:
>> This ends the obnoxiously long message.
>

> Next time use pastebin.com please.

Why? What's wrong with Usenet?

--
Richard Heathfield <http://www.cpax.org.uk>
Email: -http://www. +rjh@
Google users: <http://www.cpax.org.uk/prg/writings/googly.php>
"Usenet is a strange place" - dmr 29 July 1999

Richard Heathfield

unread,
Mar 20, 2009, 6:27:06 AM3/20/09
to
luser-ex-troll said:

> Hello all.
> I have a problem of a somewhat different kind than the usual post.
> My code works!

Delighted to hear it! (Incidentally, relying as you do on C99
features will restrict the portability of your code and reduce the
number of people able to help you with it. If those are not
concerns for you, then obviously that's not an issue.)

> It's just appallingly ugly.

Concur.

> With my attention
> focused sharply on clear and consistent data structures, the most
> important function in my nascent postscript interpreter, the
> lexical scanner, has degenerated into spaghetti.

Again, concur.

> It happened incrementally so I didn't really worry
> about it until it became overwhelmingly obvious
> that what I've got is terribly, horribly ugly.

I was going to suggest a state machine, but one doesn't normally
need one for something as simple as lexing. A simple loop is
usually enough.

I think you're trying to do too much in your scanner - you're not
just scanning the input for lexemes, you're also trying to
interpret stuff as you go along. To some extent that /can/ be done
neatly, but if you must do it, do it something like this:

while(!error && (p = scanner_getlexeme(stream)) != NULL)
{
n = token_gettype(p);
q = (*token_interpret[n])(p);
error = token_store(q, n, tokenlist);
}

<snip>

nick_keigh...@hotmail.com

unread,
Mar 20, 2009, 6:33:04 AM3/20/09
to
On 20 Mar, 03:12, luser-ex-troll <mijo...@yahoo.com> wrote:

> I have a problem of a somewhat different kind than the usual post. My
> code works! It's just appallingly ugly. With my attention focused
> sharply on clear and consistent data structures, the most important
> function in my nascent postscript interpreter, the lexical scanner,
> has degenerated into spaghetti.

<snip>

ooo! haven't seen code like that in a while! With gotos as well!
I might have a go later but at the moment all I do is give general
advice. Write some tests. Write lots of tests. Every time you make
a small change run all the tests.

In the past I've also drawn flow charts. At least then you can see the
structure of the whole program at once. Try and remove chunks into
separate functions that

1. have a single entrance and exit
2. perform a single function well

pass parameters, don't use global data

Bartc

unread,
Mar 20, 2009, 8:09:44 AM3/20/09
to
luser-ex-troll wrote:

> I have a problem of a somewhat different kind than the usual post. My
> code works! It's just appallingly ugly. With my attention focused
> sharply on clear and consistent data structures, the most important
> function in my nascent postscript interpreter, the lexical scanner,
> has degenerated into spaghetti.

> NEXTor goto radixskipback;
<snip lots of similar code>

This is for your Postscript interpreter?

Considering Postscript itself doesn't have Goto at all, you're setting
yourself bad examples.

Possibly the code is a consequence of using C which I don't consider very
flexible when developing code. (If you'd started off with Python for
example, this problem would not have come up since it doesn't have Goto
either).

The specific function you mentioned seemed some sort of lexical parser. I've
written loads without needing lots of gotos (one or two are OK).

Use return statements instead (so add more functions). Syntax errors in
tokens I usually deal with by returning a special error token (leaving it to
the caller to report the error).

And I'd get rid of the BACK/NEXT macros which are distorting the syntactical
structure of the code so that the statement type is not recognisable.

--
bartc

luser-ex-troll

unread,
Mar 20, 2009, 12:31:08 PM3/20/09
to

Thanks. I like it. On first read I thought I had an objection
that the various ways of terminating a token should be tied
to the semantic meaning. But it's is probably just an artifact
of my intuitive understanding of the postscript behavior.
With this way it looks like everything is more strongly
dependent on the enum of types; which seems ideal.

lxt

luser-ex-troll

unread,
Mar 20, 2009, 12:46:53 PM3/20/09
to
On Mar 20, 7:09 am, "Bartc" <ba...@freeuk.com> wrote:
> luser-ex-troll wrote:
> > I have a problem of a somewhat different kind than the usual post. My
> > code works! It's just appallingly ugly. With my attention focused
> > sharply on clear and consistent data structures, the most important
> > function in my nascent postscript interpreter, the lexical scanner,
> > has degenerated into spaghetti.
> > NEXTor goto radixskipback;
>
> <snip lots of similar code>
>
> This is for your Postscript interpreter?
>
> Considering Postscript itself doesn't have Goto at all, you're setting
> yourself bad examples.

Well, yeah. You're totally right. But it didn't seem so wrong
when I wrote the first one, and the second. Then suddenly it
was no longer obvious how to tease a structure back out of it.

> Possibly the code is a consequence of using C which I don't consider very
> flexible when developing code. (If you'd started off with Python for
> example, this problem would not have come up since it doesn't have Goto
> either).

Agreed. But I don't like block structure being controlled by
indentation. I suppose that's obvious from the code.

> The specific function you mentioned seemed some sort of lexical parser. I've
> written loads without needing lots of gotos (one or two are OK).
>

See? But what about the third?! The fourth?!

> Use return statements instead (so add more functions). Syntax errors in
> tokens I usually deal with by returning a special error token (leaving it to
> the caller to report the error).

Agreed, but I'm looking for a nice way to organize and dispatch
those functions, rather than merely translating the problem into
function spaghetti.

> And I'd get rid of the BACK/NEXT macros which are distorting the syntactical
> structure of the code so that the statement type is not recognisable.

Roger that. Artifact from the version that used macros for the string/
file overloading instead of function pointers. At that
point the served a useful purpose by jumping out of the spaghetti like
uh, meatballs.

lxt

Phil Carmody

unread,
Mar 21, 2009, 4:02:48 AM3/21/09
to
Richard Heathfield <r...@see.sig.invalid> writes:
> Tetsuya said:
>
>> luser-ex-troll wrote:
>>> This ends the obnoxiously long message.
>>
>> Next time use pastebin.com please.
>
> Why? What's wrong with Usenet?

I can't believe that hasn't elicited any kind of answer yet! :-)

Phil
--
Marijuana is indeed a dangerous drug.
It causes governments to wage war against their own people.
-- Dave Seaman (sci.math, 19 Mar 2009)

gw7...@aol.com

unread,
Mar 21, 2009, 5:57:35 AM3/21/09
to
On 20 Mar, 03:12, luser-ex-troll <mijo...@yahoo.com> wrote:
> I have a problem of a somewhat different kind than the usual post. My
> code works! It's just appallingly ugly. With my attention focused
> sharply on clear and consistent data structures, the most important
> function in my nascent postscript interpreter, the lexical scanner,
> has degenerated into spaghetti.

The book "BCPL: the language and its compiler", by Martin Richards and
Coklin Whitby-Strevens, includes the code for a lexical scanner, with
plenty of comments about it. If you can get hold of this book, it
might be worth reading. BCPL is a fore-runner of C.

I've only skimmed through your code, but a few points struck me:

> int Snext(Object s) {
> return sgetc(s.u.s);
>
> }
>
> void Sback(int c, Object s) {
> s.u.s->length++;
> *(--(s.u.s->s)) = c; //back it up, follow the pointer, store
>
> }

You're worryuing here about reading values and then needing to back up
a bit. The code in the book avoids the need to do this. There is a
function called RCH which will read a character in and put it in a
global variable CH. (Yes, I know globals are disapproved of these
days, but bear with me...) The function NEXTSYMB, which reads the next
tioken from the source, assumes that there is a character in CH which
has not yet been processed. It processes it, reaing more characters if
necessary using RCH, and it calls RCH at least once so that, when it
finishes, there is an unprocessed charcter left in CH. Thus you just
need to call RCH once at the beginning, and then you call NEXTSYMB
continually to get the tokens.

For example, suppose that a '<' character can either be the start of
<=, the start of <<, or just a less-than sign. You do the following
(I've converted this bit into C, and also fixed a couple of stylistic
points):

case '>':
RCH();
if (CH == '=') { RCH(); return S_LE; }
if (CH == '>') { RCH(); return S_LSHIFT; }
return S_LS;

Either way, the first unprocessed character is in CH afterwards.

> #define NEXT if ((i=next(src)) == EOF) goto fail
> #define NEXTor if ((i=next(src)) == EOF)

You seem very worried about reading in an EOF character. This seems a
bit unnecessary. At any point in the processing, it seems that one of
three things can be the case:

a) the characters you have processed so far need to be followed by
something of a specific type, and it is an error if they're not;
b) the characters you have processed so far may or may not be followed
by something of a specific type, if they are then process that, if
not, leave what follows to be processed next time round;
c) the characters you have processed so far are complete in themselves
and what follows is something separate.

In none of these cases does there seem to be any need to check
specifically whether what follows includes an EOF. Simply treat it as
any character which is different from what is allowed to follow.

This may in fact improve any error messages that you show the user -
there will be more of "You should have provided a ***, and didn't" and
less of "Unexpected end of file".

You should only need to worry about an EOF if i is EOF when you start
the loop - in which case you simply return null.

> Specifically the problem is the toke function
> which scans a string or file to create an object
> (tag-union, variant-record). It's constructed
> as a series of tests and loops within a big loop,
> but uses goto to change its mind about what
> type of object it has found (eg. '+' followed
> by a digit is a noise character introducing the
> number, but followed by anything else, it's an
> executable name).

>     if(i == '+') { //optional +


>         NEXTor goto single;
>         if(!isdigit(i)) { BACK; i = '+'; goto aname; }
>         i -= '0';
>         goto digit; }

This doesn't really seem necessary. Either '+' is followed by a digit,
or it isn't. If it is, the digit (and any subsequent digits) are
processed exactly the same way as if the '+' wasn't there. So I think
you may just need a "continue" here intead of the "goto digit" - start
the processing off again, this time looking at the first digit rather
than the '+'. It may mean testing whether the first digit is a digit
twice, but that's hardly the biggest waste on the planet, is it?

[If you do read the book, note that it is itself not perfect. For one
thing, NEXTSYMB returns its result by a global, which seems an
unnecessary piece of horribleness. Also, instead of the neat code
above, it actually uses RETRUN to leave the function without doing a
RCH, and BREAK to leave the SWITCHON (equivalent to a switch) where it
hits a RCH at the end of the function.]

Anyhow, hope that helps.
Paul.

luser-ex-troll

unread,
Mar 21, 2009, 9:59:08 AM3/21/09
to

Yes. I'll add that to my bookfetch list at alibris. BCPL was
interpreted, wasn't it?

As far as improving the error messages, I'm somewhat restricted by the
behavior dictated by the Adobe spec, but I think I can add a field of
extra detail into the report. The error function I posted is just a
stub.

The big stumbling block, as I see it now, is my use of 3 kinds of test
on the character in question: if (i == 'x'), strchr("string", i), and
isalpha(i). It seems if I just pick one, I can organize the tests into
a grammar structure and drastically simplify the code.

Maybe it's time to draw flowcharts...

lxt

luser-ex-troll

unread,
Mar 21, 2009, 5:15:00 PM3/21/09
to
I've barely begun this second attempt and already I want to write
gotos.

Does the following look foredoomed to devolve?

#include <stdio.h>
#include <string.h>

#define space " \t\r\n\f"
#define delim "()<>[]{}/%"
#define crlf "\r\n"
#define digit "0123456789"
#define hex digit "abcdef" "ABCDEF"
#define alpha "abcdefghijklmnopqrstuvwxyz"
#define Alpha "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#define alnum digit alpha Alpha
#define pm "+-"
#define dot "."
#define rad "#"
#define raddoteE "#.eE"
#define eE "eE"
#define epm "e+-"
#define Epm "E+-"

enum e_lext { fail, decimal, radix, real, string, hexstring, name };

typedef struct {
char *pre; /*optional prefix*/
char *chug; /*things to accept*/
int elsewise; /*thing to try if unacceptable*/
char *other; /*transfer to different test by offset matched*/
enum e_lext type; /*type to be interpreted*/
} test;

test tests[] = {
/*0*/{.type = fail, .pre = space, .chug = NULL, .elsewise =
1, .other = NULL },
/*1*/{.type = decimal, .pre = pm, .chug = digit, .elsewise = 6, .other
= raddoteE },
/*2*/{.type = radix, .pre = NULL, .chug = alnum, .elsewise =
6, .other = NULL },
/*3*/{.type = real, .pre = NULL, .chug = digit, .elsewise =
6, .other = eE },
/*4*/{.type = real, .pre = pm, .chug = digit, .elsewise =
6, .other = NULL },
/*5*/{.type = real, .pre = pm, .chug = digit, .elsewise =
6, .other = NULL },
/*6*/{.type = string, .pre = "(", .chug = NULL, .elsewise =
8, .other = NULL },
/*7*/{.type = hexstring, .pre = "<", .chug = hex, .elsewise =
8, .other = NULL },
/*8*/{.type = name, .pre = "/", .chug = alnum, .elsewise =
9, .other = NULL },
/*9*/{.type = fail, .pre = NULL, .chug = NULL, .elsewise =
10, .other = NULL }
};

#define NBUF 256

int main() {
char buf[NBUF] = "";
char *s = buf;
int i;
int testing=0;
char *off;

while( (i=getchar()) != EOF) {
top:
if (tests[testing].pre) /* try this */
while (strchr(tests[testing].pre,i)) {
*s++ = (char)i; *s = 0; i=getchar();
}

if (tests[testing].chug) /* try that */
while (strchr(tests[testing].chug,i)) {
*s++ = (char)i; *s = 0; i=getchar();
}

if (tests[testing].other) { // try the other */
off = strchr(tests[testing].other,i);
if (off) { // transfer to special test
testing += (int) (off-tests[testing].other) + 1;
*s++ = (char)i; *s = 0; i=getchar();
goto top;
}
}

if (s == buf) {
if (testing == 10) {
printf("fail: unable to grok the stream\n");
break;
} else {
testing = tests[testing].elsewise;
goto top;
}
} else {
ungetc(i,stdin);
printf("grok: %s\n", buf);
s = buf;
testing = 0;
}

} //while
return 0;
} //main

//eof

Flash Gordon

unread,
Mar 21, 2009, 6:38:02 PM3/21/09
to
luser-ex-troll wrote:
> I've barely begun this second attempt and already I want to write
> gotos.

Ones you don't need...

> Does the following look foredoomed to devolve?

<snip>

> int main() {
> char buf[NBUF] = "";
> char *s = buf;

Personally I would be more inclined to do use an index than a pointer.

> int i;
> int testing=0;
> char *off;
>
> while( (i=getchar()) != EOF) {
> top:

<snip>

> *s++ = (char)i; *s = 0; i=getchar();

Why the cast?

> goto top;

Loose the i=getchar() and replace the goto with a continue.

<snip>

> goto top;

Do an ungetc() followed by a continue

<snip>

You could also do with breaking the code down in to functions rather
than writing one massive function.
--
Flash Gordon

Richard

unread,
Mar 21, 2009, 7:35:13 PM3/21/09
to
Flash Gordon <sm...@spam.causeway.com> writes:

> luser-ex-troll wrote:
>> I've barely begun this second attempt and already I want to write
>> gotos.
>
> Ones you don't need...
>
>> Does the following look foredoomed to devolve?
>
> <snip>
>
>> int main() {
>> char buf[NBUF] = "";
>> char *s = buf;
>
> Personally I would be more inclined to do use an index than a pointer.
>
>> int i;
>> int testing=0;
>> char *off;
>>
>> while( (i=getchar()) != EOF) {
>> top:
>
> <snip>
>
>> *s++ = (char)i; *s = 0; i=getchar();
>
> Why the cast?

Whether right or wrong I would say its blatantly obvious. If you think
he shouldn't cast then tell him why not.

--
"Avoid hyperbole at all costs, its the most destructive argument on
the planet" - Mark McIntyre in comp.lang.c

Richard Heathfield

unread,
Mar 21, 2009, 8:26:00 PM3/21/09
to
luser-ex-troll said:

> I've barely begun this second attempt and already I want to write
> gotos.

Imagine a C-like language that is exactly the same as C except that
it has no goto, switchless break, or continue. How would you write
your program in such a language?

> Does the following look foredoomed to devolve?

Yes.

luser-ex-troll

unread,
Mar 22, 2009, 7:18:05 AM3/22/09
to
On Mar 21, 7:26 pm, Richard Heathfield <r...@see.sig.invalid> wrote:
> luser-ex-troll said:
>
> > I've barely begun this second attempt and already I want to write
> > gotos.
>
> Imagine a C-like language that is exactly the same as C except that
> it has no goto, switchless break, or continue. How would you write
> your program in such a language?
>

Not sure, but equally unsure I'd want to use such a language. I think
I'm beginning to truly appreciate the dangers of goto. I also think
that a jump to the top and jump out of loop are the 1 or 2 exceptions
to the rule that may be worthwhile.

But here's take 3 with no gotos, and with much cleaner control flow, I
hope.

--
loose-rocks-trawl

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

int pm (int i) { return strchr("+-",i) != 0; }
int space (int i) { return strchr(" \t\n\r\f",i) != 0; }
int dot (int i) { return i == '.'; }
int e (int i) { return strchr("eE",i) != 0; }
int rad (int i) { return i == '#'; }
int notraddote (int i) { return strchr("#.eE",i) == 0; }
int lparen (int i) { return i == '('; }
int notrparen (int i) { return i != ')'; }
int rparen (int i) { return i == ')'; }
int lt (int i) { return i == '<'; }
int gt (int i) { return i == '>'; }
int delim (int i) { return strchr("()<>[]{}/%",i) != 0; }
int regular (int i) { return !space(i) && !delim(i); }
int slash (int i) { return i == '/'; }

typedef struct {
int (*fp)(int); //function predicate
int yes,no; //transition
bool pass; //store char if accepted
} test;

#define SYNE -1
#define DECI -2
#define RADI -3
#define REAL -4
#define STRG -5
#define HEXS -6
#define NAME -7

test tests[] = {
/* 0*/ {.fp= space, .yes= 0, .no= 1, .pass=false }, /*sp* */

//decimal
/* 1*/ {.fp=pm, .yes= 2, .no= 3, .pass=true },
/* 2*/ {.fp=isdigit, .yes= 4, .no=19, .pass=true }, //[+-]
[^0-9]: name
/* 3*/ {.fp=isdigit, .yes= 4, .no=13, .pass=true }, //[^0-9]:
string?
/* 4*/ {.fp=isdigit, .yes= 4, .no= 5, .pass=true }, //[0-9]+
/* 5*/ {.fp=notraddote, .yes=DECI, .no= 6, .pass=false }, //[0-9]+
[^0-9]

//radix
/* 6*/ {.fp=rad, .yes= 7, .no= 8, .pass=true }, /*([#]?)
*/
/* 7*/ {.fp=isalnum, .yes= 7, .no=RADI, .pass=true }, /*([0-9a-z]
*) */

//real
/* 8*/ {.fp=dot, .yes= 9, .no=10, .pass=true }, /*([.]?)
*/
/* 9*/ {.fp=isdigit, .yes= 9, .no=10, .pass=true }, /*([0-9]?)
*/

//exponential
/*10*/ {.fp=e, .yes=11, .no=12, .pass=true }, /*([eE]?)
*/
/*11*/ {.fp=pm, .yes=12, .no=12, .pass=true }, /*[eE]
([+-]?) */
/*12*/ {.fp=isdigit, .yes=12, .no=REAL, .pass=true }, /*([0-9]?)
*/

//string
/*13*/ {.fp=lparen, .yes=14, .no=16, .pass=false }, /*[(]?*/
/*14*/ {.fp=notrparen, .yes=14, .no=15, .pass=true }, /*([^)]?)
*/
/*15*/ {.fp=rparen, .yes=STRG, .no=SYNE, .pass=false }, /*[)]?*/

//hexstring
/*16*/ {.fp=lt, .yes=17, .no=19, .pass=false }, /*[<]?*/
/*17*/ {.fp=isxdigit, .yes=17, .no=18, .pass=true }, /*([0-9a-f]
*)*/
/*18*/ {.fp=gt, .yes=HEXS, .no=SYNE, .pass=false }, /*[>]?*/

//name
/*19*/ {.fp=slash, .yes=20, .no=20, .pass=true }, /*([/]?)*/
/*20*/ {.fp=regular, .yes=20, .no=NAME, .pass=true } /*([^sp
delim]*)*/
};

#define NBUF 256

int main() {
int i;


char buf[NBUF] = "";
char *s = buf;

int state = 0;

while ( (i=getchar()) != EOF ) {

if ( (*tests[state].fp)(i) ) {
if (tests[state].pass) {
*s++ = i; *s = 0;
}
state = tests[state].yes;
} else {
state = tests[state].no;
ungetc(i,stdin);
}

if (state < 0) {
char *typestring;
if (state == -1) { break; }
switch(state) {
case DECI: typestring = "decimal"; break;
case RADI: typestring = "radix"; break;
case REAL: typestring = "real"; break;
case STRG: typestring = "string"; break;
case HEXS: typestring = "hexstring"; break;
case NAME: typestring = "name"; break;
}
printf("grok: %d %s %s\n", state, typestring, buf);
state = 0;
s = buf; *s = 0;
}

} //while
printf("fail: 0x%x\n",(unsigned)i);
return 0;
} //main

//eof

Bartc

unread,
Mar 23, 2009, 6:44:30 AM3/23/09
to
luser-ex-troll wrote:
> On Mar 20, 7:09 am, "Bartc" <ba...@freeuk.com> wrote:

>> Python for example, this problem would not have come up since it
>> doesn't have Goto either).
>
> Agreed. But I don't like block structure being controlled by
> indentation. I suppose that's obvious from the code.

Not controlled. But represented. How else would you show the code structure?
(Imagine reading a novel, with lots of dialogue, typeset as a single giant
paragraph.)

>> The specific function you mentioned seemed some sort of lexical
>> parser. I've written loads without needing lots of gotos (one or two
>> are OK).
>>
>
> See? But what about the third?! The fourth?!

By the third or fourth goto, you will either see a pattern emerging, or
start to realise it's getting out of hand and needs a slightly different
approach. Although this is true of a lot of coding.


--
Bartc

Richard Harter

unread,
Mar 23, 2009, 5:24:05 PM3/23/09
to
On Mon, 23 Mar 2009 10:44:30 GMT, "Bartc" <ba...@freeuk.com>
wrote:

>luser-ex-troll wrote:
>> On Mar 20, 7:09 am, "Bartc" <ba...@freeuk.com> wrote:
>
>>> Python for example, this problem would not have come up since it
>>> doesn't have Goto either).
>>
>> Agreed. But I don't like block structure being controlled by
>> indentation. I suppose that's obvious from the code.
>
>Not controlled. But represented. How else would you show the code structure?
>(Imagine reading a novel, with lots of dialogue, typeset as a single giant
>paragraph.)

That's a rather dubious comparison. Novels have chapter headings
and paragraphs; they don't have the equivalent of levels of
indentation. They may have flashbacks but they are structured to
be read sequentially.


Richard Harter, c...@tiac.net
http://home.tiac.net/~cri, http://www.varinoma.com
If I do not see as far as others, it is because
I stand in the footprints of giants.

CBFalconer

unread,
Mar 23, 2009, 6:19:25 PM3/23/09
to
Richard Harter wrote:
> "Bartc" <ba...@freeuk.com> wrote:
>> luser-ex-troll wrote:
>>> "Bartc" <ba...@freeuk.com> wrote:
>>
>>>> Python for example, this problem would not have come up since
>>>> it doesn't have Goto either).
>>>
>>> Agreed. But I don't like block structure being controlled by
>>> indentation. I suppose that's obvious from the code.
>>
>> Not controlled. But represented. How else would you show the
>> code structure? (Imagine reading a novel, with lots of
>> dialogue, typeset as a single giant paragraph.)
>
> That's a rather dubious comparison. Novels have chapter
> headings and paragraphs; they don't have the equivalent of
> levels of indentation. They may have flashbacks but they are
> structured to be read sequentially.

Have you never ignored a Usenet message that is written as a solid
block, possibly withour either sentence of paragraph delimitation?

--
[mail]: Chuck F (cbfalconer at maineline dot net)
[page]: <http://cbfalconer.home.att.net>
Try the download section.


luser-ex-troll

unread,
Mar 23, 2009, 8:32:48 PM3/23/09
to
On Mar 23, 5:19 pm, CBFalconer <cbfalco...@yahoo.com> wrote:
> Richard Harter wrote:
> > "Bartc" <ba...@freeuk.com> wrote:
> >> luser-ex-troll wrote:
> >>> "Bartc" <ba...@freeuk.com> wrote:
>
> >>>> Python for example, this problem would not have come up since
> >>>> it doesn't have Goto either).
>
> >>> Agreed. But I don't like block structure being controlled by
> >>> indentation. I suppose that's obvious from the code.
>
> >> Not controlled. But represented. How else would you show the
> >> code structure? (Imagine reading a novel, with lots of
> >> dialogue, typeset as a single giant paragraph.)
>
> > That's a rather dubious comparison.  Novels have chapter
> > headings and paragraphs; they don't have the equivalent of
> > levels of indentation.  They may have flashbacks but they are
> > structured to be read sequentially.
>
> Have you never ignored a Usenet message that is written as a solid
> block, possibly withour either sentence of paragraph delimitation?
>

Sounds like Finnegan's Wake! ;{>

--
lxt
quark rhymes with quart

Richard Harter

unread,
Mar 23, 2009, 10:46:43 PM3/23/09
to
On Mon, 23 Mar 2009 17:19:25 -0500, CBFalconer
<cbfal...@yahoo.com> wrote:

>Richard Harter wrote:
>> "Bartc" <ba...@freeuk.com> wrote:
>>> luser-ex-troll wrote:
>>>> "Bartc" <ba...@freeuk.com> wrote:
>>>
>>>>> Python for example, this problem would not have come up since
>>>>> it doesn't have Goto either).
>>>>
>>>> Agreed. But I don't like block structure being controlled by
>>>> indentation. I suppose that's obvious from the code.
>>>
>>> Not controlled. But represented. How else would you show the
>>> code structure? (Imagine reading a novel, with lots of
>>> dialogue, typeset as a single giant paragraph.)
>>
>> That's a rather dubious comparison. Novels have chapter
>> headings and paragraphs; they don't have the equivalent of
>> levels of indentation. They may have flashbacks but they are
>> structured to be read sequentially.
>
>Have you never ignored a Usenet message that is written as a solid
>block, possibly withour either sentence of paragraph delimitation?

I have indeed. And your point is?

Richard Bos

unread,
Mar 24, 2009, 9:25:03 AM3/24/09
to
"Bartc" <ba...@freeuk.com> wrote:

> luser-ex-troll wrote:
> > On Mar 20, 7:09 am, "Bartc" <ba...@freeuk.com> wrote:
>
> >> Python for example, this problem would not have come up since it
> >> doesn't have Goto either).
> >
> > Agreed. But I don't like block structure being controlled by
> > indentation. I suppose that's obvious from the code.
>
> Not controlled.

Wrong. In Python, indentation _does_ control the block structure, unlike
in sane languages or even in C.

Richard

luser-ex-troll

unread,
Mar 24, 2009, 11:46:46 PM3/24/09
to
Alright now. I think I've turned a corner with this. And, of course,
everyone who said "use functions!" was spot on.

Hopefully this looks more C-worthy.

--
lexit-real


#include <ctype.h>
#include <stdio.h>
#include <string.h>

int issign (int c){return !!strchr("+-",c);}
int israd (int c){return !!strchr("#", c);}
int isdot (int c){return !!strchr(".", c);}
int ise (int c){return !!strchr("eE",c);}
int isdelim (int c){return !!strchr("()<>{}[]%/",c);}
int isregular(int c){return !isspace(c);}

typedef struct test test;
struct test {
int (*fp)(int); int y,n;
};

test decimal[] = {
/* 0*/ { issign, 1, 1 },
/* 1*/ { isdigit, 2, -1 },
/* 2*/ { isdigit, 2, -1 }, //success
};
int dec_accept(int i){ return i==2; }

test radix[] = {
/* 0*/ { isdigit, 1, -1 },
/* 1*/ { isdigit, 1, 2 },
/* 2*/ { israd, 3, -1 },
/* 3*/ { isdigit, 4, -1 },
/* 4*/ { isdigit, 4, -1 }, //success
};
int rad_accept(int i){ return i==4; }

test real[] = {
/* 0*/ { issign, 1, 1 },
/* 1*/ { isdigit, 2, 4 },
/* 2*/ { isdigit, 2, 3 },
/* 3*/ { isdot, 6, 7 }, //success
/* 4*/ { isdot, 5, -1 },
/* 5*/ { isdigit, 6, -1 },
/* 6*/ { isdigit, 6, 7 }, //success
/* 7*/ { ise, 8, -1 },
/* 8*/ { issign, 9, 9 },
/* 9*/ { isdigit, 10, -1 },
/*10*/ { isdigit, 10, -1 }, //success
};
int real_accept(int i){switch(i){case 3: case 6:case 10:return 1;}
return 0;}

int check(char *buf, test *fsm, int(*yes)(int)){ char *s = buf; int
sta = 0;
while(sta!=-1 && *s) {
if (fsm[sta].fp(*s))
{ sta = fsm[sta].y; s++; }
else { sta = fsm[sta].n; }
}
sta=yes(sta);
return sta; }

int grok(char *buf) {
if (check(buf,decimal,dec_accept)) { printf( "dec: %s\n",
buf); return 0; }
else if (check(buf,radix, rad_accept)) { printf( "rad: %s\n",
buf); return 0; }
else if (check(buf,real, real_accept)) { printf("real: %s\n",
buf); return 0; }
else { printf("grok? %s\n", buf); return -1; }
}

int puff(char *buf, int nbuf) { char *s = buf; int c;
while ((c=getchar()) != EOF) {
if(isspace(c) || isdelim(c))
break;
if(nbuf < s-buf-1)
return -1;
*s++ = c;
}
*s++ = 0;
return 0; }

int toke(char *buf, int nbuf) { char *s=buf; int sta = 0;
while(isspace(*s=getchar())) /**/;
if( (sta=puff(buf+1,nbuf-1)) == -1) return -1;
sta = grok(buf);
return sta; }

#define NBUF 10
int main() { char buf[NBUF] = ""; int sta;
while ( (sta=toke(buf,NBUF)) != -1 )
/**/;
return 0; }


luser-ex-troll

unread,
Mar 25, 2009, 12:16:55 AM3/25/09
to
correction; last 2 lines of radix test should use isalnum rather than
isdigit:
/* 3*/ { isalnum, 4, -1 },
/* 4*/ { isalnum, 4, -1 }, //success

oh, and here are the regular expressions that the three machines are
intended to match:
decimals: ^[+-]?d+$
radix: ^d+[#][a-Z0-9]+$
real: ^[+-]?(d+.d*)|(d*.d+)([eE][+-]?d+)?$

--
laxite

Keith Thompson

unread,
Mar 25, 2009, 2:52:05 AM3/25/09
to

I assume d is intended to represent a decimal digit. In the regular
expression syntaxes I've seen, that's represented as \d; d represents
the letter d itself.

--
Keith Thompson (The_Other_Keith) ks...@mib.org <http://www.ghoti.net/~kst>
Nokia
"We must do something. This is something. Therefore, we must do this."
-- Antony Jay and Jonathan Lynn, "Yes Minister"

luser-ex-troll

unread,
Mar 25, 2009, 3:13:36 AM3/25/09
to
On Mar 25, 1:52 am, Keith Thompson <ks...@mib.org> wrote:

> luser-ex-troll <mijo...@yahoo.com> writes:
> > correction; last 2 lines of radix test should use isalnum rather than
> > isdigit:
> > /* 3*/ { isalnum, 4, -1 },
> > /* 4*/ { isalnum, 4, -1 }, //success
>
> > oh, and here are the regular expressions that the three machines are
> > intended to match:
> > decimals: ^[+-]?d+$
> > radix: ^d+[#][a-Z0-9]+$
> > real: ^[+-]?(d+.d*)|(d*.d+)([eE][+-]?d+)?$
>
> I assume d is intended to represent a decimal digit.  In the regular
> expression syntaxes I've seen, that's represented as \d; d represents
> the letter d itself.
>

Yes, precisely. Apologies.

--
elsorryxtrog

Barry Schwarz

unread,
Mar 25, 2009, 8:08:27 AM3/25/09
to
On Tue, 24 Mar 2009 20:46:46 -0700 (PDT), luser-ex-troll
<mij...@yahoo.com> wrote:

>Alright now. I think I've turned a corner with this. And, of course,
>everyone who said "use functions!" was spot on.
>
>Hopefully this looks more C-worthy.
>

>lexit-real
>
>
>#include <ctype.h>
>#include <stdio.h>
>#include <string.h>
>
>int issign (int c){return !!strchr("+-",c);}
>int israd (int c){return !!strchr("#", c);}
>int isdot (int c){return !!strchr(".", c);}

Wouldn't return c == '.' be easier?

>int ise (int c){return !!strchr("eE",c);}
>int isdelim (int c){return !!strchr("()<>{}[]%/",c);}
>int isregular(int c){return !isspace(c);}
>
>typedef struct test test;
>struct test {
> int (*fp)(int); int y,n;
>};

You could combine the typedef and the structure definition into a
single declaration.

>
>test decimal[] = {
>/* 0*/ { issign, 1, 1 },
>/* 1*/ { isdigit, 2, -1 },
>/* 2*/ { isdigit, 2, -1 }, //success
>};
>int dec_accept(int i){ return i==2; }

Would it not work just as well if you eliminated the third element of
decimal and changed this to return i == 1?

>
>test radix[] = {
>/* 0*/ { isdigit, 1, -1 },
>/* 1*/ { isdigit, 1, 2 },
>/* 2*/ { israd, 3, -1 },
>/* 3*/ { isdigit, 4, -1 },
>/* 4*/ { isdigit, 4, -1 }, //success
>};
>int rad_accept(int i){ return i==4; }
>
>test real[] = {
>/* 0*/ { issign, 1, 1 },
>/* 1*/ { isdigit, 2, 4 },
>/* 2*/ { isdigit, 2, 3 },
>/* 3*/ { isdot, 6, 7 }, //success
>/* 4*/ { isdot, 5, -1 },
>/* 5*/ { isdigit, 6, -1 },
>/* 6*/ { isdigit, 6, 7 }, //success
>/* 7*/ { ise, 8, -1 },
>/* 8*/ { issign, 9, 9 },
>/* 9*/ { isdigit, 10, -1 },
>/*10*/ { isdigit, 10, -1 }, //success
>};
>int real_accept(int i){switch(i){case 3: case 6:case 10:return 1;}
>return 0;}
>
>int check(char *buf, test *fsm, int(*yes)(int)){ char *s = buf; int
>sta = 0;

Notice how usenet adds line breaks you didn't intend at inopportune
points. Additionally, multiple statements per line tend to hinder
readability.

> while(sta!=-1 && *s) {

Why didn't you use buf directly instead of s?

> if (fsm[sta].fp(*s))
> { sta = fsm[sta].y; s++; }
> else { sta = fsm[sta].n; }
> }
> sta=yes(sta);
>return sta; }
>
>int grok(char *buf) {
> if (check(buf,decimal,dec_accept)) { printf( "dec: %s\n",
>buf); return 0; }

Here it is even worse. And it destroys your attempt at consistent
indentation.

> else if (check(buf,radix, rad_accept)) { printf( "rad: %s\n",
>buf); return 0; }
> else if (check(buf,real, real_accept)) { printf("real: %s\n",
>buf); return 0; }
> else { printf("grok? %s\n", buf); return -1; }
>}
>
>int puff(char *buf, int nbuf) { char *s = buf; int c;
> while ((c=getchar()) != EOF) {

It would be more user friendly if you used '\n' as your terminator
rather than EOF.

> if(isspace(c) || isdelim(c))
> break;
> if(nbuf < s-buf-1)
> return -1;
> *s++ = c;
> }
> *s++ = 0;

As a matter of style, recommend '\0' when assigning to a char.

>return 0; }
>
>int toke(char *buf, int nbuf) { char *s=buf; int sta = 0;
> while(isspace(*s=getchar())) /**/;
> if( (sta=puff(buf+1,nbuf-1)) == -1) return -1;

The value assigned to sta is never used.

> sta = grok(buf);
>return sta; }
>
>#define NBUF 10
>int main() { char buf[NBUF] = ""; int sta;
> while ( (sta=toke(buf,NBUF)) != -1 )

Ditto.

> /**/;
>return 0; }
>

--
Remove del for email

luser-ex-troll

unread,
Mar 25, 2009, 9:57:50 AM3/25/09
to
On Mar 25, 7:08 am, Barry Schwarz <schwa...@dqel.com> wrote:
> On Tue, 24 Mar 2009 20:46:46 -0700 (PDT), luser-ex-troll
>
>
>
> <mijo...@yahoo.com> wrote:
> >Alright now. I think I've turned a corner with this. And, of course,
> >everyone who said "use functions!" was spot on.
>
> >Hopefully this looks more C-worthy.
>
> >lexit-real
>
> >#include <ctype.h>
> >#include <stdio.h>
> >#include <string.h>
>
> >int issign   (int c){return !!strchr("+-",c);}
> >int israd    (int c){return !!strchr("#", c);}
> >int isdot    (int c){return !!strchr(".", c);}
>
> Wouldn't return c == '.' be easier?

Yes, but I liked the symmetry.

> >int ise      (int c){return !!strchr("eE",c);}
> >int isdelim  (int c){return !!strchr("()<>{}[]%/",c);}
> >int isregular(int c){return !isspace(c);}
>
> >typedef struct test test;
> >struct test {
> >    int (*fp)(int); int y,n;
> >};
>
> You could combine the typedef and the structure definition into a
> single declaration.

Yes. I think I will. I was considering making the machines recursively
nested instead of arrays, but then I'd have to build them dynamically.

>
> >test decimal[] = {
> >/* 0*/ { issign,  1,  1 },
> >/* 1*/ { isdigit, 2, -1 },
> >/* 2*/ { isdigit, 2, -1 }, //success
> >};
> >int dec_accept(int i){ return i==2; }
>
> Would it not work just as well if you eliminated the third element of
> decimal and changed this to return i == 1?
>

I think not. That way a single "+" would be interpreted as a decimal
because it terminates in state 1. The 'n' transition is only followed
if there is a next character && it doesn't match.

Yes, irritating, but acknowledged.

> >    while(sta!=-1 && *s) {
>
> Why didn't you use buf directly instead of s?

There isn't a real reason in this function, I justed followed the same
idiom throughout. Again, notions of symmetry.

> >    if (fsm[sta].fp(*s))
> >    { sta = fsm[sta].y; s++; }
> >    else { sta = fsm[sta].n; }
> >    }
> >    sta=yes(sta);
> >return sta; }
>
> >int grok(char *buf) {
> >    if      (check(buf,decimal,dec_accept)) { printf( "dec: %s\n",
> >buf); return 0; }
>
> Here it is even worse.  And it destroys your attempt at consistent
> indentation.

I know. But it's so pretty with 85 columns. I'll split such things for
future postings, but I'm keeping it this way on disk.

> >    else if (check(buf,radix,  rad_accept)) { printf( "rad: %s\n",
> >buf); return 0; }
> >    else if (check(buf,real,  real_accept)) { printf("real: %s\n",
> >buf); return 0; }
> >    else { printf("grok? %s\n", buf); return -1; }
> >}
>
> >int puff(char *buf, int nbuf) { char *s = buf; int c;
> >    while ((c=getchar()) != EOF) {
>
> It would be more user friendly if you used '\n' as your terminator
> rather than EOF.
>

I don't understand: the program should reject an EOF and demand the
line be finished?! A newline separator is handled by isspace on the
next line.

> >    if(isspace(c) || isdelim(c))
> >        break;
> >    if(nbuf < s-buf-1)
> >        return -1;
> >    *s++ = c;
> >    }
> >    *s++ = 0;
>
> As a matter of style, recommend '\0' when assigning to a char.

Yes. But it's a pain on my tiny keyboard.

> >return 0; }
>
> >int toke(char *buf, int nbuf) { char *s=buf; int sta = 0;
> >    while(isspace(*s=getchar())) /**/;
> >    if( (sta=puff(buf+1,nbuf-1)) == -1) return -1;
>
> The value assigned to sta is never used.

Yes. I played with these lines more after posting. My favorite is:
(void)( (-1== (sta=puff(s,nbuf-1)) )
|| (-1== (sta=grok(buf)) ) );

> >    sta = grok(buf);
> >return sta; }
>
> >#define NBUF 10
> >int main() { char buf[NBUF] = ""; int sta;
> >    while ( (sta=toke(buf,NBUF)) != -1 )
>
> Ditto.
>
> >    /**/;
> >return 0; }
>

Thanks a bunch. I'll keep the lines much shorter to guard against ugly
splits. Is there any way to defend against those extra newlines?

--
lxt

luser-ex-troll

unread,
Mar 25, 2009, 2:55:07 PM3/25/09
to
Traced, debugged, sieved, and splinted; is it stylish yet?

There's still a long line (75chars), but with real C comments, it
should be safe for transmission.

/*
590(1)01:42 PM:podvig 0> make t3
cc -g -pedantic -Wall -Wextra -lm t3.c -o t3
591(1)01:43 PM:podvig 0> splint +boolint -boolops -exportlocal t3.c
Splint 3.1.2 --- 23 Aug 2008

Finished checking --- no warnings
592(1)01:43 PM:podvig 0>

*/

#include <ctype.h>
#include <stdio.h>
#include <string.h>

int israd (int c){return (int)'#'==c;}
int isdot (int c){return (int)'.'==c;}


int ise (int c){return !!strchr("eE",c);}

int issign (int c){return !!strchr("+-",c);}

int isdelim (int c){return !!strchr("()<>{}[]%/",c);}
int isregular(int c)

{return c!=EOF && !isspace(c) && !isdelim(c);}

typedef struct test test;
struct test {
int (*fp)(int); int y, n;
};

/* ^[+-]?\d+$ */
test fsm_dec[] = {


/* 0*/ { issign, 1, 1 },

/* 1*/ { isdigit, 2, -1 }, /* [+-]?! ??(\d)?? */
/* 2*/ { isdigit, 2, -1 }, /* [+-]?\d\d* yes! */
};
/*acceptable decimal?*/
int acc_dec(int i){
return i==2;
}

/* ^\d+[#][a-Z0-9]+$ */
test fsm_rad[] = {


/* 0*/ { isdigit, 1, -1 },

/* 1*/ { isdigit, 1, 2 }, /* \d\d* */
/* 2*/ { israd, 3, -1 }, /* \d\d*[^\d] */
/* 3*/ { isalnum, 4, -1 }, /* \d\d*# */
/* 4*/ { isalnum, 4, -1 }, /* \d\d*#\x\x* yes! */
};
/*acceptable radix?*/
int acc_rad(int i){
return i==4;
}

/* ^[+-]?(\d+(\.\d*)?)|(\d*\.\d+)([eE][+-]?\d+)?$ */
test fsm_real[] = {


/* 0*/ { issign, 1, 1 },

/* 1*/ { isdigit, 2, 4 }, /* [+-]? */
/* 2*/ { isdigit, 2, 3 }, /* [+-]?\d\d* yes! */
/* 3*/ { isdot, 6, 7 }, /* [+-]?\d\d*[^\d] */
/* 4*/ { isdot, 5, -1 }, /* [+-]?[^\d] */
/* 5*/ { isdigit, 6, -1 }, /* [+-]?\. */
/* 6*/ { isdigit, 6, 7 }, /* [+-]?(\d\d*)?\.\d* yes! */
/* 7*/ { ise, 8, -1 }, /* [+-]?(\d\d*)?(\.\d*)? */
/* 8*/ { issign, 9, 9 }, /* [+-]?(\d\d*)?(\.\d*)?[eE] */
/* 9*/ { isdigit, 10, -1 }, /* [+-]?(\d\d*)?(\.\d*)?[eE][+-]? */
/*10*/ { isdigit, 10, -1 }, /* [+-]?(\d\d*)?(\.\d*)?[eE][+-]?\d\d*
yes! */
};
/*acceptable real*/
int acc_real(int i){
switch(i) {case 2:case 6:case 10:return 1;}
return 0; }

int czek(char *s, test *fsm, int(*yes)(int)){


int sta = 0;
while(sta!=-1 && *s) {

if (fsm[sta].fp((int)*s))


{ sta = fsm[sta].y; s++; }
else{ sta = fsm[sta].n; }
}

sta=yes(sta); /*did it end in success?*/
return sta; }

int grok(char *s) {
if (czek(s, fsm_dec, acc_dec )) {
printf( "dec: %s\n", s); return 0; }
else if (czek(s, fsm_rad, acc_rad )) {
printf( "rad: %s\n", s); return 0; }
else if (czek(s, fsm_real,acc_real)) {
printf("real: %s\n", s); return 0; }
else {
printf("grok? %s\n", s);
}
return -1; }

int puff(char *buf, int nbuf) {

int c; char *s = buf;
while ( (c=getchar()), isregular(c) ) {
if(s-buf >= nbuf-1) return -1;
*s++ = (char)c;
}
*s = (char)0;
return 0; }

int toke(char *buf, int nbuf) {

int sta = 0; char *s=buf;
while(isspace(*s=(char)getchar())) /**/;
s++;


(void)( (-1== (sta=puff(s,nbuf-1)) )
|| (-1== (sta=grok(buf)) ) );

return sta; }

#define NBUF 10
int main() { char buf[NBUF] = "";

while (-1 != toke(buf,NBUF)) /**/;
return 0; }

/*eof*/

Keith Thompson

unread,
Mar 25, 2009, 3:15:31 PM3/25/09
to
luser-ex-troll <mij...@yahoo.com> writes:
> Traced, debugged, sieved, and splinted; is it stylish yet?

Just a few random comments; I haven't read it closely enough to make
an overall critique.

[...]

> int israd (int c){return (int)'#'==c;}
> int isdot (int c){return (int)'.'==c;}

The cast is unnecessary; character constants are already of type int.
(And I personally dislike the "constant == variable" style, but I know
a lot of people like and use it.)

> int ise (int c){return !!strchr("eE",c);}

I would have written this as
strchr(...) != NULL
rather than
!!strchr(...)

Trsnss s nt lwys a vrt.

Excuse men, I mean:
Terseness is not always a virtue.

[...]

> int toke(char *buf, int nbuf) {
> int sta = 0; char *s=buf;
> while(isspace(*s=(char)getchar())) /**/;

The cast is unnecessary; without it, the result of getchar() will be
implicitly converted to char by the assignment.

In fact, *most* casts are unnecessary.

> s++;
> (void)( (-1== (sta=puff(s,nbuf-1)) )
> || (-1== (sta=grok(buf)) ) );
> return sta; }

In the statement before the return, you're computing results and
throwing them away -- and discarding clarity along with them. I think
the statement is equivalent to:

if ((sta=puff(s,nbuf-1)) != -1) {
sta=grok(buf);
}

or, even better:

if ((sta = puff(s, nbuf - 1)) != -1) {
sta = grok(buf);
}


> #define NBUF 10
> int main() { char buf[NBUF] = "";
> while (-1 != toke(buf,NBUF)) /**/;
> return 0; }
>
> /*eof*/

I find your code layout to be jarring. I'd write the above as:

int main(void)
{
char buf[NBUF] = "";
while (-1 != toke(buf, NBUF)) {
continue;
}
return 0;
}

(Actually there are some other changes I'd make, but I limited myself
to adding the void keyword and changing the layout.)

luser-ex-troll

unread,
Mar 25, 2009, 4:16:50 PM3/25/09
to
On Mar 25, 2:15 pm, Keith Thompson <ks...@mib.org> wrote:

> luser-ex-troll <mijo...@yahoo.com> writes:
> > Traced, debugged, sieved, and splinted; is it stylish yet?
>
> Just a few random comments; I haven't read it closely enough to make
> an overall critique.

I'm grateful nonetheless.

> [...]
>
> > int israd    (int c){return (int)'#'==c;}
> > int isdot    (int c){return (int)'.'==c;}
>
> The cast is unnecessary; character constants are already of type int.
> (And I personally dislike the "constant == variable" style, but I know
> a lot of people like and use it.)
>

I chose it here so the 'c's would line up nicely relative to the
strchr counterparts, and then followed through elsewhere for
consistency.

> > int ise      (int c){return !!strchr("eE",c);}
>
> I would have written this as
>     strchr(...) != NULL
> rather than
>     !!strchr(...)
>
> Trsnss s nt lwys a vrt.
>
> Excuse men, I mean:
> Terseness is not always a virtue.
>

With the exception of wooing a Russian princess will billiard chalk, I
agree. I started with no punctuation at all but splint complained
about type mismatching, and with focus on brevity, I opted for the
minimal syntax adjustment (another option was adding -type to splint,
but that seemed to be throwing out the baby).

> [...]
>
> > int toke(char *buf, int nbuf) {
> >     int sta = 0; char *s=buf;
> >     while(isspace(*s=(char)getchar())) /**/;
>
> The cast is unnecessary; without it, the result of getchar() will be
> implicitly converted to char by the assignment.
>
> In fact, *most* casts are unnecessary.
>

Indeed, but splint demands either this or a +charint switch.

> >     s++;
> >     (void)( (-1== (sta=puff(s,nbuf-1)) )
> >          || (-1== (sta=grok(buf))      ) );
> > return sta; }
>
> In the statement before the return, you're computing results and
> throwing them away -- and discarding clarity along with them.  I think
> the statement is equivalent to:
>
>     if ((sta=puff(s,nbuf-1)) != -1) {
>         sta=grok(buf);
>     }
>
> or, even better:
>
>     if ((sta = puff(s, nbuf - 1)) != -1) {
>         sta = grok(buf);
>     }
>

Yes. That's 500% better. Thanks.

> > #define NBUF 10
> > int main() { char buf[NBUF] = "";
> >     while (-1 != toke(buf,NBUF)) /**/;
> > return 0; }
>
> > /*eof*/
>
> I find your code layout to be jarring.  I'd write the above as:
>
>     int main(void)
>     {
>         char buf[NBUF] = "";
>         while (-1 != toke(buf, NBUF)) {
>             continue;
>         }
>         return 0;
>     }
>
> (Actually there are some other changes I'd make, but I limited myself
> to adding the void keyword and changing the layout.)

Seriously? 8 lines versus 3? Perhaps my hardware constraints have
suggested more terseness (The olpc xo-1 has a 6"x4.5" lcd), but here I
think it really pays off. It is, after all, a stub for testing the
module before linking into the larger program.

But I'm truly curious to know how you would really format it: blank
lines after declarations and before return?

--
l*t
? fix post stand under you do

Keith Thompson

unread,
Mar 25, 2009, 4:43:32 PM3/25/09
to
luser-ex-troll <mij...@yahoo.com> writes:
> On Mar 25, 2:15 pm, Keith Thompson <ks...@mib.org> wrote:
>> luser-ex-troll <mijo...@yahoo.com> writes:
[...]

>> > #define NBUF 10
>> > int main() { char buf[NBUF] = "";
>> >     while (-1 != toke(buf,NBUF)) /**/;
>> > return 0; }
>>
>> > /*eof*/
>>
>> I find your code layout to be jarring.  I'd write the above as:
>>
>>     int main(void)
>>     {
>>         char buf[NBUF] = "";
>>         while (-1 != toke(buf, NBUF)) {
>>             continue;
>>         }
>>         return 0;
>>     }
>>
>> (Actually there are some other changes I'd make, but I limited myself
>> to adding the void keyword and changing the layout.)
>
> Seriously? 8 lines versus 3? Perhaps my hardware constraints have
> suggested more terseness (The olpc xo-1 has a 6"x4.5" lcd), but here I
> think it really pays off. It is, after all, a stub for testing the
> module before linking into the larger program.
>
> But I'm truly curious to know how you would really format it: blank
> lines after declarations and before return?

There are several minor style points here on which I'm undecided. I
might put the opening brace for the function either on the same line
as the prototype or on the next line by itself; the former is more
consistent with they way I use braces in other contexts, and the
latter is probably a throwback to K&R C, where parameter declarations
are typically separated from the function declaration.

How to write a loop with an emtpy body is another thing on which I'm
undecided. I always use braces for compound statements, even when
they're not necessary (a habit I picked up from Perl where they're
always mandatory, but I find it safer and more consistent in C as
well). I used the "continue" keyword here because I think it clearly
expresses what's going on; I might use an empty comment instead if I
were in the mood. I wouldn't use
while (condition);
because it's just too terse for my tastes, and too easy to mistake for
a typo. Your own empty comment on the same line isn't bad.

I might put a blank line between the declarations and statements, but
I might not bother for something this small.

I think I see why you put the return statement on a different
indentation than the rest of the function body, but I wouldn't do it
that way; syntactically, return is just another statement. And I
really dislike putting code on a line after a '{', or before a '}'.

Here's another way I might write it if I were a bit more concerned
with vertical space.

int main(void) {
char buf[NBUF] = "";

while (toke(buf, NBUF) != -1) continue;
return 0;
}

In real life, I'd follow my employer's coding standards if I were
writing code for work, or the style of the existing code if I were
working on an existing project. But if I were writing my own code for
my own purposes, I'd feel free to indulge my own idiosyncracies (which
are of course far more rational and consistent than everyone else's
idiosyncracies).

luser-ex-troll

unread,
Mar 25, 2009, 5:15:50 PM3/25/09
to

Thanks. This is exactly what I've been itching for.

My final super-terse version actually began like the 5-line version
here. Then while doing lots of scrolling up and down I thought: J J
<< . And then it seemed nice to put the return type and the return
value on the same level to help keep straight when the int is
representing true/false as 1/0 or 0/-1. I don't plan on adopting this
format as a general habit, but the specific situation suggested a
benefit.

Fortunately, I am at my own mercy for decisions of this sort, but that
makes me responsible for the decision as well.

For most purposes I agree with your recommendations and appreciate the
sound ground upon which they stand.

--
lxt
his name is bobo. he knows no mercy.

pete

unread,
Mar 25, 2009, 6:14:58 PM3/25/09
to
Keith Thompson wrote:
> I wouldn't use
> while (condition);
> because it's just too terse for my tastes, and too easy to mistake for
> a typo.

I would write that this way:

while (condition) {
;
}

--
pete

luser-ex-troll

unread,
Mar 25, 2009, 6:22:44 PM3/25/09
to

nice. but what about:

while(condition){;}

?

--
lxt
as funky as you wan na be

Richard

unread,
Mar 25, 2009, 6:27:18 PM3/25/09
to
luser-ex-troll <mij...@yahoo.com> writes:

Any C programmer capable of following any C is not going to think

while (condition);

is a typo if there is no further bracketing and indentation.

while(condition){;}

and

while (condition) {
;
}

are far less obvious or readable IMO.

pete

unread,
Mar 25, 2009, 6:35:48 PM3/25/09
to
Richard wrote:
> luser-ex-troll <mij...@yahoo.com> writes:
>
>> On Mar 25, 5:14 pm, pete <pfil...@mindspring.com> wrote:
>>> Keith Thompson wrote:
>>>> I wouldn't use
>>>> while (condition);
>>>> because it's just too terse for my tastes, and too easy to mistake for
>>>> a typo.
>>> I would write that this way:
>>>
>>> while (condition) {
>>> ;
>>> }

> Any C programmer capable of following any C is not going to think
>
> while (condition);
>
> is a typo if there is no further bracketing and indentation.

That's not the entire problem.
Sometimes the next line can be easily mistaken
for part of the loop.

--
pete

luser-ex-troll

unread,
Mar 25, 2009, 6:50:28 PM3/25/09
to
On Mar 25, 5:35 pm, pete <pfil...@mindspring.com> wrote:
> Richard wrote:

what about?

while(condition) { /* (: ;) */; }

smiles are free!

--
lxt

Keith Thompson

unread,
Mar 25, 2009, 7:09:16 PM3/25/09
to
luser-ex-troll <mij...@yahoo.com> writes:
[...]

> what about?
>
> while(condition) { /* (: ;) */; }
>
> smiles are free!

It's cute.

Determining whether I meant that as a compliment is left as an
exercise.

luser-ex-troll

unread,
Mar 25, 2009, 7:38:05 PM3/25/09
to
On Mar 25, 6:09 pm, Keith Thompson <ks...@mib.org> wrote:

> luser-ex-troll <mijo...@yahoo.com> writes:
>
> [...]
>
> > what about?
>
> > while(condition) { /* (: ;) */; }
>
> > smiles are free!
>
> It's cute.
>
> Determining whether I meant that as a compliment is left as an
> exercise.
>

Though that remain a mystery (subjunctive, right? cause /he/ knows),
this thread should provide useful fodder for anyone searching the
archive for replacing gotos, coding a finite state machine to execute
a regular expression, how to make C look like something else, and
unnecessary cuteness.

--
lxt
"Real programmers set the universal constants at the start such that
the
universe evolves to contain the disk with the data they want."

Nate Eldredge

unread,
Mar 25, 2009, 7:39:51 PM3/25/09
to
luser-ex-troll <mij...@yahoo.com> writes:

I personally tend to write

while (condition) ;

I think the added space draws more attention to the empty body, since I
ordinarily don't leave spaces before semicolons at the end of a statement.

I also sometimes do

while (condition) /* keep going */ ;

pete

unread,
Mar 25, 2009, 7:55:22 PM3/25/09
to

I don't like to write trivial comments.
The thing about comments, especially trivial ones,
is that they don't always get updated
when the code changes when the programmers are busy
trying to get the code to do what they want it to do.

Working with deadlines, causes programmers to try to work fast.
I like having conventions which reduce the amount of decissions
I have to make about how to write something.

I have never worked in a place where the coding conventions
prohibited one line looops, but I have heard of it.

--
pete

CBFalconer

unread,
Mar 25, 2009, 7:16:47 PM3/25/09
to

I would write:

while (condition) continue;

Richard Heathfield

unread,
Mar 25, 2009, 9:04:18 PM3/25/09
to
CBFalconer said:

> pete wrote:
>> Keith Thompson wrote:
>>
>>> I wouldn't use
>>> while (condition);
>>> because it's just too terse for my tastes, and too easy to
>>> mistake for a typo.
>>
>> I would write that this way:
>>
>> while (condition) {
>> ;
>> }
>
> I would write:
>
> while (condition) continue;

Putting the 'continue' on a separate line makes it clearer exactly
when the condition is met (when stepping through the code with a
debugger - not something I do a lot nowadays, but I used to, and
some people still do). And using the (optional) braces to mark off
the loop body is a good habit to get into, as it can save all kinds
of embarrassment later when the loop is maintained.

--
Richard Heathfield <http://www.cpax.org.uk>
Email: -http://www. +rjh@
Google users: <http://www.cpax.org.uk/prg/writings/googly.php>
"Usenet is a strange place" - dmr 29 July 1999

Richard Harter

unread,
Mar 25, 2009, 10:03:51 PM3/25/09
to
On Wed, 25 Mar 2009 17:14:58 -0500, pete <pfi...@mindspring.com>
wrote:

My view on this is that a loop without a body should be obvious
to the reader. If the next line then it is a one line loop.
Either

while (condition);
or
while (condition) {}

is okay; anything more is sheer pointless ugliness.

CBFalconer

unread,
Mar 25, 2009, 10:04:59 PM3/25/09
to
Richard Heathfield wrote:
> CBFalconer said:
>> pete wrote:
>>> Keith Thompson wrote:
>>>
>>>> I wouldn't use
>>>> while (condition);
>>>> because it's just too terse for my tastes, and too easy to
>>>> mistake for a typo.
>>>
>>> I would write that this way:
>>>
>>> while (condition) {
>>> ;
>>> }
>>
>> I would write:
>>
>> while (condition) continue;
>
> Putting the 'continue' on a separate line makes it clearer exactly
> when the condition is met (when stepping through the code with a
> debugger - not something I do a lot nowadays, but I used to, and
> some people still do). And using the (optional) braces to mark off
> the loop body is a good habit to get into, as it can save all kinds
> of embarrassment later when the loop is maintained.

Since those are not among my objectives, I would do none of them.

Richard Harter

unread,
Mar 25, 2009, 11:16:21 PM3/25/09
to
On Thu, 26 Mar 2009 02:03:51 GMT, c...@tiac.net (Richard Harter)
wrote:

>On Wed, 25 Mar 2009 17:14:58 -0500, pete <pfi...@mindspring.com>
>wrote:
>
>>Keith Thompson wrote:
>>> I wouldn't use
>>> while (condition);
>>> because it's just too terse for my tastes, and too easy to mistake for
>>> a typo.
>>
>>I would write that this way:
>>
>> while (condition) {
>> ;
>> }
>
>My view on this is that a loop without a body should be obvious
>to the reader. If the next line then it is a one line loop.

^ is not indented

pete

unread,
Mar 26, 2009, 12:36:34 AM3/26/09
to
Richard Harter wrote:
> On Thu, 26 Mar 2009 02:03:51 GMT, c...@tiac.net (Richard Harter)
> wrote:
>
>> On Wed, 25 Mar 2009 17:14:58 -0500, pete <pfi...@mindspring.com>
>> wrote:
>>
>>> Keith Thompson wrote:
>>>> I wouldn't use
>>>> while (condition);
>>>> because it's just too terse for my tastes, and too easy to mistake for
>>>> a typo.
>>> I would write that this way:
>>>
>>> while (condition) {
>>> ;
>>> }
>> My view on this is that a loop without a body should be obvious
>> to the reader. If the next line then it is a one line loop.
> ^ is not indented

I've seen code where the next line *is* indented.

--
pete

Nate Eldredge

unread,
Mar 26, 2009, 2:06:28 AM3/26/09
to
pete <pfi...@mindspring.com> writes:

> Nate Eldredge wrote:
>> I personally tend to write
>>
>> while (condition) ;
>>
>> I think the added space draws more attention to the empty body, since I
>> ordinarily don't leave spaces before semicolons at the end of a statement.
>>
>> I also sometimes do
>>
>> while (condition) /* keep going */ ;
>
> I don't like to write trivial comments.
> The thing about comments, especially trivial ones,
> is that they don't always get updated
> when the code changes when the programmers are busy
> trying to get the code to do what they want it to do.

It's a good point. I think this is a little different from the usual

x = 3; /* assign 3 to x */

because in some sense the comment *is* the code. But it's a subtle
distinction even in my mind.

Han from China

unread,
Mar 26, 2009, 1:05:10 AM3/26/09
to
Richard Harter wrote:
> My view on this is that a loop without a body should be obvious
> to the reader. If the next line then it is a one line loop.
> Either
>
> while (condition);

Yes, while(condition); should suffice. If you hire someone who
can't see the empty loop body in that, then you need to have
a word with HR, since that kind of employee is probably going
to screw up the code during work and clog the water cooler
with crazy putty during lunch breaks. However, the economy
being what it is, if you must hire someone like that, then
the following should be adequate:

/*****************************************************/
/* EMPTY LOOP BODY BELOW THE BELOW COMMENT, DUMBASS! */
/* EMPTY LOOP BODY BELOW, DUMBASS! */
while(condition); /* HERE IT IS! */
/* EMPTY LOOP BODY ABOVE, DUMBASS! */
/* EMPTY LOOP BODY ABOVE THE ABOVE COMMENT, DUMBASS! */
/*****************************************************/

Yours,
Han from China

--
"Only entropy comes easy." -- Anton Chekhov

Han from China

unread,
Mar 26, 2009, 1:12:59 AM3/26/09
to
Richard Harter wrote:
>> Richard Harter, c...@tiac.net
>> http://home.tiac.net/~cri, http://www.varinoma.com
>> If I do not see as far as others, it is because
>> I stand in the footprints of giants.

Just to beat the others to it: Richard, DON'T QUOTE SIGS. It's
against THE RULES, and I need THE RULES to have some kind of
ORDER without STRESS, since with STRESS, my MEDICATION doesn't
have the full BENEFITS. Please DON'T quote SIGS. Thank YOU.

P.S. two CRACKS on the sidewalk, MUST avoid the CRACKS.

Phil Carmody

unread,
Mar 26, 2009, 3:32:35 AM3/26/09
to
CBFalconer <cbfal...@yahoo.com> writes:
> Richard Heathfield wrote:
>> CBFalconer said:
>>> pete wrote:
>>>> Keith Thompson wrote:
>>>>
>>>>> I wouldn't use
>>>>> while (condition);
>>>>> because it's just too terse for my tastes, and too easy to
>>>>> mistake for a typo.
>>>>
>>>> I would write that this way:
>>>>
>>>> while (condition) {
>>>> ;
>>>> }
>>>
>>> I would write:
>>>
>>> while (condition) continue;
>>
>> Putting the 'continue' on a separate line makes it clearer exactly
>> when the condition is met (when stepping through the code with a
>> debugger - not something I do a lot nowadays, but I used to, and
>> some people still do). And using the (optional) braces to mark off
>> the loop body is a good habit to get into, as it can save all kinds
>> of embarrassment later when the loop is maintained.
>
> Since those are not among my objectives, I would do none of them.

You never intend to maintain your code? Yikes, that's a
scary attitude.

Phil
--
Marijuana is indeed a dangerous drug.
It causes governments to wage war against their own people.
-- Dave Seaman (sci.math, 19 Mar 2009)

Flash Gordon

unread,
Mar 26, 2009, 3:07:21 AM3/26/09
to

Someone else once suggested
while (condition) continue;
I decided I like it so now use it.
--
Flash Gordon

Phil Carmody

unread,
Mar 26, 2009, 4:23:46 AM3/26/09
to
Unwrapping the echelon, putting names next to styles:

>>> On Mar 25, 5:14 pm, pete <pfil...@mindspring.com> wrote:

while (condition) {
;
}


>> luser-ex-troll <mij...@yahoo.com> writes:
while(condition){;}


> Nate Eldredge wrote:
while (condition) ;


while (condition) /* keep going */ ;


Flash Gordon <sm...@spam.causeway.com> writes:
while (condition) continue;


I can see continue's merits. I might try that and see if it fits.
I find I don't use continue for anything (I've even worked in a
company where it was against the coding standards as it was a
'confusing minority technique' or some crap like that), so it
seems as if it might feel odd typing it. At least the first few
times.

Traditionally I'm between Pete and l-e-t:
while(condition) { ; }
as I like to draw a little attention, but only a little, to the
empty block. I'm a religious block-rather-than-statement user.

nick_keigh...@hotmail.com

unread,
Mar 26, 2009, 4:33:00 AM3/26/09
to
On 26 Mar, 04:36, pete <pfil...@mindspring.com> wrote:
> Richard Harter wrote:
> > On Thu, 26 Mar 2009 02:03:51 GMT, c...@tiac.net (Richard Harter)
> > wrote:
> >> On Wed, 25 Mar 2009 17:14:58 -0500, pete <pfil...@mindspring.com>
> >> wrote:
> >>> Keith Thompson wrote:

> >>>> I wouldn't use
> >>>>     while (condition);

> >>>> because it's just too terse for my tastes, and too easy to mistake for
> >>>> a typo.
> >>> I would write that this way:
>
> >>>     while (condition) {
> >>>         ;
> >>>     }
> >> My view on this is that a loop without a body should be obvious
> >> to the reader.  If the next line then it is a one line loop.
> >                                  ^ is not indented
>
> I've seen code where the next line *is* indented.

that's my style

while (condition)
;

I like the ; on a line of it's own so it stands out


Richard Heathfield

unread,
Mar 26, 2009, 7:08:18 AM3/26/09
to
CBFalconer said:

> Richard Heathfield wrote:
>> CBFalconer said:

<snip>

>>> I would write:
>>>
>>> while (condition) continue;
>>
>> Putting the 'continue' on a separate line makes it clearer
>> exactly when the condition is met (when stepping through the code
>> with a debugger - not something I do a lot nowadays, but I used
>> to, and some people still do). And using the (optional) braces to
>> mark off the loop body is a good habit to get into, as it can
>> save all kinds of embarrassment later when the loop is
>> maintained.
>
> Since those are not among my objectives, I would do none of them.

That certainly shows consistency, anyway. But my advice was not
aimed at you. I have long since given up trying to educate you, but
your mistakes can be used to educate others.

pete

unread,
Mar 26, 2009, 7:31:11 AM3/26/09