embarrassing spaghetti code needs stylistic advice

luser-ex-troll

unread,

Mar 19, 2009, 11:12:04 PM3/19/09

to

Hello all.
I have a problem of a somewhat different kind than the usual post. My
code works! It's just appallingly ugly. With my attention focused
sharply on clear and consistent data structures, the most important
function in my nascent postscript interpreter, the lexical scanner,
has degenerated into spaghetti.

It happened incrementally so I didn't really worry
about it until it became overwhelmingly obvious
that what I've got is terribly, horribly ugly.

I realize that this is a large post, but I couldn't
trim it any shorter without making it either
incomplete (and non-functional) or no longer
representative of the problem.

Specifically the problem is the toke function
which scans a string or file to create an object
(tag-union, variant-record). It's constructed
as a series of tests and loops within a big loop,
but uses goto to change its mind about what
type of object it has found (eg. '+' followed
by a digit is a noise character introducing the
number, but followed by anything else, it's an
executable name).

I can't seem to think of a control structure to replace it with that
affords the same flexibility.

tia.
lxt
ps. feel free to trim the entire code from any
responses. I realize it's quite long for this
medium.
/* tokentest.c
the scanner playpen
*/

#include <ctype.h>
#include <stdbool.h> //true false
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//#include "object.h"
/* object.h
global constants
object structures and typedefs
*/

//limits
#define MAXNAMES 1000
#define MAXTOKEN 256
#define OSSIZE 500
#define ESSIZE 250
#define DSSIZE 20

/* Objects */

#define Types \
X(null, int dummy) \
X(mark, int dummy2) \
X(boolean, bool b) \
X(integer, int i) \
X(real, float f) \
X(name, int n) \
X(string, String *s) \
X(array, Array *a) \
X(dict, Dict *d) \
X(operator, Operator op) \
X(file, FILE *file) \
X(font, void *font) \
X(packedarray, void *pa) \
X(save, void *save) \

struct s_operator {
char *name;
void (*fp)();
};

typedef struct s_object Object;
typedef struct s_string String;
typedef struct s_array Array;
typedef struct s_dict Dict;
typedef struct s_operator Operator;
struct s_object {
#define X(a, b) a ## type ,
enum e_type { Types } type;
#undef X
unsigned char flags;
#define READ 1
#define WRITE 2
#define EXEC 4
#define COMP 8
#define X(a, b) b;
union { Types } u;
#undef X
};

struct s_string {
int ref;
size_t length;
struct s_string *copyof;
char *s; };

struct s_array {
int ref;
size_t length;
struct s_array *copyof;
Object *a; };

struct s_pair { Object key, value; };
struct s_dict {
int ref;
size_t length;
size_t maxlength;
struct s_pair *p; };

// Singular Objects
Object null;
Object mark;

// exported functions
int error (char *fmt, ...);
Object boolean (char b);
Object integer (int i);
Object real (float f);

char *names[MAXNAMES];
//int nameslen;
Object name (char *s);

Object stringn (int n);
Object string (char *s);
void dec_string (String *s);
void inc_string (String *s);
String * substring (String *s, size_t offset, size_t length);

Object array (int n);
void dec_array (Array *a);
void inc_array (Array *a);
Object car (Array *a);
Array * cdr (Array *a);
Array * subarray (Array *a, size_t offset, size_t length);

Object dict (int n);
int eq (Object a, Object b);
struct
s_pair * lookup (Dict *d, Object key);
bool define (Dict *d, Object key, Object value);
void dec_dict (Dict *d);
void inc_dict (Dict *d);

void dec (Object *o);
void inc (Object *o);

Object executable (Object o);
Object operator (char *name, void (*fp)());

/* eof: object.h */

//#include "system.h"
/* system.h
stacks and operators
*/

#define X(a, b) #a "type",
char *typestring[] = { Types }; //names for enum e_type type member of
Object
#undef X
int defer_exec;
int defer_paren;
int quitflag;

Object os[OSSIZE];
Object *tos = os;
#define push(obj) \
(tos != os+OSSIZE)? *(tos++) = obj: (error("stackoverflow"),null)
#define pop ( (tos!=os)? (*(--tos)): (error("stackunderflow"),null) )

Object es[ESSIZE];
Object *tes = es;
#define pushe(obj) \
(tes != es+ESSIZE)? *(tes++) = obj: (error("execstackoverflow"),null)
#define pope ( (tes!=es)? (*(--tes)): (error
("execstackunderflow"),null) )

Object ds[DSSIZE];
Object *tds = ds;
#define pushd(obj) \
(tds != ds+DSSIZE)? *(tds++) = obj: (error("dictstackoverflow"),null)
#define popd ( (tds!=ds)? (*(--tds)): (error
("dictstackunderflow"),null) )

/* operator helpers */

#define stackunder(n,op) ( (tos-os >= n)?: error("stackunderflow in "
#op) )
#define typecheck(ob,tp,op) \
( (ob.type == tp ## type)?: error("typecheck in " #op) )
#define xcheck(ob,op) \
(ob.flags & EXEC)? 0: error("typecheck in " #op)

/* Operators */

/* Miscellaneous Operators */
void Oprompt ();

/* eof system.h */

int sgetc(String *s) {
if (s->length == 0) return EOF;
s->length--;
return *(s->s++);
//s->s++;
//return s->s[-1];
}

int Snext(Object s) {
return sgetc(s.u.s);
}

void Sback(int c, Object s) {
s.u.s->length++;
*(--(s.u.s->s)) = c; //back it up, follow the pointer, store
}

int Fnext(Object f) {
return fgetc(f.u.file);
}

void Fback(int c, Object f) {
ungetc(c, f.u.file);
}

// called by Otoken, below
Object toke(Object src, int (*next)(Object), void (*back)(int,
Object)) {
int i;
int d = 0;
bool negate = false;
char *punct = "()<>[]{}/%";
char s[MAXTOKEN];
char *sp = s;
#define NEXT if ((i=next(src)) == EOF) goto fail
#define NEXTor if ((i=next(src)) == EOF)
#define BACK back(i,src)

while ( (i = next(src)) != EOF ) {
top:
if(i == '\n') { Oprompt(); } //newline
if(isspace(i)) continue; //whitespace _/comments
if(i == '%') { do { NEXT; } while(i != '\n'); goto top; }
if(i == '+') { //optional +
NEXTor goto single;
if(!isdigit(i)) { BACK; i = '+'; goto aname; }
i -= '0';
goto digit; }

if(i == '-') { //optional -
NEXTor goto single;
if(!isdigit(i)) { BACK; i = '-'; goto aname; }
i -= '0'; negate = true;
goto digit; }

if(isdigit(i)) { //digits
do {
i -= '0';
d *= 10;
digit: d += i;
NEXTor goto digitskipback;
if (i == '.') goto real;
if (i == '#') goto radix;
//TODO E notation
} while (isdigit(i));
BACK;
digitskipback:
if (negate) d *= -1;
return integer(d); }

goto after_real;
real: { float f; //b/c f is a FILE *
int e;
f = (float)d; //the positive integer so far
d = 0;
e = 1;
NEXTor goto floatskipback;
while(isdigit(i)) {
i -= '0';
d *= 10;
e *= 10;
d += i;
NEXTor goto floatskipback;
}
//TODO E notation
BACK;
floatskipback:
f += (float)d/(float)e;
if (negate) f *= -1;
return real(f); }
after_real:

goto after_radix;
radix: { int r = d;
if (r > 36) error("badradix syntaxerror in token");
if (r < 2) error("badradix syntaxerror in token");
NEXTor goto radixskipback;
d = 0;
do {
if (isdigit(i)) i -= '0';
else if (islower(i)) i -= 'a'+10;
else if (isupper(i)) i -= 'A'+10;
else error("badradixdigit syntaxerror in token");
d *= r;
d += i;
NEXTor goto radixskipback;
} while(isalnum(i));
BACK;
radixskipback:
return integer(d); }
after_radix:

if(i == '(') { // string
defer_paren = 1;
NEXTor goto syntaxerror;
if (i == ')') defer_paren--;
while (defer_paren) {
if (i == '\n') Oprompt();
if (i == '(') defer_paren++;
//TODO octal and hex
if (i == '\\') {
NEXTor goto syntaxerror;
switch(i) {
case '\n': Oprompt(); goto skip;
case 'a': i = '\a'; break;
case 'b': i = '\b'; break;
case 'f': i = '\f'; break;
case 'n': i = '\n'; break;
case 'r': i = '\r'; break;
case 't': i = '\t'; break;
case 'v': i = '\v'; break;
case '(': case ')':
case '\'': case '\"':
case '?': case '\\': break;
default: error("syntaxerror (string\\escape) in token");
}
}
*sp++ = (char)i;
if (sp-s > MAXTOKEN) error("limitcheck in token");
skip: NEXTor goto syntaxerror;
if (i == ')') defer_paren--;
}
*sp++ = 0;
//no BACK! eat the paren
return string(s); }

if(i == '/') { // literal name
NEXTor goto litnameskipback;
do {
*sp++ = (char)i;
NEXTor goto litnameskipback;
} while(isgraph(i) && strchr(punct,i)==NULL );
BACK;
litnameskipback:
*sp = 0;
return name(s); }

if(strchr("[]", i)) { // array
single: s[0] = (char)i; s[1] = 0;
return executable(name(s)); }

if(i == '{') { //procedures
typedef struct s_cord Fish;
struct s_cord { Object o; struct s_cord *link; };
Fish *head, *tail;
Object o, fin;
size_t i, len = 0;

fin = name("}"); /* make a list */
(void)((head=malloc(sizeof *head)) ||error("VMerror in token"));
tail = head;
do { tail->o = toke(src,next,back);
if ( eq(tail->o,fin) ) break;
len++;
(void)((tail->link=malloc(sizeof *tail)) ||error("VMerror in
token"));
tail = tail->link;
tail->link = NULL; /* possibly unnecessary */
} while(1);

o = array((int)len); /* turn list into array */
tail = head; /* fish becomes worm which eats itself */
for(i=0;i<len;i++) {
o.u.a->a[i] = tail->o;
head = tail->link;
free(tail);
tail = head;
}
free(head); //"}" equiv to free(tail), but this looks more
symmetrical
return executable(o);
}

if(i == '}') {
return executable(name("}"));
}

if(isgraph(i)) { //executable names
do {
aname: *sp++ = (char)i;
NEXTor goto nameskipback;
} while(isgraph(i) && !isspace(i) && strchr(punct,i)==NULL );
BACK;
nameskipback:
*sp = 0;
return executable(name(s)); }

syntaxerror:
error("syntaxerror in token");
} //while

fail:
return null;
}

void Otoken() {
Object o;
Object src;
stackunder(1,token);
src = pop;
switch(src.type) {
case stringtype: push(src);
o = toke(src, Snext, Sback);
dec(&src);
break;
case filetype:
o = toke(src, Fnext, Fback);
break;
default: error("typecheck in token");
}

if (o.type == nulltype) { push(boolean(false)); }
else {
if(eq(o,name("}"))) { error("unmatchedmark in token"); }
else { push(o); push(boolean(true)); }
}
}

int main() {
bool done = false;

push(string("this is a string"));
while(!done) {
Otoken(); //executable names
if (pop.u.b) { //check boolean return value
Object o;
o = pop;
if (o.type == nametype) {
printf("!grOK: name, %s\n", names[o.u.n]);
}
} else {
printf("!grNAK: failed to read a token");
done = true;
}
}

return 0;
}

/* eof token.c */

/* object.c
error function (to avoid a main.h or misc.c)
object allocators
and storage for singular objects null and mark
*/

#include <float.h> //FLT_EPSILON
#include <math.h> //fabsf
#include <stdarg.h> //...
#include <stdbool.h> //true false
#include <stdio.h> //vfprintf
#include <stdlib.h> //exit malloc free
#include <string.h> //strcmp strdup
//#include "object.h"

int error(char *fmt, ...) {
va_list argptr;
va_start( argptr, fmt );
(void)vfprintf(stderr, fmt, argptr);
(void)fputc('\n',stderr);
va_end(argptr);
exit(EXIT_FAILURE);
}

/* Singular objects */

Object null = { .type = nulltype, .flags = 0, .u.dummy = 0};
Object mark = { .type = marktype, .flags = 0, .u.dummy2 = 0};

/* Object Allocators and Convenience Functions */

Object boolean (char b) {
Object o = { .type = booleantype, .flags = 0, .u.b = b };
return o;
}

Object integer (int i) {
Object o = { .type = integertype, .flags = 0, .u.i = i };
return o;
}

Object real (float f) {
Object o = { .type = realtype, .flags = 0, .u.f = f };
return o;
}

char *names[MAXNAMES];
int nameslen = 0;
Object name (char *s) {
Object o = { .type = nametype, .flags = 0, .u.dummy = 0 };
int i;
for (i=0; i<nameslen; i++) { //look
if (strcmp(s, names[i]) == 0) { //found
o.u.n = i;
return o;
}
}
o.u.n = i; //new
names[i] = strdup(s);
nameslen++;
return o;
}

Object stringn (int n) {
Object o = { .type = stringtype, .flags = COMP, .u.dummy = 0 };
(void)((o.u.s = (String *)malloc(sizeof *o.u.s))
|| error("VMerror in stringn"));
o.u.s->ref = 1;
o.u.s->length = (size_t)n;
o.u.s->copyof = NULL;
(void)((o.u.s->s = malloc((size_t)n+1))
|| error("VMerror in stringn"));
return o; }

String *substring(String *s, size_t offset, size_t length) {
String *new;
if (offset+length > s->length)
error("rangecheck in substring");
(void)((new = malloc(sizeof *new))
|| error("VMerror in substring"));
new->ref = 1;
new->length = length;
new->copyof = s;
new->s = s->s + offset;
return new;
}

Object string (char *s) {
Object o;
size_t n;
n = strlen(s);
o = stringn((int)n);
strcpy(o.u.s->s, s);
//make substring so you can play with the pointer
//and dec can still free it later.
o.u.s = substring(o.u.s, 0, o.u.s->length);
return o; }

void dec_string(String *s) {
if (--s->ref == 0) {
if (s->copyof) dec_string(s->copyof);
else free(s->s);
free(s);
}
}

void inc_string(String *s) { s->ref++; }

Object array (int n) {
Object o = { .type = arraytype, .flags = COMP, .u.dummy = 0 };
(void)((o.u.a = (Array *)malloc(sizeof *o.u.a))
|| error("VMerror in array"));
o.u.a->ref = 1;
o.u.a->length = (size_t)n;
o.u.a->copyof = NULL;
(void)((o.u.a->a = (Object *)calloc((size_t)n, sizeof o))
|| error("VMerror in array"));
return o; }

void dec_array(Array *a) {
if (--a->ref == 0) {
int i;
for (i=0; i < (int)a->length; i++) {
//kill elements
dec(a->a + i);
}
if(a->copyof) dec_array(a->copyof);
else free(a->a);
free(a);
}
}

void inc_array(Array *a) { a->ref++; }

Array *subarray(Array *a, size_t offset, size_t length) {
Array *new;
if (offset+length > a->length)
error("rangecheck in subarray");
(void)((new = malloc(sizeof *new))
|| error("VMerror in subarray"));
new->ref = 1;
new->length = length;
new->copyof = a;
inc_array(a);
new->a = a->a + offset;
return new;
}

Object car(Array *a) { return a->a[0]; }

Array *cdr(Array *a) { return subarray(a, 1, a->length-1); }

Object dict (int n) {
Object o = { .type = dicttype, .flags = COMP, .u.dummy = 0 };
(void)((o.u.d = (Dict *)malloc(sizeof *o.u.d))
|| error("VMerror in dict"));
o.u.d->ref = 1;
o.u.d->maxlength = (size_t)n;
o.u.d->length = 0;
(void)((o.u.d->p = (struct s_pair *)calloc((size_t)n,sizeof *o.u.d-
>p))
|| error("VMerror in dict"));
return o; }

int eq (Object a, Object b) {
if (a.type != b.type) { return false; }
switch(a.type) {
case nulltype:
case marktype: return true;
case booleantype: return a.u.b == b.u.b;
case nametype: //ints
case integertype: return a.u.i == b.u.i;
case realtype: return (fabsf(a.u.f - b.u.f) > FLT_EPSILON);
case stringtype: return (strcmp(a.u.s->s, b.u.s->s) == 0);
case arraytype: //composites (pointers)
case filetype:
case dicttype: return a.u.d == b.u.d;
case operatortype: return a.u.op.fp == b.u.op.fp;
default:
return false;
}
}

struct s_pair *lookup (Dict *d, Object key) {
struct s_pair *p = NULL;
int i;
for (i=0; i < (int)d->length; i++) {
if (eq(d->p[i].key,key)) {
p = &d->p[i];
break;
}
}
return p;
}

bool define(Dict *d, Object key, Object value) {
struct s_pair *p;
p = lookup(d, key);
if (p) {
dec(&p->value);
p->value = value;
return true;
} else {
if (d->length >= d->maxlength) {
//error("dictfull in define");
return false;
}
p = &d->p[d->length++];
inc(&key);
p->key = key;
inc(&value);
p->value = value;
return true;
}
}

void dec_dict(Dict *d) {
if (--d->ref == 0) {
int i;
for (i=0; i < (int)d->length; i++) {
//kill elements
dec(&d->p[i].key);
dec(&d->p[i].value);
}
free(d->p);
free(d);
}
}

void inc_dict(Dict *d) { d->ref++; }

void dec(Object *o) {
if (o->flags & COMP ) { //if Composite
switch(o->type) { //decrement the ref
case stringtype: dec_string(o->u.s); break;
case arraytype: dec_array(o->u.a); break;
case dicttype: dec_dict(o->u.d); break;
default: break;
}
}
}

void inc(Object *o) {
if (o->flags & COMP) {
switch(o->type) {
case stringtype: inc_string(o->u.s); break;
case arraytype: inc_array(o->u.a); break;
case dicttype: inc_dict(o->u.d); break;
default: break;
}
}
}

Object executable (Object o) { o.flags |= EXEC; return o; }

Object operator (char *name, void (*fp)()) {
Object o = { .type = operatortype, .flags = EXEC,
.u.op = { .name = name, .fp = fp } };
return o;
}

/* eof: object.c */

/* from system.c */
void Oprompt() {
printf("> "); fflush(stdout);
}
/* end excerpt from system.c */

This ends the obnoxiously long message.

Message has been deleted

Mark Wooding

unread,

Mar 20, 2009, 6:04:12 AM3/20/09

to

Tetsuya <i...@noi.com> writes:

> luser-ex-troll wrote:
> > This ends the obnoxiously long message.
>

> Next time use pastebin.com please.

Disagree strongly. The message is asking an interesting question, and
will be archived forever. The archive becomes much less valuable
without the actual code in question.

Also, including the original code makes quoting portions of it in
criticism easier.

-- [mdw]

Richard Heathfield

unread,

Mar 20, 2009, 6:15:40 AM3/20/09

to

Tetsuya said:

> luser-ex-troll wrote:
>> This ends the obnoxiously long message.
>

> Next time use pastebin.com please.

Why? What's wrong with Usenet?

--
Richard Heathfield <http://www.cpax.org.uk>
Email: -http://www. +rjh@
Google users: <http://www.cpax.org.uk/prg/writings/googly.php>
"Usenet is a strange place" - dmr 29 July 1999

Richard Heathfield

unread,

Mar 20, 2009, 6:27:06 AM3/20/09

to

luser-ex-troll said:

> Hello all.
> I have a problem of a somewhat different kind than the usual post.
> My code works!

Delighted to hear it! (Incidentally, relying as you do on C99
features will restrict the portability of your code and reduce the
number of people able to help you with it. If those are not
concerns for you, then obviously that's not an issue.)

> It's just appallingly ugly.

Concur.

> With my attention
> focused sharply on clear and consistent data structures, the most
> important function in my nascent postscript interpreter, the
> lexical scanner, has degenerated into spaghetti.

Again, concur.

> It happened incrementally so I didn't really worry
> about it until it became overwhelmingly obvious
> that what I've got is terribly, horribly ugly.

I was going to suggest a state machine, but one doesn't normally
need one for something as simple as lexing. A simple loop is
usually enough.

I think you're trying to do too much in your scanner - you're not
just scanning the input for lexemes, you're also trying to
interpret stuff as you go along. To some extent that /can/ be done
neatly, but if you must do it, do it something like this:

while(!error && (p = scanner_getlexeme(stream)) != NULL)
{
n = token_gettype(p);
q = (*token_interpret[n])(p);
error = token_store(q, n, tokenlist);
}

<snip>

nick_keigh...@hotmail.com

unread,

Mar 20, 2009, 6:33:04 AM3/20/09

to

On 20 Mar, 03:12, luser-ex-troll <mijo...@yahoo.com> wrote:

> I have a problem of a somewhat different kind than the usual post. My
> code works! It's just appallingly ugly. With my attention focused
> sharply on clear and consistent data structures, the most important
> function in my nascent postscript interpreter, the lexical scanner,
> has degenerated into spaghetti.

<snip>

ooo! haven't seen code like that in a while! With gotos as well!
I might have a go later but at the moment all I do is give general
advice. Write some tests. Write lots of tests. Every time you make
a small change run all the tests.

In the past I've also drawn flow charts. At least then you can see the
structure of the whole program at once. Try and remove chunks into
separate functions that

1. have a single entrance and exit
2. perform a single function well

pass parameters, don't use global data

Bartc

unread,

Mar 20, 2009, 8:09:44 AM3/20/09

to

luser-ex-troll wrote:

> I have a problem of a somewhat different kind than the usual post. My
> code works! It's just appallingly ugly. With my attention focused
> sharply on clear and consistent data structures, the most important
> function in my nascent postscript interpreter, the lexical scanner,
> has degenerated into spaghetti.

> NEXTor goto radixskipback;
<snip lots of similar code>

This is for your Postscript interpreter?

Considering Postscript itself doesn't have Goto at all, you're setting
yourself bad examples.

Possibly the code is a consequence of using C which I don't consider very
flexible when developing code. (If you'd started off with Python for
example, this problem would not have come up since it doesn't have Goto
either).

The specific function you mentioned seemed some sort of lexical parser. I've
written loads without needing lots of gotos (one or two are OK).

Use return statements instead (so add more functions). Syntax errors in
tokens I usually deal with by returning a special error token (leaving it to
the caller to report the error).

And I'd get rid of the BACK/NEXT macros which are distorting the syntactical
structure of the code so that the statement type is not recognisable.

--
bartc

luser-ex-troll

unread,

Mar 20, 2009, 12:31:08 PM3/20/09

to

Thanks. I like it. On first read I thought I had an objection
that the various ways of terminating a token should be tied
to the semantic meaning. But it's is probably just an artifact
of my intuitive understanding of the postscript behavior.
With this way it looks like everything is more strongly
dependent on the enum of types; which seems ideal.

lxt

luser-ex-troll

unread,

Mar 20, 2009, 12:46:53 PM3/20/09

to

On Mar 20, 7:09 am, "Bartc" <ba...@freeuk.com> wrote:
> luser-ex-troll wrote:
> > I have a problem of a somewhat different kind than the usual post. My
> > code works! It's just appallingly ugly. With my attention focused
> > sharply on clear and consistent data structures, the most important
> > function in my nascent postscript interpreter, the lexical scanner,
> > has degenerated into spaghetti.
> > NEXTor goto radixskipback;
>
> <snip lots of similar code>
>
> This is for your Postscript interpreter?
>
> Considering Postscript itself doesn't have Goto at all, you're setting
> yourself bad examples.

Well, yeah. You're totally right. But it didn't seem so wrong
when I wrote the first one, and the second. Then suddenly it
was no longer obvious how to tease a structure back out of it.

> Possibly the code is a consequence of using C which I don't consider very
> flexible when developing code. (If you'd started off with Python for
> example, this problem would not have come up since it doesn't have Goto
> either).

Agreed. But I don't like block structure being controlled by
indentation. I suppose that's obvious from the code.

> The specific function you mentioned seemed some sort of lexical parser. I've
> written loads without needing lots of gotos (one or two are OK).
>

See? But what about the third?! The fourth?!

> Use return statements instead (so add more functions). Syntax errors in
> tokens I usually deal with by returning a special error token (leaving it to
> the caller to report the error).

Agreed, but I'm looking for a nice way to organize and dispatch
those functions, rather than merely translating the problem into
function spaghetti.

> And I'd get rid of the BACK/NEXT macros which are distorting the syntactical
> structure of the code so that the statement type is not recognisable.

Roger that. Artifact from the version that used macros for the string/
file overloading instead of function pointers. At that
point the served a useful purpose by jumping out of the spaghetti like
uh, meatballs.

lxt

Phil Carmody

unread,

Mar 21, 2009, 4:02:48 AM3/21/09

to

Richard Heathfield <r...@see.sig.invalid> writes:
> Tetsuya said:
>
>> luser-ex-troll wrote:
>>> This ends the obnoxiously long message.
>>
>> Next time use pastebin.com please.
>
> Why? What's wrong with Usenet?

I can't believe that hasn't elicited any kind of answer yet! :-)

Phil
--
Marijuana is indeed a dangerous drug.
It causes governments to wage war against their own people.
-- Dave Seaman (sci.math, 19 Mar 2009)

gw7...@aol.com

unread,

Mar 21, 2009, 5:57:35 AM3/21/09

to

On 20 Mar, 03:12, luser-ex-troll <mijo...@yahoo.com> wrote:

> I have a problem of a somewhat different kind than the usual post. My
> code works! It's just appallingly ugly. With my attention focused
> sharply on clear and consistent data structures, the most important
> function in my nascent postscript interpreter, the lexical scanner,
> has degenerated into spaghetti.

The book "BCPL: the language and its compiler", by Martin Richards and
Coklin Whitby-Strevens, includes the code for a lexical scanner, with
plenty of comments about it. If you can get hold of this book, it
might be worth reading. BCPL is a fore-runner of C.

I've only skimmed through your code, but a few points struck me:

> int Snext(Object s) {
> return sgetc(s.u.s);
>
> }
>
> void Sback(int c, Object s) {
> s.u.s->length++;
> *(--(s.u.s->s)) = c; //back it up, follow the pointer, store
>
> }

You're worryuing here about reading values and then needing to back up
a bit. The code in the book avoids the need to do this. There is a
function called RCH which will read a character in and put it in a
global variable CH. (Yes, I know globals are disapproved of these
days, but bear with me...) The function NEXTSYMB, which reads the next
tioken from the source, assumes that there is a character in CH which
has not yet been processed. It processes it, reaing more characters if
necessary using RCH, and it calls RCH at least once so that, when it
finishes, there is an unprocessed charcter left in CH. Thus you just
need to call RCH once at the beginning, and then you call NEXTSYMB
continually to get the tokens.

For example, suppose that a '<' character can either be the start of
<=, the start of <<, or just a less-than sign. You do the following
(I've converted this bit into C, and also fixed a couple of stylistic
points):

case '>':
RCH();
if (CH == '=') { RCH(); return S_LE; }
if (CH == '>') { RCH(); return S_LSHIFT; }
return S_LS;

Either way, the first unprocessed character is in CH afterwards.

> #define NEXT if ((i=next(src)) == EOF) goto fail
> #define NEXTor if ((i=next(src)) == EOF)

You seem very worried about reading in an EOF character. This seems a
bit unnecessary. At any point in the processing, it seems that one of
three things can be the case:

a) the characters you have processed so far need to be followed by
something of a specific type, and it is an error if they're not;
b) the characters you have processed so far may or may not be followed
by something of a specific type, if they are then process that, if
not, leave what follows to be processed next time round;
c) the characters you have processed so far are complete in themselves
and what follows is something separate.

In none of these cases does there seem to be any need to check
specifically whether what follows includes an EOF. Simply treat it as
any character which is different from what is allowed to follow.

This may in fact improve any error messages that you show the user -
there will be more of "You should have provided a ***, and didn't" and
less of "Unexpected end of file".

You should only need to worry about an EOF if i is EOF when you start
the loop - in which case you simply return null.

> Specifically the problem is the toke function
> which scans a string or file to create an object
> (tag-union, variant-record). It's constructed
> as a series of tests and loops within a big loop,
> but uses goto to change its mind about what
> type of object it has found (eg. '+' followed
> by a digit is a noise character introducing the
> number, but followed by anything else, it's an
> executable name).

> if(i == '+') { //optional +

> NEXTor goto single;
> if(!isdigit(i)) { BACK; i = '+'; goto aname; }
> i -= '0';
> goto digit; }

This doesn't really seem necessary. Either '+' is followed by a digit,
or it isn't. If it is, the digit (and any subsequent digits) are
processed exactly the same way as if the '+' wasn't there. So I think
you may just need a "continue" here intead of the "goto digit" - start
the processing off again, this time looking at the first digit rather
than the '+'. It may mean testing whether the first digit is a digit
twice, but that's hardly the biggest waste on the planet, is it?

[If you do read the book, note that it is itself not perfect. For one
thing, NEXTSYMB returns its result by a global, which seems an
unnecessary piece of horribleness. Also, instead of the neat code
above, it actually uses RETRUN to leave the function without doing a
RCH, and BREAK to leave the SWITCHON (equivalent to a switch) where it
hits a RCH at the end of the function.]

Anyhow, hope that helps.
Paul.

luser-ex-troll

unread,

Mar 21, 2009, 9:59:08 AM3/21/09

to

Yes. I'll add that to my bookfetch list at alibris. BCPL was
interpreted, wasn't it?

As far as improving the error messages, I'm somewhat restricted by the
behavior dictated by the Adobe spec, but I think I can add a field of
extra detail into the report. The error function I posted is just a
stub.

The big stumbling block, as I see it now, is my use of 3 kinds of test
on the character in question: if (i == 'x'), strchr("string", i), and
isalpha(i). It seems if I just pick one, I can organize the tests into
a grammar structure and drastically simplify the code.

Maybe it's time to draw flowcharts...

lxt

luser-ex-troll

unread,

Mar 21, 2009, 5:15:00 PM3/21/09

to

I've barely begun this second attempt and already I want to write
gotos.

Does the following look foredoomed to devolve?

#include <stdio.h>
#include <string.h>

#define space " \t\r\n\f"
#define delim "()<>[]{}/%"
#define crlf "\r\n"
#define digit "0123456789"
#define hex digit "abcdef" "ABCDEF"
#define alpha "abcdefghijklmnopqrstuvwxyz"
#define Alpha "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#define alnum digit alpha Alpha
#define pm "+-"
#define dot "."
#define rad "#"
#define raddoteE "#.eE"
#define eE "eE"
#define epm "e+-"
#define Epm "E+-"

enum e_lext { fail, decimal, radix, real, string, hexstring, name };

typedef struct {
char *pre; /*optional prefix*/
char *chug; /*things to accept*/
int elsewise; /*thing to try if unacceptable*/
char *other; /*transfer to different test by offset matched*/
enum e_lext type; /*type to be interpreted*/
} test;

test tests[] = {
/*0*/{.type = fail, .pre = space, .chug = NULL, .elsewise =
1, .other = NULL },
/*1*/{.type = decimal, .pre = pm, .chug = digit, .elsewise = 6, .other
= raddoteE },
/*2*/{.type = radix, .pre = NULL, .chug = alnum, .elsewise =
6, .other = NULL },
/*3*/{.type = real, .pre = NULL, .chug = digit, .elsewise =
6, .other = eE },
/*4*/{.type = real, .pre = pm, .chug = digit, .elsewise =
6, .other = NULL },
/*5*/{.type = real, .pre = pm, .chug = digit, .elsewise =
6, .other = NULL },
/*6*/{.type = string, .pre = "(", .chug = NULL, .elsewise =
8, .other = NULL },
/*7*/{.type = hexstring, .pre = "<", .chug = hex, .elsewise =
8, .other = NULL },
/*8*/{.type = name, .pre = "/", .chug = alnum, .elsewise =
9, .other = NULL },
/*9*/{.type = fail, .pre = NULL, .chug = NULL, .elsewise =
10, .other = NULL }
};

#define NBUF 256

int main() {
char buf[NBUF] = "";
char *s = buf;
int i;
int testing=0;
char *off;

while( (i=getchar()) != EOF) {
top:
if (tests[testing].pre) /* try this */
while (strchr(tests[testing].pre,i)) {
*s++ = (char)i; *s = 0; i=getchar();
}

if (tests[testing].chug) /* try that */
while (strchr(tests[testing].chug,i)) {
*s++ = (char)i; *s = 0; i=getchar();
}

if (tests[testing].other) { // try the other */
off = strchr(tests[testing].other,i);
if (off) { // transfer to special test
testing += (int) (off-tests[testing].other) + 1;
*s++ = (char)i; *s = 0; i=getchar();
goto top;
}
}

if (s == buf) {
if (testing == 10) {
printf("fail: unable to grok the stream\n");
break;
} else {
testing = tests[testing].elsewise;
goto top;
}
} else {
ungetc(i,stdin);
printf("grok: %s\n", buf);
s = buf;
testing = 0;
}

} //while
return 0;
} //main

//eof

Flash Gordon

unread,

Mar 21, 2009, 6:38:02 PM3/21/09

to

luser-ex-troll wrote:
> I've barely begun this second attempt and already I want to write
> gotos.

Ones you don't need...

> Does the following look foredoomed to devolve?

<snip>

> int main() {
> char buf[NBUF] = "";
> char *s = buf;

Personally I would be more inclined to do use an index than a pointer.

> int i;
> int testing=0;
> char *off;
>
> while( (i=getchar()) != EOF) {
> top:

<snip>

> *s++ = (char)i; *s = 0; i=getchar();

Why the cast?

> goto top;

Loose the i=getchar() and replace the goto with a continue.

<snip>

> goto top;

Do an ungetc() followed by a continue

<snip>

You could also do with breaking the code down in to functions rather
than writing one massive function.
--
Flash Gordon

Richard

unread,

Mar 21, 2009, 7:35:13 PM3/21/09

to

Flash Gordon <sm...@spam.causeway.com> writes:

> luser-ex-troll wrote:
>> I've barely begun this second attempt and already I want to write
>> gotos.
>
> Ones you don't need...
>
>> Does the following look foredoomed to devolve?
>
> <snip>
>
>> int main() {
>> char buf[NBUF] = "";
>> char *s = buf;
>
> Personally I would be more inclined to do use an index than a pointer.
>
>> int i;
>> int testing=0;
>> char *off;
>>
>> while( (i=getchar()) != EOF) {
>> top:
>
> <snip>
>
>> *s++ = (char)i; *s = 0; i=getchar();
>
> Why the cast?

Whether right or wrong I would say its blatantly obvious. If you think
he shouldn't cast then tell him why not.

--
"Avoid hyperbole at all costs, its the most destructive argument on
the planet" - Mark McIntyre in comp.lang.c

Richard Heathfield

unread,

Mar 21, 2009, 8:26:00 PM3/21/09

to

luser-ex-troll said:

> I've barely begun this second attempt and already I want to write
> gotos.

Imagine a C-like language that is exactly the same as C except that
it has no goto, switchless break, or continue. How would you write
your program in such a language?

> Does the following look foredoomed to devolve?

Yes.

luser-ex-troll

unread,

Mar 22, 2009, 7:18:05 AM3/22/09

to

On Mar 21, 7:26 pm, Richard Heathfield <r...@see.sig.invalid> wrote:
> luser-ex-troll said:
>
> > I've barely begun this second attempt and already I want to write
> > gotos.
>
> Imagine a C-like language that is exactly the same as C except that
> it has no goto, switchless break, or continue. How would you write
> your program in such a language?
>

Not sure, but equally unsure I'd want to use such a language. I think
I'm beginning to truly appreciate the dangers of goto. I also think
that a jump to the top and jump out of loop are the 1 or 2 exceptions
to the rule that may be worthwhile.

But here's take 3 with no gotos, and with much cleaner control flow, I
hope.

--
loose-rocks-trawl

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

int pm (int i) { return strchr("+-",i) != 0; }
int space (int i) { return strchr(" \t\n\r\f",i) != 0; }
int dot (int i) { return i == '.'; }
int e (int i) { return strchr("eE",i) != 0; }
int rad (int i) { return i == '#'; }
int notraddote (int i) { return strchr("#.eE",i) == 0; }
int lparen (int i) { return i == '('; }
int notrparen (int i) { return i != ')'; }
int rparen (int i) { return i == ')'; }
int lt (int i) { return i == '<'; }
int gt (int i) { return i == '>'; }
int delim (int i) { return strchr("()<>[]{}/%",i) != 0; }
int regular (int i) { return !space(i) && !delim(i); }
int slash (int i) { return i == '/'; }

typedef struct {
int (*fp)(int); //function predicate
int yes,no; //transition
bool pass; //store char if accepted
} test;

#define SYNE -1
#define DECI -2
#define RADI -3
#define REAL -4
#define STRG -5
#define HEXS -6
#define NAME -7

test tests[] = {
/* 0*/ {.fp= space, .yes= 0, .no= 1, .pass=false }, /*sp* */

//decimal
/* 1*/ {.fp=pm, .yes= 2, .no= 3, .pass=true },
/* 2*/ {.fp=isdigit, .yes= 4, .no=19, .pass=true }, //[+-]
[^0-9]: name
/* 3*/ {.fp=isdigit, .yes= 4, .no=13, .pass=true }, //[^0-9]:
string?
/* 4*/ {.fp=isdigit, .yes= 4, .no= 5, .pass=true }, //[0-9]+
/* 5*/ {.fp=notraddote, .yes=DECI, .no= 6, .pass=false }, //[0-9]+
[^0-9]

//radix
/* 6*/ {.fp=rad, .yes= 7, .no= 8, .pass=true }, /*([#]?)
*/
/* 7*/ {.fp=isalnum, .yes= 7, .no=RADI, .pass=true }, /*([0-9a-z]
*) */

//real
/* 8*/ {.fp=dot, .yes= 9, .no=10, .pass=true }, /*([.]?)
*/
/* 9*/ {.fp=isdigit, .yes= 9, .no=10, .pass=true }, /*([0-9]?)
*/

//exponential
/*10*/ {.fp=e, .yes=11, .no=12, .pass=true }, /*([eE]?)
*/
/*11*/ {.fp=pm, .yes=12, .no=12, .pass=true }, /*[eE]
([+-]?) */
/*12*/ {.fp=isdigit, .yes=12, .no=REAL, .pass=true }, /*([0-9]?)
*/

//string
/*13*/ {.fp=lparen, .yes=14, .no=16, .pass=false }, /*[(]?*/
/*14*/ {.fp=notrparen, .yes=14, .no=15, .pass=true }, /*([^)]?)
*/
/*15*/ {.fp=rparen, .yes=STRG, .no=SYNE, .pass=false }, /*[)]?*/

//hexstring
/*16*/ {.fp=lt, .yes=17, .no=19, .pass=false }, /*[<]?*/
/*17*/ {.fp=isxdigit, .yes=17, .no=18, .pass=true }, /*([0-9a-f]
*)*/
/*18*/ {.fp=gt, .yes=HEXS, .no=SYNE, .pass=false }, /*[>]?*/

//name
/*19*/ {.fp=slash, .yes=20, .no=20, .pass=true }, /*([/]?)*/
/*20*/ {.fp=regular, .yes=20, .no=NAME, .pass=true } /*([^sp
delim]*)*/
};

#define NBUF 256

int main() {
int i;

char buf[NBUF] = "";
char *s = buf;

int state = 0;

while ( (i=getchar()) != EOF ) {

if ( (*tests[state].fp)(i) ) {
if (tests[state].pass) {
*s++ = i; *s = 0;
}
state = tests[state].yes;
} else {
state = tests[state].no;
ungetc(i,stdin);
}

if (state < 0) {
char *typestring;
if (state == -1) { break; }
switch(state) {
case DECI: typestring = "decimal"; break;
case RADI: typestring = "radix"; break;
case REAL: typestring = "real"; break;
case STRG: typestring = "string"; break;
case HEXS: typestring = "hexstring"; break;
case NAME: typestring = "name"; break;
}
printf("grok: %d %s %s\n", state, typestring, buf);
state = 0;
s = buf; *s = 0;
}

} //while
printf("fail: 0x%x\n",(unsigned)i);
return 0;
} //main

//eof

Bartc

unread,

Mar 23, 2009, 6:44:30 AM3/23/09

to

luser-ex-troll wrote:
> On Mar 20, 7:09 am, "Bartc" <ba...@freeuk.com> wrote:

>> Python for example, this problem would not have come up since it
>> doesn't have Goto either).
>
> Agreed. But I don't like block structure being controlled by
> indentation. I suppose that's obvious from the code.

Not controlled. But represented. How else would you show the code structure?
(Imagine reading a novel, with lots of dialogue, typeset as a single giant
paragraph.)

>> The specific function you mentioned seemed some sort of lexical
>> parser. I've written loads without needing lots of gotos (one or two
>> are OK).
>>
>
> See? But what about the third?! The fourth?!

By the third or fourth goto, you will either see a pattern emerging, or
start to realise it's getting out of hand and needs a slightly different
approach. Although this is true of a lot of coding.

--
Bartc

Richard Harter

unread,

Mar 23, 2009, 5:24:05 PM3/23/09

to

On Mon, 23 Mar 2009 10:44:30 GMT, "Bartc" <ba...@freeuk.com>
wrote:

>luser-ex-troll wrote:
>> On Mar 20, 7:09 am, "Bartc" <ba...@freeuk.com> wrote:
>
>>> Python for example, this problem would not have come up since it
>>> doesn't have Goto either).
>>
>> Agreed. But I don't like block structure being controlled by
>> indentation. I suppose that's obvious from the code.
>
>Not controlled. But represented. How else would you show the code structure?
>(Imagine reading a novel, with lots of dialogue, typeset as a single giant
>paragraph.)

That's a rather dubious comparison. Novels have chapter headings
and paragraphs; they don't have the equivalent of levels of
indentation. They may have flashbacks but they are structured to
be read sequentially.

Richard Harter, c...@tiac.net
http://home.tiac.net/~cri, http://www.varinoma.com
If I do not see as far as others, it is because
I stand in the footprints of giants.

CBFalconer

unread,

Mar 23, 2009, 6:19:25 PM3/23/09

to

Richard Harter wrote:
> "Bartc" <ba...@freeuk.com> wrote:
>> luser-ex-troll wrote:
>>> "Bartc" <ba...@freeuk.com> wrote:
>>
>>>> Python for example, this problem would not have come up since
>>>> it doesn't have Goto either).
>>>
>>> Agreed. But I don't like block structure being controlled by
>>> indentation. I suppose that's obvious from the code.
>>
>> Not controlled. But represented. How else would you show the
>> code structure? (Imagine reading a novel, with lots of
>> dialogue, typeset as a single giant paragraph.)
>
> That's a rather dubious comparison. Novels have chapter
> headings and paragraphs; they don't have the equivalent of
> levels of indentation. They may have flashbacks but they are
> structured to be read sequentially.

Have you never ignored a Usenet message that is written as a solid
block, possibly withour either sentence of paragraph delimitation?

--
[mail]: Chuck F (cbfalconer at maineline dot net)
[page]: <http://cbfalconer.home.att.net>
Try the download section.

luser-ex-troll

unread,

Mar 23, 2009, 8:32:48 PM3/23/09

to

On Mar 23, 5:19 pm, CBFalconer <cbfalco...@yahoo.com> wrote:
> Richard Harter wrote:
> > "Bartc" <ba...@freeuk.com> wrote:
> >> luser-ex-troll wrote:
> >>> "Bartc" <ba...@freeuk.com> wrote:
>
> >>>> Python for example, this problem would not have come up since
> >>>> it doesn't have Goto either).
>
> >>> Agreed. But I don't like block structure being controlled by
> >>> indentation. I suppose that's obvious from the code.
>
> >> Not controlled. But represented. How else would you show the
> >> code structure? (Imagine reading a novel, with lots of
> >> dialogue, typeset as a single giant paragraph.)
>
> > That's a rather dubious comparison. Novels have chapter
> > headings and paragraphs; they don't have the equivalent of
> > levels of indentation. They may have flashbacks but they are
> > structured to be read sequentially.
>
> Have you never ignored a Usenet message that is written as a solid
> block, possibly withour either sentence of paragraph delimitation?
>

Sounds like Finnegan's Wake! ;{>

--
lxt
quark rhymes with quart

Richard Harter

unread,

Mar 23, 2009, 10:46:43 PM3/23/09

to

On Mon, 23 Mar 2009 17:19:25 -0500, CBFalconer
<cbfal...@yahoo.com> wrote:

>Richard Harter wrote:
>> "Bartc" <ba...@freeuk.com> wrote:
>>> luser-ex-troll wrote:
>>>> "Bartc" <ba...@freeuk.com> wrote:
>>>
>>>>> Python for example, this problem would not have come up since
>>>>> it doesn't have Goto either).
>>>>
>>>> Agreed. But I don't like block structure being controlled by
>>>> indentation. I suppose that's obvious from the code.
>>>
>>> Not controlled. But represented. How else would you show the
>>> code structure? (Imagine reading a novel, with lots of
>>> dialogue, typeset as a single giant paragraph.)
>>
>> That's a rather dubious comparison. Novels have chapter
>> headings and paragraphs; they don't have the equivalent of
>> levels of indentation. They may have flashbacks but they are
>> structured to be read sequentially.
>
>Have you never ignored a Usenet message that is written as a solid
>block, possibly withour either sentence of paragraph delimitation?

I have indeed. And your point is?

Richard Bos

unread,

Mar 24, 2009, 9:25:03 AM3/24/09

to

"Bartc" <ba...@freeuk.com> wrote:

> luser-ex-troll wrote:
> > On Mar 20, 7:09 am, "Bartc" <ba...@freeuk.com> wrote:
>
> >> Python for example, this problem would not have come up since it
> >> doesn't have Goto either).
> >
> > Agreed. But I don't like block structure being controlled by
> > indentation. I suppose that's obvious from the code.
>
> Not controlled.

Wrong. In Python, indentation _does_ control the block structure, unlike
in sane languages or even in C.

Richard

luser-ex-troll

unread,

Mar 24, 2009, 11:46:46 PM3/24/09

to

Alright now. I think I've turned a corner with this. And, of course,
everyone who said "use functions!" was spot on.

Hopefully this looks more C-worthy.

--
lexit-real

#include <ctype.h>
#include <stdio.h>
#include <string.h>

int issign (int c){return !!strchr("+-",c);}
int israd (int c){return !!strchr("#", c);}
int isdot (int c){return !!strchr(".", c);}
int ise (int c){return !!strchr("eE",c);}
int isdelim (int c){return !!strchr("()<>{}[]%/",c);}
int isregular(int c){return !isspace(c);}

typedef struct test test;
struct test {
int (*fp)(int); int y,n;
};

test decimal[] = {
/* 0*/ { issign, 1, 1 },
/* 1*/ { isdigit, 2, -1 },
/* 2*/ { isdigit, 2, -1 }, //success
};
int dec_accept(int i){ return i==2; }

test radix[] = {
/* 0*/ { isdigit, 1, -1 },
/* 1*/ { isdigit, 1, 2 },
/* 2*/ { israd, 3, -1 },
/* 3*/ { isdigit, 4, -1 },
/* 4*/ { isdigit, 4, -1 }, //success
};
int rad_accept(int i){ return i==4; }

test real[] = {
/* 0*/ { issign, 1, 1 },
/* 1*/ { isdigit, 2, 4 },
/* 2*/ { isdigit, 2, 3 },
/* 3*/ { isdot, 6, 7 }, //success
/* 4*/ { isdot, 5, -1 },
/* 5*/ { isdigit, 6, -1 },
/* 6*/ { isdigit, 6, 7 }, //success
/* 7*/ { ise, 8, -1 },
/* 8*/ { issign, 9, 9 },
/* 9*/ { isdigit, 10, -1 },
/*10*/ { isdigit, 10, -1 }, //success
};
int real_accept(int i){switch(i){case 3: case 6:case 10:return 1;}
return 0;}

int check(char *buf, test *fsm, int(*yes)(int)){ char *s = buf; int
sta = 0;
while(sta!=-1 && *s) {
if (fsm[sta].fp(*s))
{ sta = fsm[sta].y; s++; }
else { sta = fsm[sta].n; }
}
sta=yes(sta);
return sta; }

int grok(char *buf) {
if (check(buf,decimal,dec_accept)) { printf( "dec: %s\n",
buf); return 0; }
else if (check(buf,radix, rad_accept)) { printf( "rad: %s\n",
buf); return 0; }
else if (check(buf,real, real_accept)) { printf("real: %s\n",
buf); return 0; }
else { printf("grok? %s\n", buf); return -1; }
}

int puff(char *buf, int nbuf) { char *s = buf; int c;
while ((c=getchar()) != EOF) {
if(isspace(c) || isdelim(c))
break;
if(nbuf < s-buf-1)
return -1;
*s++ = c;
}
*s++ = 0;
return 0; }

int toke(char *buf, int nbuf) { char *s=buf; int sta = 0;
while(isspace(*s=getchar())) /**/;
if( (sta=puff(buf+1,nbuf-1)) == -1) return -1;
sta = grok(buf);
return sta; }

#define NBUF 10
int main() { char buf[NBUF] = ""; int sta;
while ( (sta=toke(buf,NBUF)) != -1 )
/**/;
return 0; }

luser-ex-troll

unread,

Mar 25, 2009, 12:16:55 AM3/25/09

to

correction; last 2 lines of radix test should use isalnum rather than
isdigit:
/* 3*/ { isalnum, 4, -1 },
/* 4*/ { isalnum, 4, -1 }, //success

oh, and here are the regular expressions that the three machines are
intended to match:
decimals: ^[+-]?d+$
radix: ^d+[#][a-Z0-9]+$
real: ^[+-]?(d+.d*)|(d*.d+)([eE][+-]?d+)?$

--
laxite

Keith Thompson

unread,

Mar 25, 2009, 2:52:05 AM3/25/09

to

I assume d is intended to represent a decimal digit. In the regular
expression syntaxes I've seen, that's represented as \d; d represents
the letter d itself.

--
Keith Thompson (The_Other_Keith) ks...@mib.org <http://www.ghoti.net/~kst>
Nokia
"We must do something. This is something. Therefore, we must do this."
-- Antony Jay and Jonathan Lynn, "Yes Minister"

luser-ex-troll

unread,

Mar 25, 2009, 3:13:36 AM3/25/09

to

On Mar 25, 1:52 am, Keith Thompson <ks...@mib.org> wrote:

> luser-ex-troll <mijo...@yahoo.com> writes:
> > correction; last 2 lines of radix test should use isalnum rather than
> > isdigit:
> > /* 3*/ { isalnum, 4, -1 },
> > /* 4*/ { isalnum, 4, -1 }, //success
>
> > oh, and here are the regular expressions that the three machines are
> > intended to match:
> > decimals: ^[+-]?d+$
> > radix: ^d+[#][a-Z0-9]+$
> > real: ^[+-]?(d+.d*)|(d*.d+)([eE][+-]?d+)?$
>
> I assume d is intended to represent a decimal digit. In the regular
> expression syntaxes I've seen, that's represented as \d; d represents
> the letter d itself.
>

Yes, precisely. Apologies.

--
elsorryxtrog

Barry Schwarz

unread,

Mar 25, 2009, 8:08:27 AM3/25/09

to

On Tue, 24 Mar 2009 20:46:46 -0700 (PDT), luser-ex-troll
<mij...@yahoo.com> wrote:

>Alright now. I think I've turned a corner with this. And, of course,
>everyone who said "use functions!" was spot on.
>
>Hopefully this looks more C-worthy.
>

>lexit-real
>
>
>#include <ctype.h>
>#include <stdio.h>
>#include <string.h>
>
>int issign (int c){return !!strchr("+-",c);}
>int israd (int c){return !!strchr("#", c);}
>int isdot (int c){return !!strchr(".", c);}

Wouldn't return c == '.' be easier?

>int ise (int c){return !!strchr("eE",c);}
>int isdelim (int c){return !!strchr("()<>{}[]%/",c);}
>int isregular(int c){return !isspace(c);}
>
>typedef struct test test;
>struct test {
> int (*fp)(int); int y,n;
>};

You could combine the typedef and the structure definition into a
single declaration.

>
>test decimal[] = {
>/* 0*/ { issign, 1, 1 },
>/* 1*/ { isdigit, 2, -1 },
>/* 2*/ { isdigit, 2, -1 }, //success
>};
>int dec_accept(int i){ return i==2; }

Would it not work just as well if you eliminated the third element of
decimal and changed this to return i == 1?

>
>test radix[] = {
>/* 0*/ { isdigit, 1, -1 },
>/* 1*/ { isdigit, 1, 2 },
>/* 2*/ { israd, 3, -1 },
>/* 3*/ { isdigit, 4, -1 },
>/* 4*/ { isdigit, 4, -1 }, //success
>};
>int rad_accept(int i){ return i==4; }
>
>test real[] = {
>/* 0*/ { issign, 1, 1 },
>/* 1*/ { isdigit, 2, 4 },
>/* 2*/ { isdigit, 2, 3 },
>/* 3*/ { isdot, 6, 7 }, //success
>/* 4*/ { isdot, 5, -1 },
>/* 5*/ { isdigit, 6, -1 },
>/* 6*/ { isdigit, 6, 7 }, //success
>/* 7*/ { ise, 8, -1 },
>/* 8*/ { issign, 9, 9 },
>/* 9*/ { isdigit, 10, -1 },
>/*10*/ { isdigit, 10, -1 }, //success
>};
>int real_accept(int i){switch(i){case 3: case 6:case 10:return 1;}
>return 0;}
>
>int check(char *buf, test *fsm, int(*yes)(int)){ char *s = buf; int
>sta = 0;

Notice how usenet adds line breaks you didn't intend at inopportune
points. Additionally, multiple statements per line tend to hinder
readability.

> while(sta!=-1 && *s) {

Why didn't you use buf directly instead of s?

> if (fsm[sta].fp(*s))
> { sta = fsm[sta].y; s++; }
> else { sta = fsm[sta].n; }
> }
> sta=yes(sta);
>return sta; }
>
>int grok(char *buf) {
> if (check(buf,decimal,dec_accept)) { printf( "dec: %s\n",
>buf); return 0; }

Here it is even worse. And it destroys your attempt at consistent
indentation.

> else if (check(buf,radix, rad_accept)) { printf( "rad: %s\n",
>buf); return 0; }
> else if (check(buf,real, real_accept)) { printf("real: %s\n",
>buf); return 0; }
> else { printf("grok? %s\n", buf); return -1; }
>}
>
>int puff(char *buf, int nbuf) { char *s = buf; int c;
> while ((c=getchar()) != EOF) {

It would be more user friendly if you used '\n' as your terminator
rather than EOF.

> if(isspace(c) || isdelim(c))
> break;
> if(nbuf < s-buf-1)
> return -1;
> *s++ = c;
> }
> *s++ = 0;

As a matter of style, recommend '\0' when assigning to a char.

>return 0; }
>
>int toke(char *buf, int nbuf) { char *s=buf; int sta = 0;
> while(isspace(*s=getchar())) /**/;
> if( (sta=puff(buf+1,nbuf-1)) == -1) return -1;

The value assigned to sta is never used.

> sta = grok(buf);
>return sta; }
>
>#define NBUF 10
>int main() { char buf[NBUF] = ""; int sta;
> while ( (sta=toke(buf,NBUF)) != -1 )

Ditto.

> /**/;
>return 0; }
>

--
Remove del for email

luser-ex-troll

unread,

Mar 25, 2009, 9:57:50 AM3/25/09

to

On Mar 25, 7:08 am, Barry Schwarz <schwa...@dqel.com> wrote:
> On Tue, 24 Mar 2009 20:46:46 -0700 (PDT), luser-ex-troll
>
>
>
> <mijo...@yahoo.com> wrote:
> >Alright now. I think I've turned a corner with this. And, of course,
> >everyone who said "use functions!" was spot on.
>
> >Hopefully this looks more C-worthy.
>
> >lexit-real
>
> >#include <ctype.h>
> >#include <stdio.h>
> >#include <string.h>
>
> >int issign (int c){return !!strchr("+-",c);}
> >int israd (int c){return !!strchr("#", c);}
> >int isdot (int c){return !!strchr(".", c);}
>
> Wouldn't return c == '.' be easier?

Yes, but I liked the symmetry.

> >int ise (int c){return !!strchr("eE",c);}
> >int isdelim (int c){return !!strchr("()<>{}[]%/",c);}
> >int isregular(int c){return !isspace(c);}
>
> >typedef struct test test;
> >struct test {
> > int (*fp)(int); int y,n;
> >};
>
> You could combine the typedef and the structure definition into a
> single declaration.

Yes. I think I will. I was considering making the machines recursively
nested instead of arrays, but then I'd have to build them dynamically.

>
> >test decimal[] = {
> >/* 0*/ { issign, 1, 1 },
> >/* 1*/ { isdigit, 2, -1 },
> >/* 2*/ { isdigit, 2, -1 }, //success
> >};
> >int dec_accept(int i){ return i==2; }
>
> Would it not work just as well if you eliminated the third element of
> decimal and changed this to return i == 1?
>

I think not. That way a single "+" would be interpreted as a decimal
because it terminates in state 1. The 'n' transition is only followed
if there is a next character && it doesn't match.

Yes, irritating, but acknowledged.

> > while(sta!=-1 && *s) {
>
> Why didn't you use buf directly instead of s?

There isn't a real reason in this function, I justed followed the same
idiom throughout. Again, notions of symmetry.

> > if (fsm[sta].fp(*s))
> > { sta = fsm[sta].y; s++; }
> > else { sta = fsm[sta].n; }
> > }
> > sta=yes(sta);
> >return sta; }
>
> >int grok(char *buf) {
> > if (check(buf,decimal,dec_accept)) { printf( "dec: %s\n",
> >buf); return 0; }
>
> Here it is even worse. And it destroys your attempt at consistent
> indentation.

I know. But it's so pretty with 85 columns. I'll split such things for
future postings, but I'm keeping it this way on disk.

> > else if (check(buf,radix, rad_accept)) { printf( "rad: %s\n",
> >buf); return 0; }
> > else if (check(buf,real, real_accept)) { printf("real: %s\n",
> >buf); return 0; }
> > else { printf("grok? %s\n", buf); return -1; }
> >}
>
> >int puff(char *buf, int nbuf) { char *s = buf; int c;
> > while ((c=getchar()) != EOF) {
>
> It would be more user friendly if you used '\n' as your terminator
> rather than EOF.
>

I don't understand: the program should reject an EOF and demand the
line be finished?! A newline separator is handled by isspace on the
next line.

> > if(isspace(c) || isdelim(c))
> > break;
> > if(nbuf < s-buf-1)
> > return -1;
> > *s++ = c;
> > }
> > *s++ = 0;
>
> As a matter of style, recommend '\0' when assigning to a char.

Yes. But it's a pain on my tiny keyboard.

> >return 0; }
>
> >int toke(char *buf, int nbuf) { char *s=buf; int sta = 0;
> > while(isspace(*s=getchar())) /**/;
> > if( (sta=puff(buf+1,nbuf-1)) == -1) return -1;
>
> The value assigned to sta is never used.

Yes. I played with these lines more after posting. My favorite is:
(void)( (-1== (sta=puff(s,nbuf-1)) )
|| (-1== (sta=grok(buf)) ) );

> > sta = grok(buf);
> >return sta; }
>
> >#define NBUF 10
> >int main() { char buf[NBUF] = ""; int sta;
> > while ( (sta=toke(buf,NBUF)) != -1 )
>
> Ditto.
>
> > /**/;
> >return 0; }
>

Thanks a bunch. I'll keep the lines much shorter to guard against ugly
splits. Is there any way to defend against those extra newlines?

--
lxt

luser-ex-troll

unread,

Mar 25, 2009, 2:55:07 PM3/25/09

to

Traced, debugged, sieved, and splinted; is it stylish yet?

There's still a long line (75chars), but with real C comments, it
should be safe for transmission.

/*
590(1)01:42 PM:podvig 0> make t3
cc -g -pedantic -Wall -Wextra -lm t3.c -o t3
591(1)01:43 PM:podvig 0> splint +boolint -boolops -exportlocal t3.c
Splint 3.1.2 --- 23 Aug 2008

Finished checking --- no warnings
592(1)01:43 PM:podvig 0>

*/

#include <ctype.h>
#include <stdio.h>
#include <string.h>

int israd (int c){return (int)'#'==c;}
int isdot (int c){return (int)'.'==c;}

int ise (int c){return !!strchr("eE",c);}

int issign (int c){return !!strchr("+-",c);}

int isdelim (int c){return !!strchr("()<>{}[]%/",c);}
int isregular(int c)

{return c!=EOF && !isspace(c) && !isdelim(c);}

typedef struct test test;
struct test {
int (*fp)(int); int y, n;
};

/* ^[+-]?\d+$ */
test fsm_dec[] = {

/* 0*/ { issign, 1, 1 },

/* 1*/ { isdigit, 2, -1 }, /* [+-]?! ??(\d)?? */
/* 2*/ { isdigit, 2, -1 }, /* [+-]?\d\d* yes! */
};
/*acceptable decimal?*/
int acc_dec(int i){
return i==2;
}

/* ^\d+[#][a-Z0-9]+$ */
test fsm_rad[] = {

/* 0*/ { isdigit, 1, -1 },

/* 1*/ { isdigit, 1, 2 }, /* \d\d* */
/* 2*/ { israd, 3, -1 }, /* \d\d*[^\d] */
/* 3*/ { isalnum, 4, -1 }, /* \d\d*# */
/* 4*/ { isalnum, 4, -1 }, /* \d\d*#\x\x* yes! */
};
/*acceptable radix?*/
int acc_rad(int i){
return i==4;
}

/* ^[+-]?(\d+(\.\d*)?)|(\d*\.\d+)([eE][+-]?\d+)?$ */
test fsm_real[] = {

/* 0*/ { issign, 1, 1 },

/* 1*/ { isdigit, 2, 4 }, /* [+-]? */
/* 2*/ { isdigit, 2, 3 }, /* [+-]?\d\d* yes! */
/* 3*/ { isdot, 6, 7 }, /* [+-]?\d\d*[^\d] */
/* 4*/ { isdot, 5, -1 }, /* [+-]?[^\d] */
/* 5*/ { isdigit, 6, -1 }, /* [+-]?\. */
/* 6*/ { isdigit, 6, 7 }, /* [+-]?(\d\d*)?\.\d* yes! */
/* 7*/ { ise, 8, -1 }, /* [+-]?(\d\d*)?(\.\d*)? */
/* 8*/ { issign, 9, 9 }, /* [+-]?(\d\d*)?(\.\d*)?[eE] */
/* 9*/ { isdigit, 10, -1 }, /* [+-]?(\d\d*)?(\.\d*)?[eE][+-]? */
/*10*/ { isdigit, 10, -1 }, /* [+-]?(\d\d*)?(\.\d*)?[eE][+-]?\d\d*
yes! */
};
/*acceptable real*/
int acc_real(int i){
switch(i) {case 2:case 6:case 10:return 1;}
return 0; }

int czek(char *s, test *fsm, int(*yes)(int)){

int sta = 0;
while(sta!=-1 && *s) {

if (fsm[sta].fp((int)*s))

{ sta = fsm[sta].y; s++; }
else{ sta = fsm[sta].n; }
}

sta=yes(sta); /*did it end in success?*/
return sta; }

int grok(char *s) {
if (czek(s, fsm_dec, acc_dec )) {
printf( "dec: %s\n", s); return 0; }
else if (czek(s, fsm_rad, acc_rad )) {
printf( "rad: %s\n", s); return 0; }
else if (czek(s, fsm_real,acc_real)) {
printf("real: %s\n", s); return 0; }
else {
printf("grok? %s\n", s);
}
return -1; }

int puff(char *buf, int nbuf) {

int c; char *s = buf;
while ( (c=getchar()), isregular(c) ) {
if(s-buf >= nbuf-1) return -1;
*s++ = (char)c;
}
*s = (char)0;
return 0; }

int toke(char *buf, int nbuf) {

int sta = 0; char *s=buf;
while(isspace(*s=(char)getchar())) /**/;
s++;

(void)( (-1== (sta=puff(s,nbuf-1)) )
|| (-1== (sta=grok(buf)) ) );

return sta; }

#define NBUF 10
int main() { char buf[NBUF] = "";

while (-1 != toke(buf,NBUF)) /**/;
return 0; }

/*eof*/

Keith Thompson

unread,

Mar 25, 2009, 3:15:31 PM3/25/09

to

luser-ex-troll <mij...@yahoo.com> writes:
> Traced, debugged, sieved, and splinted; is it stylish yet?

Just a few random comments; I haven't read it closely enough to make
an overall critique.

[...]

> int israd (int c){return (int)'#'==c;}
> int isdot (int c){return (int)'.'==c;}

The cast is unnecessary; character constants are already of type int.
(And I personally dislike the "constant == variable" style, but I know
a lot of people like and use it.)

> int ise (int c){return !!strchr("eE",c);}

I would have written this as
strchr(...) != NULL
rather than
!!strchr(...)

Trsnss s nt lwys a vrt.

Excuse men, I mean:
Terseness is not always a virtue.

[...]

> int toke(char *buf, int nbuf) {
> int sta = 0; char *s=buf;
> while(isspace(*s=(char)getchar())) /**/;

The cast is unnecessary; without it, the result of getchar() will be
implicitly converted to char by the assignment.

In fact, *most* casts are unnecessary.

> s++;
> (void)( (-1== (sta=puff(s,nbuf-1)) )
> || (-1== (sta=grok(buf)) ) );
> return sta; }

In the statement before the return, you're computing results and
throwing them away -- and discarding clarity along with them. I think
the statement is equivalent to:

if ((sta=puff(s,nbuf-1)) != -1) {
sta=grok(buf);
}

or, even better:

if ((sta = puff(s, nbuf - 1)) != -1) {
sta = grok(buf);
}

> #define NBUF 10
> int main() { char buf[NBUF] = "";
> while (-1 != toke(buf,NBUF)) /**/;
> return 0; }
>
> /*eof*/

I find your code layout to be jarring. I'd write the above as:

int main(void)
{
char buf[NBUF] = "";
while (-1 != toke(buf, NBUF)) {
continue;
}
return 0;
}

(Actually there are some other changes I'd make, but I limited myself
to adding the void keyword and changing the layout.)

luser-ex-troll

unread,

Mar 25, 2009, 4:16:50 PM3/25/09

to

On Mar 25, 2:15 pm, Keith Thompson <ks...@mib.org> wrote:

> luser-ex-troll <mijo...@yahoo.com> writes:
> > Traced, debugged, sieved, and splinted; is it stylish yet?
>
> Just a few random comments; I haven't read it closely enough to make
> an overall critique.

I'm grateful nonetheless.

> [...]
>
> > int israd (int c){return (int)'#'==c;}
> > int isdot (int c){return (int)'.'==c;}
>
> The cast is unnecessary; character constants are already of type int.
> (And I personally dislike the "constant == variable" style, but I know
> a lot of people like and use it.)
>

I chose it here so the 'c's would line up nicely relative to the
strchr counterparts, and then followed through elsewhere for
consistency.

> > int ise (int c){return !!strchr("eE",c);}
>
> I would have written this as
> strchr(...) != NULL
> rather than
> !!strchr(...)
>
> Trsnss s nt lwys a vrt.
>
> Excuse men, I mean:
> Terseness is not always a virtue.
>

With the exception of wooing a Russian princess will billiard chalk, I
agree. I started with no punctuation at all but splint complained
about type mismatching, and with focus on brevity, I opted for the
minimal syntax adjustment (another option was adding -type to splint,
but that seemed to be throwing out the baby).

> [...]
>
> > int toke(char *buf, int nbuf) {
> > int sta = 0; char *s=buf;
> > while(isspace(*s=(char)getchar())) /**/;
>
> The cast is unnecessary; without it, the result of getchar() will be
> implicitly converted to char by the assignment.
>
> In fact, *most* casts are unnecessary.
>

Indeed, but splint demands either this or a +charint switch.

> > s++;
> > (void)( (-1== (sta=puff(s,nbuf-1)) )
> > || (-1== (sta=grok(buf)) ) );
> > return sta; }
>
> In the statement before the return, you're computing results and
> throwing them away -- and discarding clarity along with them. I think
> the statement is equivalent to:
>
> if ((sta=puff(s,nbuf-1)) != -1) {
> sta=grok(buf);
> }
>
> or, even better:
>
> if ((sta = puff(s, nbuf - 1)) != -1) {
> sta = grok(buf);
> }
>

Yes. That's 500% better. Thanks.

> > #define NBUF 10
> > int main() { char buf[NBUF] = "";
> > while (-1 != toke(buf,NBUF)) /**/;
> > return 0; }
>
> > /*eof*/
>
> I find your code layout to be jarring. I'd write the above as:
>
> int main(void)
> {
> char buf[NBUF] = "";
> while (-1 != toke(buf, NBUF)) {
> continue;
> }
> return 0;
> }
>
> (Actually there are some other changes I'd make, but I limited myself
> to adding the void keyword and changing the layout.)

Seriously? 8 lines versus 3? Perhaps my hardware constraints have
suggested more terseness (The olpc xo-1 has a 6"x4.5" lcd), but here I
think it really pays off. It is, after all, a stub for testing the
module before linking into the larger program.

But I'm truly curious to know how you would really format it: blank
lines after declarations and before return?

--
l*t
? fix post stand under you do

Keith Thompson

unread,

Mar 25, 2009, 4:43:32 PM3/25/09

to

luser-ex-troll <mij...@yahoo.com> writes:
> On Mar 25, 2:15 pm, Keith Thompson <ks...@mib.org> wrote:
>> luser-ex-troll <mijo...@yahoo.com> writes:

[...]

>> > #define NBUF 10
>> > int main() { char buf[NBUF] = "";
>> > while (-1 != toke(buf,NBUF)) /**/;
>> > return 0; }
>>
>> > /*eof*/
>>
>> I find your code layout to be jarring. I'd write the above as:
>>
>> int main(void)
>> {
>> char buf[NBUF] = "";
>> while (-1 != toke(buf, NBUF)) {
>> continue;
>> }
>> return 0;
>> }
>>
>> (Actually there are some other changes I'd make, but I limited myself
>> to adding the void keyword and changing the layout.)
>
> Seriously? 8 lines versus 3? Perhaps my hardware constraints have
> suggested more terseness (The olpc xo-1 has a 6"x4.5" lcd), but here I
> think it really pays off. It is, after all, a stub for testing the
> module before linking into the larger program.
>
> But I'm truly curious to know how you would really format it: blank
> lines after declarations and before return?

There are several minor style points here on which I'm undecided. I
might put the opening brace for the function either on the same line
as the prototype or on the next line by itself; the former is more
consistent with they way I use braces in other contexts, and the
latter is probably a throwback to K&R C, where parameter declarations
are typically separated from the function declaration.

How to write a loop with an emtpy body is another thing on which I'm
undecided. I always use braces for compound statements, even when
they're not necessary (a habit I picked up from Perl where they're
always mandatory, but I find it safer and more consistent in C as
well). I used the "continue" keyword here because I think it clearly
expresses what's going on; I might use an empty comment instead if I
were in the mood. I wouldn't use
while (condition);
because it's just too terse for my tastes, and too easy to mistake for
a typo. Your own empty comment on the same line isn't bad.

I might put a blank line between the declarations and statements, but
I might not bother for something this small.

I think I see why you put the return statement on a different
indentation than the rest of the function body, but I wouldn't do it
that way; syntactically, return is just another statement. And I
really dislike putting code on a line after a '{', or before a '}'.

Here's another way I might write it if I were a bit more concerned
with vertical space.

int main(void) {
char buf[NBUF] = "";

while (toke(buf, NBUF) != -1) continue;
return 0;
}

In real life, I'd follow my employer's coding standards if I were
writing code for work, or the style of the existing code if I were
working on an existing project. But if I were writing my own code for
my own purposes, I'd feel free to indulge my own idiosyncracies (which
are of course far more rational and consistent than everyone else's
idiosyncracies).

luser-ex-troll

unread,

Mar 25, 2009, 5:15:50 PM3/25/09

to

Thanks. This is exactly what I've been itching for.

My final super-terse version actually began like the 5-line version
here. Then while doing lots of scrolling up and down I thought: J J
<< . And then it seemed nice to put the return type and the return
value on the same level to help keep straight when the int is
representing true/false as 1/0 or 0/-1. I don't plan on adopting this
format as a general habit, but the specific situation suggested a
benefit.

Fortunately, I am at my own mercy for decisions of this sort, but that
makes me responsible for the decision as well.

For most purposes I agree with your recommendations and appreciate the
sound ground upon which they stand.

--
lxt
his name is bobo. he knows no mercy.

pete

unread,

Mar 25, 2009, 6:14:58 PM3/25/09

to

Keith Thompson wrote:
> I wouldn't use
> while (condition);
> because it's just too terse for my tastes, and too easy to mistake for
> a typo.

I would write that this way:

while (condition) {
;
}

--
pete

luser-ex-troll

unread,

Mar 25, 2009, 6:22:44 PM3/25/09

to

nice. but what about:

while(condition){;}

?

--
lxt
as funky as you wan na be

Richard

unread,

Mar 25, 2009, 6:27:18 PM3/25/09

to

luser-ex-troll <mij...@yahoo.com> writes:

Any C programmer capable of following any C is not going to think

while (condition);

is a typo if there is no further bracketing and indentation.

while(condition){;}

and

while (condition) {
;
}

are far less obvious or readable IMO.

pete

unread,

Mar 25, 2009, 6:35:48 PM3/25/09

to

Richard wrote:
> luser-ex-troll <mij...@yahoo.com> writes:
>
>> On Mar 25, 5:14 pm, pete <pfil...@mindspring.com> wrote:
>>> Keith Thompson wrote:
>>>> I wouldn't use
>>>> while (condition);
>>>> because it's just too terse for my tastes, and too easy to mistake for
>>>> a typo.
>>> I would write that this way:
>>>
>>> while (condition) {
>>> ;
>>> }

> Any C programmer capable of following any C is not going to think
>
> while (condition);
>
> is a typo if there is no further bracketing and indentation.

That's not the entire problem.
Sometimes the next line can be easily mistaken
for part of the loop.

--
pete

luser-ex-troll

unread,

Mar 25, 2009, 6:50:28 PM3/25/09

to

On Mar 25, 5:35 pm, pete <pfil...@mindspring.com> wrote:
> Richard wrote:

what about?

while(condition) { /* (: ;) */; }

smiles are free!

--
lxt

Keith Thompson

unread,

Mar 25, 2009, 7:09:16 PM3/25/09

to

luser-ex-troll <mij...@yahoo.com> writes:
[...]

> what about?
>
> while(condition) { /* (: ;) */; }
>
> smiles are free!

It's cute.

Determining whether I meant that as a compliment is left as an
exercise.

luser-ex-troll

unread,

Mar 25, 2009, 7:38:05 PM3/25/09

to

On Mar 25, 6:09 pm, Keith Thompson <ks...@mib.org> wrote:

> luser-ex-troll <mijo...@yahoo.com> writes:
>
> [...]
>
> > what about?
>
> > while(condition) { /* (: ;) */; }
>
> > smiles are free!
>
> It's cute.
>
> Determining whether I meant that as a compliment is left as an
> exercise.
>

Though that remain a mystery (subjunctive, right? cause /he/ knows),
this thread should provide useful fodder for anyone searching the
archive for replacing gotos, coding a finite state machine to execute
a regular expression, how to make C look like something else, and
unnecessary cuteness.

--
lxt
"Real programmers set the universal constants at the start such that
the
universe evolves to contain the disk with the data they want."

Nate Eldredge

unread,

Mar 25, 2009, 7:39:51 PM3/25/09

to

luser-ex-troll <mij...@yahoo.com> writes:

I personally tend to write

while (condition) ;

I think the added space draws more attention to the empty body, since I
ordinarily don't leave spaces before semicolons at the end of a statement.

I also sometimes do

while (condition) /* keep going */ ;

pete

unread,

Mar 25, 2009, 7:55:22 PM3/25/09

to

I don't like to write trivial comments.
The thing about comments, especially trivial ones,
is that they don't always get updated
when the code changes when the programmers are busy
trying to get the code to do what they want it to do.

Working with deadlines, causes programmers to try to work fast.
I like having conventions which reduce the amount of decissions
I have to make about how to write something.

I have never worked in a place where the coding conventions
prohibited one line looops, but I have heard of it.

--
pete

CBFalconer

unread,

Mar 25, 2009, 7:16:47 PM3/25/09

to

I would write:

while (condition) continue;

Richard Heathfield

unread,

Mar 25, 2009, 9:04:18 PM3/25/09

to

CBFalconer said:

> pete wrote:
>> Keith Thompson wrote:
>>
>>> I wouldn't use
>>> while (condition);
>>> because it's just too terse for my tastes, and too easy to
>>> mistake for a typo.
>>
>> I would write that this way:
>>
>> while (condition) {
>> ;
>> }
>
> I would write:
>
> while (condition) continue;

Putting the 'continue' on a separate line makes it clearer exactly
when the condition is met (when stepping through the code with a
debugger - not something I do a lot nowadays, but I used to, and
some people still do). And using the (optional) braces to mark off
the loop body is a good habit to get into, as it can save all kinds
of embarrassment later when the loop is maintained.

--
Richard Heathfield <http://www.cpax.org.uk>
Email: -http://www. +rjh@
Google users: <http://www.cpax.org.uk/prg/writings/googly.php>
"Usenet is a strange place" - dmr 29 July 1999

Richard Harter

unread,

Mar 25, 2009, 10:03:51 PM3/25/09

to

On Wed, 25 Mar 2009 17:14:58 -0500, pete <pfi...@mindspring.com>
wrote:

My view on this is that a loop without a body should be obvious
to the reader. If the next line then it is a one line loop.
Either

while (condition);
or
while (condition) {}

is okay; anything more is sheer pointless ugliness.

CBFalconer

unread,

Mar 25, 2009, 10:04:59 PM3/25/09

to

Richard Heathfield wrote:
> CBFalconer said:
>> pete wrote:
>>> Keith Thompson wrote:
>>>
>>>> I wouldn't use
>>>> while (condition);
>>>> because it's just too terse for my tastes, and too easy to
>>>> mistake for a typo.
>>>
>>> I would write that this way:
>>>
>>> while (condition) {
>>> ;
>>> }
>>
>> I would write:
>>
>> while (condition) continue;
>
> Putting the 'continue' on a separate line makes it clearer exactly
> when the condition is met (when stepping through the code with a
> debugger - not something I do a lot nowadays, but I used to, and
> some people still do). And using the (optional) braces to mark off
> the loop body is a good habit to get into, as it can save all kinds
> of embarrassment later when the loop is maintained.

Since those are not among my objectives, I would do none of them.

Richard Harter

unread,

Mar 25, 2009, 11:16:21 PM3/25/09

to

On Thu, 26 Mar 2009 02:03:51 GMT, c...@tiac.net (Richard Harter)
wrote:

>On Wed, 25 Mar 2009 17:14:58 -0500, pete <pfi...@mindspring.com>
>wrote:
>
>>Keith Thompson wrote:
>>> I wouldn't use
>>> while (condition);
>>> because it's just too terse for my tastes, and too easy to mistake for
>>> a typo.
>>
>>I would write that this way:
>>
>> while (condition) {
>> ;
>> }
>
>My view on this is that a loop without a body should be obvious
>to the reader. If the next line then it is a one line loop.

^ is not indented

pete

unread,

Mar 26, 2009, 12:36:34 AM3/26/09

to

Richard Harter wrote:
> On Thu, 26 Mar 2009 02:03:51 GMT, c...@tiac.net (Richard Harter)
> wrote:
>
>> On Wed, 25 Mar 2009 17:14:58 -0500, pete <pfi...@mindspring.com>
>> wrote:
>>
>>> Keith Thompson wrote:
>>>> I wouldn't use
>>>> while (condition);
>>>> because it's just too terse for my tastes, and too easy to mistake for
>>>> a typo.
>>> I would write that this way:
>>>
>>> while (condition) {
>>> ;
>>> }
>> My view on this is that a loop without a body should be obvious
>> to the reader. If the next line then it is a one line loop.
> ^ is not indented

I've seen code where the next line *is* indented.

--
pete

Nate Eldredge

unread,

Mar 26, 2009, 2:06:28 AM3/26/09

to

pete <pfi...@mindspring.com> writes:

> Nate Eldredge wrote:
>> I personally tend to write
>>
>> while (condition) ;
>>
>> I think the added space draws more attention to the empty body, since I
>> ordinarily don't leave spaces before semicolons at the end of a statement.
>>
>> I also sometimes do
>>
>> while (condition) /* keep going */ ;
>
> I don't like to write trivial comments.
> The thing about comments, especially trivial ones,
> is that they don't always get updated
> when the code changes when the programmers are busy
> trying to get the code to do what they want it to do.

It's a good point. I think this is a little different from the usual

x = 3; /* assign 3 to x */

because in some sense the comment *is* the code. But it's a subtle
distinction even in my mind.

Han from China

unread,

Mar 26, 2009, 1:05:10 AM3/26/09

to

Richard Harter wrote:
> My view on this is that a loop without a body should be obvious
> to the reader. If the next line then it is a one line loop.
> Either
>
> while (condition);

Yes, while(condition); should suffice. If you hire someone who
can't see the empty loop body in that, then you need to have
a word with HR, since that kind of employee is probably going
to screw up the code during work and clog the water cooler
with crazy putty during lunch breaks. However, the economy
being what it is, if you must hire someone like that, then
the following should be adequate:

/*****************************************************/
/* EMPTY LOOP BODY BELOW THE BELOW COMMENT, DUMBASS! */
/* EMPTY LOOP BODY BELOW, DUMBASS! */
while(condition); /* HERE IT IS! */
/* EMPTY LOOP BODY ABOVE, DUMBASS! */
/* EMPTY LOOP BODY ABOVE THE ABOVE COMMENT, DUMBASS! */
/*****************************************************/

Yours,
Han from China

--
"Only entropy comes easy." -- Anton Chekhov

Han from China

unread,

Mar 26, 2009, 1:12:59 AM3/26/09

to

Richard Harter wrote:
>> Richard Harter, c...@tiac.net
>> http://home.tiac.net/~cri, http://www.varinoma.com
>> If I do not see as far as others, it is because
>> I stand in the footprints of giants.

Just to beat the others to it: Richard, DON'T QUOTE SIGS. It's
against THE RULES, and I need THE RULES to have some kind of
ORDER without STRESS, since with STRESS, my MEDICATION doesn't
have the full BENEFITS. Please DON'T quote SIGS. Thank YOU.

P.S. two CRACKS on the sidewalk, MUST avoid the CRACKS.

Phil Carmody

unread,

Mar 26, 2009, 3:32:35 AM3/26/09

to

CBFalconer <cbfal...@yahoo.com> writes:
> Richard Heathfield wrote:
>> CBFalconer said:
>>> pete wrote:
>>>> Keith Thompson wrote:
>>>>
>>>>> I wouldn't use
>>>>> while (condition);
>>>>> because it's just too terse for my tastes, and too easy to
>>>>> mistake for a typo.
>>>>
>>>> I would write that this way:
>>>>
>>>> while (condition) {
>>>> ;
>>>> }
>>>
>>> I would write:
>>>
>>> while (condition) continue;
>>
>> Putting the 'continue' on a separate line makes it clearer exactly
>> when the condition is met (when stepping through the code with a
>> debugger - not something I do a lot nowadays, but I used to, and
>> some people still do). And using the (optional) braces to mark off
>> the loop body is a good habit to get into, as it can save all kinds
>> of embarrassment later when the loop is maintained.
>
> Since those are not among my objectives, I would do none of them.

You never intend to maintain your code? Yikes, that's a
scary attitude.

Phil
--
Marijuana is indeed a dangerous drug.
It causes governments to wage war against their own people.
-- Dave Seaman (sci.math, 19 Mar 2009)

Flash Gordon

unread,

Mar 26, 2009, 3:07:21 AM3/26/09

to

Someone else once suggested
while (condition) continue;
I decided I like it so now use it.
--
Flash Gordon

Phil Carmody

unread,

Mar 26, 2009, 4:23:46 AM3/26/09

to

Unwrapping the echelon, putting names next to styles:

>>> On Mar 25, 5:14 pm, pete <pfil...@mindspring.com> wrote:

while (condition) {
;
}

>> luser-ex-troll <mij...@yahoo.com> writes:
while(condition){;}

> Nate Eldredge wrote:
while (condition) ;

while (condition) /* keep going */ ;

Flash Gordon <sm...@spam.causeway.com> writes:
while (condition) continue;

I can see continue's merits. I might try that and see if it fits.
I find I don't use continue for anything (I've even worked in a
company where it was against the coding standards as it was a
'confusing minority technique' or some crap like that), so it
seems as if it might feel odd typing it. At least the first few
times.

Traditionally I'm between Pete and l-e-t:
while(condition) { ; }
as I like to draw a little attention, but only a little, to the
empty block. I'm a religious block-rather-than-statement user.

nick_keigh...@hotmail.com

unread,

Mar 26, 2009, 4:33:00 AM3/26/09

to

On 26 Mar, 04:36, pete <pfil...@mindspring.com> wrote:
> Richard Harter wrote:
> > On Thu, 26 Mar 2009 02:03:51 GMT, c...@tiac.net (Richard Harter)
> > wrote:

> >> On Wed, 25 Mar 2009 17:14:58 -0500, pete <pfil...@mindspring.com>
> >> wrote:
> >>> Keith Thompson wrote:

> >>>> I wouldn't use
> >>>> while (condition);

> >>>> because it's just too terse for my tastes, and too easy to mistake for
> >>>> a typo.
> >>> I would write that this way:
>
> >>> while (condition) {
> >>> ;
> >>> }
> >> My view on this is that a loop without a body should be obvious
> >> to the reader. If the next line then it is a one line loop.
> > ^ is not indented
>
> I've seen code where the next line *is* indented.

that's my style

while (condition)
;

I like the ; on a line of it's own so it stands out

Richard Heathfield

unread,

Mar 26, 2009, 7:08:18 AM3/26/09

to

CBFalconer said:

> Richard Heathfield wrote:
>> CBFalconer said:

<snip>

>>> I would write:
>>>
>>> while (condition) continue;
>>
>> Putting the 'continue' on a separate line makes it clearer
>> exactly when the condition is met (when stepping through the code
>> with a debugger - not something I do a lot nowadays, but I used
>> to, and some people still do). And using the (optional) braces to
>> mark off the loop body is a good habit to get into, as it can
>> save all kinds of embarrassment later when the loop is
>> maintained.
>
> Since those are not among my objectives, I would do none of them.

That certainly shows consistency, anyway. But my advice was not
aimed at you. I have long since given up trying to educate you, but
your mistakes can be used to educate others.

pete

unread,

Mar 26, 2009, 7:31:11 AM3/26/09

to

I meant that I have seen code indented like this:

while (condition);
printf("Hello ");
printf("World\n");

> while (condition)
> ;
>
> I like the ; on a line of it's own so it stands out

--
pete

Richard Heathfield

unread,

Mar 26, 2009, 7:31:53 AM3/26/09

to

pete said:

<snip>

> I meant that I have seen code indented like this:
>
> while (condition);
> printf("Hello ");
> printf("World\n");

Ouch. And that's why I always use {}

pete

unread,

Mar 26, 2009, 8:19:46 AM3/26/09

to

pete wrote:

> I have never worked in a place where the coding conventions
> prohibited one line looops, but I have heard of it.

http://www.psgd.org/paul/docs/cstyle/cstyle08.htm

--
pete

Richard Harter

unread,

Mar 26, 2009, 10:18:32 AM3/26/09

to

On Thu, 26 Mar 2009 06:31:11 -0500, pete <pfi...@mindspring.com>
wrote:

I dare say you have; people have all sorts of notions. Perhaps I
am wrong but I would have supposed that good coders indent only
when a new block is started or when a single statement is being
written on more than one line. (Or in special situations not
covered by these two cases. :-))

IMO the major function of indentation is to visually represent
subordination, i.e., B is indented with respect to A if B in some
sense is a part of A. One shouldn't indent unless there is a
specific reason for indenting.

An objections to the formatting in the example code is that it is
fragile. It suggests that the indented statement is subordinate
when it is not. Regardless of that it is all too easy to miss the
presence or absence of a semicolon on the "while" line.

My view is that one always uses braces unless (a) the body does
not contain multiple lines and (b) it can fit on the line.

These are style issues; as such they are a matter of personal
preference. Still, there are guides to good style, such as
simplicity and consistency.

Keith Thompson

unread,

Mar 26, 2009, 11:17:04 AM3/26/09

to

c...@tiac.net (Richard Harter) writes:
> On Thu, 26 Mar 2009 06:31:11 -0500, pete <pfi...@mindspring.com> wrote:

[...]

>>I meant that I have seen code indented like this:
>>
>> while (condition);
>> printf("Hello ");
>> printf("World\n");
>>
>>
>>> while (condition)
>>> ;
>>>
>>> I like the ; on a line of it's own so it stands out
>
> I dare say you have; people have all sorts of notions. Perhaps I
> am wrong but I would have supposed that good coders indent only
> when a new block is started or when a single statement is being
> written on more than one line. (Or in special situations not
> covered by these two cases. :-))

[...]

I've seen too many cases where the indentation is inconsistent because
the author had a different tabstop setting than I do (e.g., 4 columns
vs. 8). I've even seen inconsistencies within the same file, where
apparently two maintainers had different tabstop settings.

Coding standards should ban the use of tab characters in C source
code.

Richard

unread,

Mar 26, 2009, 11:45:15 AM3/26/09

to

You echo my thoughts oh great one.

--
"Avoid hyperbole at all costs, its the most destructive argument on
the planet" - Mark McIntyre in comp.lang.c

Richard

unread,

Mar 26, 2009, 11:47:44 AM3/26/09

to

Flash Gordon <sm...@spam.causeway.com> writes:

How horrendous. The first thing I looked for seeing that was a higher
level loop.

Hint : "while(--x);" executes no consitional statement so include no
conditional statement.

If a C programmer can not see what

while(x--);

does then he has no business in the code and will certainly screw up at

while(*d++=*s++);

time.

Richard Harter

unread,

Mar 26, 2009, 11:49:18 AM3/26/09

to

On Thu, 26 Mar 2009 08:17:04 -0700, Keith Thompson
<ks...@mib.org> wrote:

>c...@tiac.net (Richard Harter) writes:
>> On Thu, 26 Mar 2009 06:31:11 -0500, pete <pfi...@mindspring.com> wrote:
>[...]
>>>I meant that I have seen code indented like this:
>>>
>>> while (condition);
>>> printf("Hello ");
>>> printf("World\n");
>>>
>>>
>>>> while (condition)
>>>> ;
>>>>
>>>> I like the ; on a line of it's own so it stands out
>>
>> I dare say you have; people have all sorts of notions. Perhaps I
>> am wrong but I would have supposed that good coders indent only
>> when a new block is started or when a single statement is being
>> written on more than one line. (Or in special situations not
>> covered by these two cases. :-))
>[...]
>
>I've seen too many cases where the indentation is inconsistent because
>the author had a different tabstop setting than I do (e.g., 4 columns
>vs. 8). I've even seen inconsistencies within the same file, where
>apparently two maintainers had different tabstop settings.
>
>Coding standards should ban the use of tab characters in C source
>code.

I've seen the same thing and I agree about banning tab
characters. One of the conveniences of the vslick editor that I
use is that by default it automatically converts tabs into spaces
when text is entered. I imagine that other editors have
something similar.

Ben Bacarisse

unread,

Mar 26, 2009, 12:19:45 PM3/26/09

to

Richard <rgr...@gmail.com> writes:

> Flash Gordon <sm...@spam.causeway.com> writes:
<snip>

>> Someone else once suggested
>> while (condition) continue;
>> I decided I like it so now use it.
>
> How horrendous. The first thing I looked for seeing that was a higher
> level loop.

It is almost impossible code so as to guard against all possible
misunderstandings.

> Hint : "while(--x);" executes no consitional statement so include no
> conditional statement.

That is not right -- the ; is not part of the while. The above /does/
execute a statement conditionally, the question is simply about how
best to write that it. C offers two obvious choices: an expression
statement with no expression, and a "continue" statement. My
preference is, like you, for the empty expression statement but I have
used the other form in the past.

> If a C programmer can not see what
>
> while(x--);
>
> does then he has no business in the code

I would say the same about while(x--) continue;

--
Ben.

Richard

unread,

Mar 26, 2009, 12:35:39 PM3/26/09

to

Ben Bacarisse <ben.u...@bsb.me.uk> writes:

> Richard <rgr...@gmail.com> writes:
>
>> Flash Gordon <sm...@spam.causeway.com> writes:
> <snip>
>>> Someone else once suggested
>>> while (condition) continue;
>>> I decided I like it so now use it.
>>
>> How horrendous. The first thing I looked for seeing that was a higher
>> level loop.
>
> It is almost impossible code so as to guard against all possible
> misunderstandings.

Well, err, yes.

>
>> Hint : "while(--x);" executes no consitional statement so include no
>> conditional statement.
>
> That is not right -- the ; is not part of the while. The above /does/
> execute a statement conditionally, the question is simply about how
> best to write that it. C offers two obvious choices: an expression
> statement with no expression, and a "continue" statement. My
> preference is, like you, for the empty expression statement but I have
> used the other form in the past.

Incredibly pedantic Ben. I just threw that in. In this case in question
it is always the case.

>
>> If a C programmer can not see what
>>
>> while(x--);
>>
>> does then he has no business in the code
>
> I would say the same about while(x--) continue;

You would but only to be contrary:-; Tne point is that there IS extra code
here that is not required. That is "un-C-like" IMO.

My point with the above is that I saw the "continue" before the While as
I scanned down.

One can not legislate for everyone so keep the code terse and
"C". Adding in empty brackets etc is akin to #define PRINT in my
experience.

luser-ex-troll

unread,

Mar 26, 2009, 12:41:52 PM3/26/09

to

On Mar 26, 11:19 am, Ben Bacarisse <ben.use...@bsb.me.uk> wrote:
> Richard <rgrd...@gmail.com> writes:

I both agree and disagree with both of you. Although while(cond); is
syntactically valid, and any C programmer worth their salt should
understand this if they look at it. But one indulges in speculation
about how best to offer that minimum bit of help which may be needed
by some future reader without being obnoxious to those who can grok it
without such aid.

For more fuel to the fire, I've investigated the hodge-podge of int-
bool interpretations in this little program and am surprised at the
results. Is it normal for different functions in the same file to have
such wildly differing notions about good and evil?

#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* character classes */
#define eq(a,b) (int)a==b
#define within(a,b) strchr(a,b)!=NULL
int israd (int c){return eq('#',c);}
int isdot (int c){return eq('.',c);}
int ise (int c){return within("eE",c);}
int issign (int c){return within("+-",c);}
int isdelim (int c){return within("()<>{}[]%/",c);}
int isregular(int c)
{return c!=EOF && !isspace(c) && !isdelim(c);}
#undef within
#undef eq

typedef struct { int (*fp)(int); int y, n; } test;

/* n.b. this machine has no x+, use xx* */
/* ^[+-]?\d+$ */
test fsm_dec[] = {
/* 0*/ { issign, 1, 1 },
/* 1*/ { isdigit, 2, -1 }, /* [+-]?! ??(\d)?? */
/* 2*/ { isdigit, 2, -1 }, /* [+-]?\d\d* yes! */
};
int acc_dec(int i){ return i==2; } /*acceptable decimal?*/

/* ^\d+[#][a-Z0-9]+$ */
test fsm_rad[] = {
/* 0*/ { isdigit, 1, -1 },
/* 1*/ { isdigit, 1, 2 }, /* \d\d* */
/* 2*/ { israd, 3, -1 }, /* \d\d*[^\d] */
/* 3*/ { isalnum, 4, -1 }, /* \d\d*# */
/* 4*/ { isalnum, 4, -1 }, /* \d\d*#\x\x* yes! */
};
int acc_rad(int i){ return i==4; } /*acceptable radix?*/

/* ^[+-]?(\d+(\.\d*)?)|(\d*\.\d+)([eE][+-]?\d+)?$ */
test fsm_real[] = {
/* 0*/ { issign, 1, 1 },
/* 1*/ { isdigit, 2, 4 }, /* [+-]? */
/* 2*/ { isdigit, 2, 3 }, /* [+-]?\d\d* yes! */
/* 3*/ { isdot, 6, 7 }, /* [+-]?\d\d*[^\d] */
/* 4*/ { isdot, 5, -1 }, /* [+-]?[^\d] */
/* 5*/ { isdigit, 6, -1 }, /* s?\. where s is [+-] */
/* 6*/ { isdigit, 6, 7 }, /* s?(\d\d*)?\.\d* yes! */
/* 7*/ { ise, 8, -1 }, /* s?(\d\d*)?(\.\d*)? */
/* 8*/ { issign, 9, 9 }, /* s?\d*(\.\d*)?[eE] */
/* 9*/ { isdigit, 10, -1 }, /* s?\d*(\.\d*)?[eE][+-]? */
/*10*/ { isdigit, 10, -1 }, /* s?\d*(\.\d*)?[eE]s?\d\d* yes! */
};
/*acceptable real*/
int acc_real(int i){
switch(i) { case 2: case 6: case 10: return 1; }
return 0; }

/*1/0*/
/*what's the reverse of reverse polish?*/
int czek(char *s, test *fsm, int(*yes)(int)){
int sta = 0;
while(sta!=-1 && *s) {
if (fsm[sta].fp((int)*s)) { sta=fsm[sta].y; s++; }
else { sta=fsm[sta].n; } }
return yes(sta); }

/*0/-1*/
int grok(char *s) { /*dig?*/
if (czek(s, fsm_dec, acc_dec )) {
printf( "dec: %s\n", s); return 0; }
else if (czek(s, fsm_rad, acc_rad )) {
printf( "rad: %s\n", s); return 0; }
else if (czek(s, fsm_real,acc_real)) {
printf("real: %s\n", s); return 0; }
/*else*/
printf("grok? %s\n", s);
return -1; }

/*0/-1*/
int puff(char *buf, int nbuf) {
int c; char *s = buf;
while ( (c=getchar()), isregular(c) ) {
if(s-buf >= nbuf-1) return -1;
*s++ = (char)c; }
*s = (char)0; (void)ungetc(c,stdin);
return 0; }

/*1/-1*/
int toke(char *buf, int nbuf) {
int sta = 0; char *s=buf;
while (isspace(*s=(char)getchar())) /*jump down, turn around*/;
if ((sta=puff(++s,nbuf-1))!=-1)
sta=grok(buf);
return sta; }

/*0/1*/
#define NBUF 10
int main(void) { char buf[NBUF] = "";
while (-1 != toke(buf,NBUF)) /*pick a bale a day*/;
return 0; }

/*eof*/

Richard

unread,

Mar 26, 2009, 1:18:10 PM3/26/09

to

luser-ex-troll <mij...@yahoo.com> writes:

While sounding reasonable your thinking is basically flawed. If you need
to add "help" for someone to understand

while(x--);

then where do you end?

Do you add a comment to explain why you are NOT casting malloc()?

Someone that does not understand that expression has no place modifying
the code.

The semi colon is an important part of C and, rightly or wrongly, one
must learn to put precedence on recognising it.

I've heard and read all the explanations from people over the years
about how they think they are hinting the code. But, in my experience of
moving around and seeng code bases from many people, all the individual
"initiatives" to help "less experienced" programmers failed because of
one important thing:

* What the original programmer thought of as a problem was not a problem
for others.

As a result his "help" merely obfuscated the code and introduced
unnecessary bulk.

Write it in terse C with comments only where appropriate.

It really is like the pascal guy who #defines BEGIN.

jameskuyper

unread,

Mar 26, 2009, 1:20:02 PM3/26/09

to

Ben Bacarisse wrote:
> Richard <rgr...@gmail.com> writes:
...

> > Hint : "while(--x);" executes no consitional statement so include no
> > conditional statement.
>
> That is not right -- the ; is not part of the while.

I'm not quite sure what you mean by that objection. It's certainly not
a part of the keyword "while", but it is part of a while statement.

The relevant grammatical production is described in section 6.8.5:
> iteration-statement:
> while ( expression ) statement

The ';' in "while(--x);" is part of the "statement" portion of an
iteration statement. It is no less a part of the while statement than
the 'x' is.

Default User

unread,

Mar 26, 2009, 1:32:27 PM3/26/09

to

Richard Heathfield wrote:

> CBFalconer said:

> > I would write:
> >
> > while (condition) continue;
>
> Putting the 'continue' on a separate line makes it clearer exactly
> when the condition is met (when stepping through the code with a
> debugger - not something I do a lot nowadays, but I used to, and
> some people still do). And using the (optional) braces to mark off

> the loop body is a good habit to get into, as it can save all kinds

> of embarrassment later when the loop is maintained.

I tend to prefer using the same structure for the empty block as I
would if it had something in it. I would not normally put in a continue
statement, but just leave it empty.

while (condition)
{
}

A comment in the compound statement would be a good idea as well. That
makes it extra clear that it wasn't some sort of editing error.

Brian

--
Day 51 of the "no grouchy usenet posts" project

Richard Heathfield

unread,

Mar 26, 2009, 1:52:54 PM3/26/09

to

Default User said:

<snip>

> I would not normally put in a
> continue statement, but just leave it empty.
>
> while (condition)
> {
> }
>
> A comment in the compound statement would be a good idea as well.
> That makes it extra clear that it wasn't some sort of editing
> error.

Yes, but that's precisely why I use continue - it does the same job
as a comment, but is quicker to type than
/* this line intentionally left blank */

:-)

Flash Gordon

unread,

Mar 26, 2009, 2:29:33 PM3/26/09

to

I don't.

> Hint : "while(--x);" executes no consitional statement so include no
> conditional statement.
>
> If a C programmer can not see what
>
> while(x--);
>
> does then he has no business in the code and will certainly screw up at
>
> while(*d++=*s++);
>
> time.

I agree that a programmer should be able to see and understand what all
of the other alternatives mean. To me it is more a matter of which will
make it clearest that I intended that there is no body.

I'm not really bothered about which method people use for an empty body
and would happily switch to whatever method was being used by anyone
else I happened to be working with.
--
Flash Gordon

Ben Bacarisse

unread,

Mar 26, 2009, 3:17:37 PM3/26/09

to

jameskuyper <james...@verizon.net> writes:

> Ben Bacarisse wrote:
>> Richard <rgr...@gmail.com> writes:
> ...
>> > Hint : "while(--x);" executes no consitional statement so include no
>> > conditional statement.
>>
>> That is not right -- the ; is not part of the while.
>
> I'm not quite sure what you mean by that objection. It's certainly not
> a part of the keyword "while", but it is part of a while statement.

Yes, it was not a good way to put it. I thought it became clear later
with the text you've snipped. By saying '"while(--x);" executes no
consitional[sic] statement so include no conditional statement'
Richard seems to be suggesting that there is no sub-statement
conditionally controlled by the (outer) while. If there were true,
the ; would, most likely, come from the production for "while" but it
does not as you illustrate:

> The relevant grammatical production is described in section 6.8.5:
>> iteration-statement:
>> while ( expression ) statement
>
> The ';' in "while(--x);" is part of the "statement" portion of an
> iteration statement. It is no less a part of the while statement than
> the 'x' is.

--
Ben.

rio

unread,

Mar 26, 2009, 6:25:26 PM3/26/09

to

"Richard" <rgr...@gmail.com> ha scritto nel messaggio
news:gqg831$866$3...@news.motzarella.org...

> Flash Gordon <sm...@spam.causeway.com> writes:
>
>> Nate Eldredge wrote:
>>> luser-ex-troll <mij...@yahoo.com> writes:
>>>
>>>> On Mar 25, 5:14 pm, pete <pfil...@mindspring.com> wrote:
>>>>> Keith Thompson wrote:
>>>>>> I wouldn't use
>>>>>> while (condition);
>>>>>> because it's just too terse for my tastes, and too easy to mistake for
>>>>>> a typo.
>>>>> I would write that this way:
>>>>>
>>>>> while (condition) {
>>>>> ;
>>>>> }

i not use while() much these times

>>>>> --
>>>>> pete
>>>> nice. but what about:
>>>>
>>>> while(condition){;}
>>>
>>> I personally tend to write
>>>
>>> while (condition) ;
>>>
>>> I think the added space draws more attention to the empty body, since I
>>> ordinarily don't leave spaces before semicolons at the end of a statement.
>>>
>>> I also sometimes do
>>>
>>> while (condition) /* keep going */ ;
>>
>> Someone else once suggested
>> while (condition) continue;
>> I decided I like it so now use it.
>
> How horrendous. The first thing I looked for seeing that was a higher
> level loop.
>
> Hint : "while(--x);" executes no consitional statement so include no
> conditional statement.

> If a C programmer can not see what
>
> while(x--);

> does then he has no business in the code and will certainly screw up at

the above should be the same of "x=-1;" right??
( should be the same of w[x++]=a but with x--)
x=3, x--=(2,3)(1,2)(0,-1)

how many of you can do this without one compiler or book?

x--, x++ , ++x, ++x

it is all over complicated "++x" is enoughf

> while(*d++=*s++);

this is over complex too.
"while(*r++=*s++);" is not more easy than

.0: a=*s|++s|*r=a|++r|a#.0

or in C
W: a=*s;++s;*r=a;++r; if(a) goto W;

but no one would believe in that
and in the facts that a multiple instructions for line are ok
etc etc

rio

unread,

Mar 26, 2009, 6:38:28 PM3/26/09

to

"rio" <a@b.c> ha scritto nel messaggio
news:49cbffb4$0$1113$4faf...@reader3.news.tin.it...

> "Richard" <rgr...@gmail.com> ha scritto nel messaggio
> news:gqg831$866$3...@news.motzarella.org...

>> Flash Gordon <sm...@spam.causeway.com> writes:
>>
>>> Nate Eldredge wrote:
>>>> luser-ex-troll <mij...@yahoo.com> writes:
>>>>
>>>>> On Mar 25, 5:14 pm, pete <pfil...@mindspring.com> wrote:
>>>>>> Keith Thompson wrote:
>> while(x--);
>
>> does then he has no business in the code and will certainly screw up at
>

> the above should be the same of "x=-1;" right??
> ( should be the same of w[x++]=a but with x--)
> x=3, x--=(2,3)(1,2)(0,-1)

(value_in_x, return_value)
x=3, x--=(2,3)(1,2)(0,1)(-1,0)

Barry Schwarz

unread,

Mar 26, 2009, 9:19:15 PM3/26/09

to

On Wed, 25 Mar 2009 06:57:50 -0700 (PDT), luser-ex-troll
<mij...@yahoo.com> wrote:

>On Mar 25, 7:08 am, Barry Schwarz <schwa...@dqel.com> wrote:
>> On Tue, 24 Mar 2009 20:46:46 -0700 (PDT), luser-ex-troll

snip

>> >int check(char *buf, test *fsm, int(*yes)(int)){ char *s = buf; int
>> >sta = 0;
>>
>> Notice how usenet adds line breaks you didn't intend at inopportune
>> points. Additionally, multiple statements per line tend to hinder
>> readability.
>
>Yes, irritating, but acknowledged.
>

snip

>> > if (check(buf,decimal,dec_accept)) { printf( "dec: %s\n",
>> >buf); return 0; }
>>
>> Here it is even worse. And it destroys your attempt at consistent
>> indentation.
>
>I know. But it's so pretty with 85 columns. I'll split such things for
>future postings, but I'm keeping it this way on disk.

Bad idea. If you are having problems with code, the only way we can
help is if you post the exact code (preferably using cut and paste).
While adding new lines is probably low risk, it is not risk free.

snip

>Thanks a bunch. I'll keep the lines much shorter to guard against ugly
>splits. Is there any way to defend against those extra newlines?

Decide where a visual break will introduce the least problem for the
reader. For example, the above if could be

if (check(buf,decimal,dec_accept))
{printf( "dec: %s\n", buf);
return 0;
}

--
Remove del for email

Barry Schwarz

unread,

Mar 26, 2009, 9:19:15 PM3/26/09

to

On Wed, 25 Mar 2009 11:55:07 -0700 (PDT), luser-ex-troll
<mij...@yahoo.com> wrote:

>Traced, debugged, sieved, and splinted; is it stylish yet?

snip

>/* ^[+-]?(\d+(\.\d*)?)|(\d*\.\d+)([eE][+-]?\d+)?$ */
>test fsm_real[] = {
>/* 0*/ { issign, 1, 1 },
>/* 1*/ { isdigit, 2, 4 }, /* [+-]? */
>/* 2*/ { isdigit, 2, 3 }, /* [+-]?\d\d* yes! */
>/* 3*/ { isdot, 6, 7 }, /* [+-]?\d\d*[^\d] */
>/* 4*/ { isdot, 5, -1 }, /* [+-]?[^\d] */

>/* 5*/ { isdigit, 6, -1 }, /* [+-]?\. */
>/* 6*/ { isdigit, 6, 7 }, /* [+-]?(\d\d*)?\.\d* yes! */
>/* 7*/ { ise, 8, -1 }, /* [+-]?(\d\d*)?(\.\d*)? */
>/* 8*/ { issign, 9, 9 }, /* [+-]?(\d\d*)?(\.\d*)?[eE] */
>/* 9*/ { isdigit, 10, -1 }, /* [+-]?(\d\d*)?(\.\d*)?[eE][+-]? */
>/*10*/ { isdigit, 10, -1 }, /* [+-]?(\d\d*)?(\.\d*)?[eE][+-]?\d\d*
>yes! */

Just out of curiosity, I wonder why 2e7 is not valid?

Barry Schwarz

unread,

Mar 26, 2009, 9:19:15 PM3/26/09

to

On Wed, 25 Mar 2009 06:57:50 -0700 (PDT), luser-ex-troll
<mij...@yahoo.com> wrote:

>On Mar 25, 7:08 am, Barry Schwarz <schwa...@dqel.com> wrote:
>> On Tue, 24 Mar 2009 20:46:46 -0700 (PDT), luser-ex-troll

snip

>> >int puff(char *buf, int nbuf) { char *s = buf; int c;
>> > while ((c=getchar()) != EOF) {
>>
>> It would be more user friendly if you used '\n' as your terminator
>> rather than EOF.
>>
>
>I don't understand: the program should reject an EOF and demand the
>line be finished?! A newline separator is handled by isspace on the
>next line.

Most user input is terminated with the Enter key which results in
getchar returning '\n'. Forcing the user to figure out how to enter
EOF (it gets asked fairly frequently and is system specific) is just
unfriendly. I don't think I know how to force EOF on my IBM mainframe
except at the start of a line with a "/*" which means I have to give
you the '\n' anyway.

If you like, check for both. (Some here even insist on it.)

>> > if(isspace(c) || isdelim(c))
>> > break;
>> > if(nbuf < s-buf-1)
>> > return -1;
>> > *s++ = c;
>> > }
>> > *s++ = 0;
>
>> >return 0; }

Ben Bacarisse

unread,

Mar 26, 2009, 10:11:26 PM3/26/09

to

Barry Schwarz <schw...@dqel.com> writes:

It is valid according to the PostScript reference manual I have, and it
seems to be according to my reading of the comments. Does the code
not honour the comments?

The comments seem to suggest that the code will take e, E and E45 as
floating point numbers and this is not what my reference says. My
reading of the PS text is that floating point number has the form

[+-]?(\d+.\d*|\d*.\d+|\d+)([eE][+-]?\d+)?

--
Ben.

CBFalconer

unread,

Mar 26, 2009, 8:32:38 PM3/26/09

to

Richard Harter wrote:
> Keith Thompson <ks...@mib.org> wrote:
>> c...@tiac.net (Richard Harter) writes:
>>

... snip ...

>>
>>> I dare say you have; people have all sorts of notions. Perhaps
>>> I am wrong but I would have supposed that good coders indent
>>> only when a new block is started or when a single statement is
>>> being written on more than one line. (Or in special situations
>>> not covered by these two cases. :-))
>>
>> [...]
>>
>> I've seen too many cases where the indentation is inconsistent
>> because the author had a different tabstop setting than I do
>> (e.g., 4 columns vs. 8). I've even seen inconsistencies within
>> the same file, where apparently two maintainers had different
>> tabstop settings.
>>
>> Coding standards should ban the use of tab characters in C
>> source code.
>
> I've seen the same thing and I agree about banning tab
> characters. One of the conveniences of the vslick editor that
> I use is that by default it automatically converts tabs into
> spaces when text is entered. I imagine that other editors have
> something similar.

And then there are the Vedit users, who can list a whole set of tab
stops. Very handy, but not too useful for indented code. The wise
user includes a comment in his original which specifies how to set
up the tabs.

--
[mail]: Chuck F (cbfalconer at maineline dot net)
[page]: <http://cbfalconer.home.att.net>
Try the download section.

CBFalconer

unread,

Mar 26, 2009, 8:37:36 PM3/26/09

to

Richard Heathfield wrote:
> pete said:
>
> <snip>
>
>> I meant that I have seen code indented like this:
>>
>> while (condition);
>> printf("Hello ");
>> printf("World\n");
>
> Ouch. And that's why I always use {}

Then how do you code printing:

"Hello Hello Hello Hello Hello World\n"

with a user controllable count of "Hello"s? :-)

luser-ex-troll

unread,

Mar 26, 2009, 10:31:29 PM3/26/09

to

On Mar 26, 8:19 pm, Barry Schwarz <schwa...@dqel.com> wrote:
> On Wed, 25 Mar 2009 11:55:07 -0700 (PDT), luser-ex-troll
>

I wonder why you wonder.
I'm getting correct recognition of 2e7. Perhaps there was a transient
error in one of the intermediate versions, but the current one accepts
2e7 just fine; and the machine quoted here appears to be the correct
one. Let's trace it to make sure.

in state 0, issign('2') returns false, transit to 1.
in state 1, isdigit('2') returns true, transit to 2, increment s.
in state 2, isdigit('e') returns false, transit to 3.
in state 3, isdot('e') returns false, transit to 7.
in state 7, ise('e') returns true, transit to 8, increment s.
in state 8, issign('7') returns false, transit to 9.
in state 9, isdigit('7') returns true, transit to 10, increment s.
s now points to a nul character and so the loop ends,
and acc_real(10) returns true indicating the machine
terminated in an acceptable final state.

I cannot reproduce the error. It appears correct to me.
--
lxt

luser-ex-troll

unread,

Mar 26, 2009, 10:34:22 PM3/26/09

to

On Mar 26, 8:19 pm, Barry Schwarz <schwa...@dqel.com> wrote:
> On Wed, 25 Mar 2009 06:57:50 -0700 (PDT), luser-ex-troll
>

> <mijo...@yahoo.com> wrote:
> >On Mar 25, 7:08 am, Barry Schwarz <schwa...@dqel.com> wrote:
> >> On Tue, 24 Mar 2009 20:46:46 -0700 (PDT), luser-ex-troll
>
> snip
>
> >> >int puff(char *buf, int nbuf) { char *s = buf; int c;
> >> > while ((c=getchar()) != EOF) {
>
> >> It would be more user friendly if you used '\n' as your terminator
> >> rather than EOF.
>
> >I don't understand: the program should reject an EOF and demand the
> >line be finished?! A newline separator is handled by isspace on the
> >next line.
>
> Most user input is terminated with the Enter key which results in
> getchar returning '\n'. Forcing the user to figure out how to enter
> EOF (it gets asked fairly frequently and is system specific) is just
> unfriendly. I don't think I know how to force EOF on my IBM mainframe
> except at the start of a line with a "/*" which means I have to give
> you the '\n' anyway.
>
> If you like, check for both. (Some here even insist on it.)

I'm fairly certain that I am.
Right here, in fact|
V

> >> > if(isspace(c) || isdelim(c))
> >> > break;
> >> > if(nbuf < s-buf-1)
> >> > return -1;
> >> > *s++ = c;
> >> > }
> >> > *s++ = 0;
>
> >> >return 0; }

--
lxt

pete

unread,

Mar 26, 2009, 10:38:22 PM3/26/09

to

CBFalconer wrote:
> Richard Heathfield wrote:
>> pete said:
>>
>> <snip>
>>
>>> I meant that I have seen code indented like this:
>>>
>>> while (condition);
>>> printf("Hello ");
>>> printf("World\n");
>> Ouch. And that's why I always use {}
>
> Then how do you code printing:
>
> "Hello Hello Hello Hello Hello World\n"
>
> with a user controllable count of "Hello"s? :-)

Not with the code statements which are shown above.

The number of "Hello "'s
that the shown code statements can output,
is limited to one.

--
pete

Keith Thompson

unread,

Mar 26, 2009, 11:04:34 PM3/26/09

to

No, the wise user uses only spaces, never tabs, for indentation.

Which is better, a comment telling every person viewing the file how
to set up the tabs (possibly using different methods depending on
which editor or other tool you're using), or using spaces and thereby
cleanly eliminating the need for either the comment or the setup?

Yes, this is my personal opinion, but it's a very strongly held one.

CBFalconer

unread,

Mar 26, 2009, 11:29:32 PM3/26/09

to

Keith Thompson wrote:
> CBFalconer <cbfal...@yahoo.com> writes:
>
... snip ...

>
>> And then there are the Vedit users, who can list a whole set of
>> tab stops. Very handy, but not too useful for indented code.
>> The wise user includes a comment in his original which specifies
>> how to set up the tabs.
>
> No, the wise user uses only spaces, never tabs, for indentation.
>
> Which is better, a comment telling every person viewing the file
> how to set up the tabs (possibly using different methods
> depending on which editor or other tool you're using), or using
> spaces and thereby cleanly eliminating the need for either the
> comment or the setup?

Well, I was thinking of generic file editing, not C source. I
wasn't specific about it.

CBFalconer

unread,

Mar 26, 2009, 11:35:35 PM3/26/09

to

Not with the unshown lines, which included:

int i = 5;
and
#define condition i--

Besides, I think your assumptions cause infinite Hellos or zero.

Ben Bacarisse

unread,

Mar 27, 2009, 12:36:09 AM3/27/09

to

CBFalconer <cbfal...@yahoo.com> writes:

> pete wrote:
>> CBFalconer wrote:
>>> Richard Heathfield wrote:
>>>> pete said:
>>>>
>>>> <snip>
>>>>
>>>>> I meant that I have seen code indented like this:
>>>>>
>>>>> while (condition);
>>>>> printf("Hello ");
>>>>> printf("World\n");
>>>>
>>>> Ouch. And that's why I always use {}
>>>
>>> Then how do you code printing:
>>>
>>> "Hello Hello Hello Hello Hello World\n"
>>>
>>> with a user controllable count of "Hello"s? :-)
>>
>> Not with the code statements which are shown above.
>>
>> The number of "Hello "'s that the shown code statements can
>> output, is limited to one.
>
> Not with the unshown lines, which included:
>
> int i = 5;
> and
> #define condition i--

These would make no difference.

> Besides, I think your assumptions cause infinite Hellos or zero.
> :-)

No, you have missed the point of the example. It was to show how
confusing bad indentation can be, particularly when there is a tiny,
but significant, ';' on the while line.

--
Ben.

Richard Heathfield

unread,

Mar 27, 2009, 3:56:33 AM3/27/09

to

Keith Thompson said:

> CBFalconer <cbfal...@yahoo.com> writes:
<snip>

>>
>> And then there are the Vedit users, who can list a whole set of
>> tab stops. Very handy, but not too useful for indented code.
>> The wise user includes a comment in his original which specifies
>> how to set up the tabs.
>
> No, the wise user uses only spaces, never tabs, for indentation.

Given the context, I can understand your response, Keith.
Nevertheless, I cannot agree with your claim, because it is
equivalent to "the user who sometimes or always uses tabs for
indentation is unwise". Personally, I thought about this a long
time ago and decided to use spaces - but I know that some people,
just as wise as me if not wiser, have also thought about this and
reached the opposite conclusion. This isn't a matter of "my way
wise, his way unwise", but a matter of "what works for you". It's
one of those (very few) issues that can indeed be resolved by
postmodernist fudging. You use what you want, I'll use what I want,
and I'll run your code through indent before I try to read it (and
doubtless you will do the same to mine). As it happens, you and I
both chose spaces, so indent won't have much to do (in that
respect) - but just because we agree on this, it does not mean that
we are wiser than someone who chose tabs.

> Which is better, a comment telling every person viewing the file
> how to set up the tabs (possibly using different methods depending
> on which editor or other tool you're using), or using spaces and
> thereby cleanly eliminating the need for either the comment or the
> setup?

Fine - but if you now ask a tabs advocate to write a "which is
better", you may well find that he makes out an equally strong
case.

>
> Yes, this is my personal opinion, but it's a very strongly held
> one.

Perhaps just a touch /too/ strongly held?

Richard Heathfield

unread,

Mar 27, 2009, 4:13:53 AM3/27/09

to

CBFalconer said:

> Richard Heathfield wrote:
>> pete said:
>>
>> <snip>
>>
>>> I meant that I have seen code indented like this:
>>>
>>> while (condition);
>>> printf("Hello ");
>>> printf("World\n");
>>
>> Ouch. And that's why I always use {}
>
> Then how do you code printing:
>
> "Hello Hello Hello Hello Hello World\n"
>
> with a user controllable count of "Hello"s? :-)

Properly.

pete

unread,

Mar 27, 2009, 7:09:19 AM3/27/09

to

That's the entire point exactly.

--
pete

Ben Bacarisse

unread,

Mar 27, 2009, 11:55:18 AM3/27/09

to

luser-ex-troll <mij...@yahoo.com> writes:

I see you have correctly ignored my remark about string such as e2,
.e2 etc! I thought you might be getting these wrong based on the
later comments. I would correct these to reflect what has been seen
by the time the machine gets into that particular state. The last
line suggest that .e2 will be taken as a real but, presumably, it
won't be because of previous matches to get into state 10.

Also, I don't think your top comment is correct or at least it is a
little misleading:

/* ^[+-]?(\d+(\.\d*)?)|(\d*\.\d+)([eE][+-]?\d+)?$ */

suggests that '+2' will be taken as a real when it is not one (at not
least formally).

--
Ben.

Keith Thompson

unread,

Mar 27, 2009, 12:04:09 PM3/27/09

to

Richard Heathfield <r...@see.sig.invalid> writes:
> Keith Thompson said:
>> CBFalconer <cbfal...@yahoo.com> writes:
> <snip>
>>>
>>> And then there are the Vedit users, who can list a whole set of
>>> tab stops. Very handy, but not too useful for indented code.
>>> The wise user includes a comment in his original which specifies
>>> how to set up the tabs.
>>
>> No, the wise user uses only spaces, never tabs, for indentation.
>
> Given the context, I can understand your response, Keith.
> Nevertheless, I cannot agree with your claim, because it is
> equivalent to "the user who sometimes or always uses tabs for
> indentation is unwise". Personally, I thought about this a long
> time ago and decided to use spaces - but I know that some people,
> just as wise as me if not wiser, have also thought about this and
> reached the opposite conclusion. This isn't a matter of "my way
> wise, his way unwise", but a matter of "what works for you". It's
> one of those (very few) issues that can indeed be resolved by
> postmodernist fudging. You use what you want, I'll use what I want,
> and I'll run your code through indent before I try to read it (and
> doubtless you will do the same to mine). As it happens, you and I
> both chose spaces, so indent won't have much to do (in that
> respect) - but just because we agree on this, it does not mean that
> we are wiser than someone who chose tabs.

I don't want to pass all code through indent before reading it. For
one thing, depending on the environment, that's not always even an
option; for another, indent can change the layout in ways that
shouldn't affect what the compiler sees, but can adversely affect what
a human reader sees.

Different programmers use different brace placement, among other
things. I don't want to change that; for example, I might need to
conform to the existing style when I make changes and check them in.

And if there's a stray semicolon:
while (condition);
do_something;
indent will quietly change it to:
while (condition);
do_something;
making it harder to figure out what was actually intended. (That's
admittedly an unusual case.)

What I frequently do is run code through expand (a Unix tool that
replaces each tab with the right number of spaces) -- but figuring out
what options to pass to expand can be non-trivial. For many years, I
only saw code written with the assumption that tabstops are set every
8 columns, because that's the default setting on the Unix systems I
used. Now I often see code that looks incorrect unless I set tabstops
to 4 columns, or sometimes 2. There's no explicit indication of what
the tabstop setting should be; I just have to play with it until the
code looks right.

>> Which is better, a comment telling every person viewing the file
>> how to set up the tabs (possibly using different methods depending
>> on which editor or other tool you're using), or using spaces and
>> thereby cleanly eliminating the need for either the comment or the
>> setup?
>
> Fine - but if you now ask a tabs advocate to write a "which is
> better", you may well find that he makes out an equally strong
> case.

I'd be interested in seeing such an argument.

>> Yes, this is my personal opinion, but it's a very strongly held
>> one.
>
> Perhaps just a touch /too/ strongly held?

Perhaps -- except that I'm right and everyone who disagrees with me is
wrong. 8-)}

Back in the old days, I did use tabs for indentation -- but not
necessarily one tab per level. My usual style was to indent 3 columns
per level (I now use 4), but with tabstops set to 8 columns. So the
beginning of a deeply indented line might have one or more tabs
followed by one or more spaces. As long as everyone reading or
editing the code had their tabstops set to 8 columns, that was ok (and
it saved a little disk space). Later people seem to have gotten the
idea that each indentation level must be represented by a single tab
character, with the tabstop settings adjusted as necessary. But until
there's universal agreement on tabstop settings, there's always the
risk that code will be formatted inconsistently. I've found that the
best way to avoid inconsistency is to use spaces exclusively.

(Except in Makefiles, which require tab characters -- sigh.)

luserXtrog

unread,

Mar 27, 2009, 4:05:59 PM3/27/09

to

On Mar 27, 10:55 am, Ben Bacarisse <ben.use...@bsb.me.uk> wrote:

Thanks for the attention. I had some trouble coming up with an
expression that would guarantee at least one digit before or after the
decimal point (so .e1 would be rejected. I went around in circles
with: write an expression, draw the directed graph, fix expression,
fix graph, new expression, newfangled graph. When I came to a graph
that seemed to work, I just copied the expression that led me there.

The RE for reals that you posted is superior to mine. At some point I
plan to clean up the comments to better reflect the accumulated
knowledge that each state represents; but for the moment, I'm focused
on expanding the code to process the remaining syntactic entities so
it can be reincorporated into the larger project.

I wonder if I can modify the machine to count balanced parentheses in
strings?

--
laser-extra-glo

luserXtrog

unread,

Mar 27, 2009, 4:30:58 PM3/27/09

to

On Mar 27, 11:04 am, Keith Thompson <ks...@mib.org> wrote:
> Richard Heathfield <r...@see.sig.invalid> writes:
> > Keith Thompson said:

Vim has a :retab command which could help with this. To update a file
written with 4-column tabs to use 8-column tabs (& retain original
appearance), you could do:
:%s/^I/ /g
:%retab
I only recently saw the light and did this to all my files. Of course
this method will clobber any tabs which may be present for other
purposes (ie. in string literals), so may not be appropriate for code
acquired from elsewhere. In fact, one of the programs I've posted here
needlessly duplicated the standard isspace() function; it contained a
tab in a string literal that perhaps should be preserved during such a
conversion. I recommend investigation of the possibility of doing this
within your editor; be careful.

> > Fine - but if you now ask a tabs advocate to write a "which is
> > better", you may well find that he makes out an equally strong
> > case.
>
> I'd be interested in seeing such an argument.

As a former partisan for that side, I can give you what I thought was
an argument for it: It's easier. Of course, not being true (or not for
long, anyway), it's a pretty lousy argument.

> >> Yes, this is my personal opinion, but it's a very strongly held
> >> one.
>
> > Perhaps just a touch /too/ strongly held?
>
> Perhaps -- except that I'm right and everyone who disagrees with me is
> wrong. 8-)}
>
> Back in the old days, I did use tabs for indentation -- but not
> necessarily one tab per level. My usual style was to indent 3 columns
> per level (I now use 4), but with tabstops set to 8 columns. So the
> beginning of a deeply indented line might have one or more tabs
> followed by one or more spaces. As long as everyone reading or
> editing the code had their tabstops set to 8 columns, that was ok (and
> it saved a little disk space). Later people seem to have gotten the
> idea that each indentation level must be represented by a single tab
> character, with the tabstop settings adjusted as necessary. But until
> there's universal agreement on tabstop settings, there's always the
> risk that code will be formatted inconsistently. I've found that the
> best way to avoid inconsistency is to use spaces exclusively.

Your editor should have settings to do this transparently; so pressing
the tab key inserts 4 spaces, but a second press replaces those with a
TAB character (which is always 8 by this method).

for vim, it's:
:set softtabstop=4
and these are nice with it (tangentially related):
:set shiftwidth=4
:set autoindent
:set smartindent

> (Except in Makefiles, which require tab characters -- sigh.)
>

Yes, they really look weird when you're used to 4 columns.

--
lxt

luserXtrog

unread,

Mar 27, 2009, 6:14:33 PM3/27/09

to

On Mar 26, 8:19 pm, Barry Schwarz <schwa...@dqel.com> wrote:
> On Wed, 25 Mar 2009 06:57:50 -0700 (PDT), luser-ex-troll
>

I'm starting to appreciate this one. Two-space indents, no space after
initial bracketing, and the closing curly balances the opening one on
two axes. The condition is tight and the payload is loose.
Would padding out the closing curly allow this to be compressed thus?:

if (check(buf,decimal,dec_accept)) {
printf( "dec: %d\n", (int)strtol(buf,NULL,10));
return 0; }

Or is that sheer madness?

--
lose rex ball

Ian Collins

unread,

Mar 27, 2009, 6:38:05 PM3/27/09

to

Are you really trying to make your code as illegible as possible?
What's wrong with

if (check(buf,decimal,dec_accept))
{
printf( "dec: %s\n", buf);
return 0;
}

?

--
Ian Collins

luserXtrog

unread,

Mar 27, 2009, 6:51:12 PM3/27/09

to

On Mar 26, 5:25 pm, "rio" <a...@b.c> wrote:
> "Richard" <rgrd...@gmail.com> ha scritto nel messaggionews:gqg831$866$3...@news.motzarella.org...

>
>
>
> > Flash Gordon <s...@spam.causeway.com> writes:
>
> >> Nate Eldredge wrote:

It's valid, but fails my subjective test for "pretty".

> but no one would believe in that
> and in the facts that a multiple instructions for line are ok
> etc etc
>
> > time.

I have no opposition to multiple statements on a line, in fact one of
my goals is the maximum power per line (another subjective measure,
but I have a very small screen!). But the primary purpose of this
thread was to replace gotos with more appropriate structures (where
appropriate, iff appropriate). I think the unnecessary temporary
variable sinks this one.

But it appears this was motivated by a difficulty with (pre-/post-)fix
(in-/de)crement operations, so how about a little refresher.

The compiler (in a broad sense) discovers prefix operators first.
So it does them first. Then it finds the variable, so it resolves that
level of the expression to the value of that variable.
You just have to correlate the direction of time with the direction
the text reads and it should make sense naturally (they both go left
to right).

++ x ;
increment value do it
x ++ ;
value gonna get incremented now

* s ++ /*...*/
the thing at which value (points) (and increment value later)...
= * /*...*/
assumes the value of the thing at which ...
buf ++ /*...*/
value (points) (and increment value later)...
;
it's later. all increments shall have happened.

--
lxt

Ben Bacarisse

unread,

Mar 27, 2009, 7:21:13 PM3/27/09

to

luserXtrog <mij...@yahoo.com> writes:

> On Mar 27, 10:55 am, Ben Bacarisse <ben.use...@bsb.me.uk> wrote:

<snip>

>> I see you have correctly ignored my remark about string such as e2,
>> .e2 etc! I thought you might be getting these wrong based on the
>> later comments. I would correct these to reflect what has been seen
>> by the time the machine gets into that particular state. The last
>> line suggest that .e2 will be taken as a real but, presumably, it
>> won't be because of previous matches to get into state 10.
>>
>> Also, I don't think your top comment is correct or at least it is a
>> little misleading:
>>
>> /* ^[+-]?(\d+(\.\d*)?)|(\d*\.\d+)([eE][+-]?\d+)?$ */
>>
>> suggests that '+2' will be taken as a real when it is not one (at not
>> least formally).
>
> Thanks for the attention.

Well done for understanding it. I just read through it again (I do
before posting, I really do) and the typo rate is so high it is almost
nonsense.

<snip>

> I wonder if I can modify the machine to count balanced parentheses in
> strings?

It would not be hard I think. Obviously you can not do it with a
plain FSM, but the FSM driver (czek) could keep a counter. Each state
structure would need an extra +1, -1 or 0 to get added to the counter
when a "yes" transition occurs.

You could be a little more general and pass some sort of "match state"
structure to the checking functions. You could keep track of pretty
much anything that way but the extra generality might not be worth the
extra complexity.

In fact, a lot of lexical objects that have matching parenthesis are
so simple it may not be worth writing it into these FSM function. I
have, on occasion, just written a "collect_until_matching(']');"
function to do this sort of thing.

--
Ben.