Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

[PATCH] long option processing

11 views
Skip to first unread message

Luke Palmer

unread,
Apr 28, 2003, 5:20:36 AM4/28/03
to perl6-i...@perl.org
This patch implements a new, um, subsystem which does long option
processing. I also changed test_main.c to use it. I'll do imcc soon.

I added longopt.c, include/parrot/longopt.h, and docs/dev/longopt.dev.
I added these to MANIFEST.detailed and root.in. I'm not sure how to
send a patch with new files (if that's even possible), so I just
included the new files before the patch itself.

I was reluctant to make it a subsystemish thing, but if more than one
thing's going to use it, it needs a header. Is there a better place I
could put these things? Or name these things?

Oh, I'm not quite sure how to write tests for command invocations.
Some help?

So, if none of these caveats apply, here's the new files and the
patch. Enjoy!

Luke

=============================
include/parrot/longopt.h
=============================
/* longopt.h
* Copyright: (When this is determined...it will go here)
* CVS Info
* $Id$
* Overview:
* Command line option parsing (for pre-initialized code)
* Data Structure and Algorithms:
* History:
* Notes:
* References:
*/

#if !defined(PARROT_LONGOPT_H_GUARD)
#define PARROT_LONGOPT_H_GUARD

/* longopt_string_t: the string type used throughout this interface.
 * A plain const char* is used here because option tables need to be
 * easily statically initialized, and because the interpreter is
 * probably not running yet when the command line is parsed.
 */
typedef const char* longopt_string_t;

/* Flag bits that may be set in the opt_flags member of an option
 * declaration. */
typedef enum {
    /* The option takes a required argument. */
    OPTION_required_FLAG = 1 << 0
} OPTION_flags;

/* One entry of the option table handed to longopt_get().
 * Tables are written with positional initializers, so the order of
 * the members below must not change.  A table ends with an entry
 * whose opt_id is 0.
 */
struct longopt_opt_decl {
int opt_short; /* Short option character (the one after the dash) */
int opt_id; /* Value returned when this option is matched; 0 ends the table */
OPTION_flags opt_flags; /* OPTION_required_FLAG if an argument is required */
longopt_string_t opt_long[10]; /* NULL-terminated array of long aliases, each including its leading "--" */
};

/* Parsing state and per-call results for longopt_get().  The same
 * object is passed to every call while iterating over one argv.
 */
struct longopt_opt_info {
int opt_index; /* The index within argv of the next element to examine */
int opt_id; /* Id of the option just found; 0 signifies end of options, -1 an error */
longopt_string_t opt_arg; /* A pointer to the option's argument, or NULL if there is none */
longopt_string_t opt_error; /* Error message when -1 was returned, otherwise NULL */

/* Internal: position inside a bundle of short options (-abc) that is
 * still being worked through; NULL when not inside a bundle. */
const char* _shortopt_pos;
};

/* Static initializer for a struct longopt_opt_info, in member order:
 * opt_index 1 (argv[0] is conventionally the program name), opt_id 0,
 * and NULL for opt_arg, opt_error and _shortopt_pos.
 */
#define LONGOPT_OPT_INFO_INIT { 1, 0, NULL, NULL, NULL }

/* longopt_get: fetch the next option from argv.
 * options is a table terminated by an entry with opt_id 0; info_buf
 * carries the parsing state between calls and receives the results.
 * Returns the matched option's id, 0 at the end of the options, or -1
 * on error (with a message in info_buf->opt_error).
 */
int longopt_get(int argc, const char* argv[],
const struct longopt_opt_decl options[],
struct longopt_opt_info* info_buf);

#endif

/*
* Local variables:
* c-indentation-style: bsd
* c-basic-offset: 4
* indent-tabs-mode: nil
* End:
*
* vim: expandtab shiftwidth=4:
*/

==========================
longopt.c
==========================
/* longopt.c
* Copyright: (When this is determined...it will go here)
* CVS Info
* $Id$
* Overview:
* Data Structure and Algorithms:
* History:
* Notes:
* References:
*/

#include "parrot/parrot.h"
#include "parrot/longopt.h"

/* Handles a "--name" or "--name=value" word; called by longopt_get(). */
static int longopt_get_longopt(int argc, const char* argv[],
const struct longopt_opt_decl options[],
struct longopt_opt_info* info_buf);

/* Handles the next character of a "-abc" style word; called by
 * longopt_get(). */
static int longopt_get_shortopt(int argc, const char* argv[],
const struct longopt_opt_decl options[],
struct longopt_opt_info* info_buf);

/* Single shared buffer for error messages.  info_buf->opt_error is
 * pointed at it on failure, so a later error overwrites the text of
 * an earlier one. */
static char longopt_error_buffer[512];

/* longopt_get: Fetch the next long or short option described by
 * options[] (see longopt.dev).  Call it repeatedly with the same
 * info_buf until it returns 0 or -1.
 *
 *   0   end of options (first non-option word, a lone "-", a "--"
 *       separator, or the end of argv)
 *  -1   error; the message is in info_buf->opt_error
 *  else the id of the option that was found
 *
 * A "--" separator is stepped over before returning 0; a non-option
 * word or a lone "-" is left in place at info_buf->opt_index.
 */
int
longopt_get(int argc, const char* argv[],
            const struct longopt_opt_decl options[],
            struct longopt_opt_info* info_buf)
{
    const int idx = info_buf->opt_index;
    const char* word;

    /* Clear the results of the previous call. */
    info_buf->opt_id = 0;
    info_buf->opt_arg = NULL;
    info_buf->opt_error = NULL;

    /* Ran out of words. */
    if (idx >= argc)
        return 0;

    /* Anything that is not "-x..." ends option processing. */
    word = argv[idx];
    if (word == NULL || word[0] != '-' || word[1] == '\0')
        return 0;

    /* Either we are still inside a bundle of short options, or this
     * word starts with a single dash. */
    if (info_buf->_shortopt_pos || word[1] != '-')
        return longopt_get_shortopt(argc, argv, options, info_buf);

    /* "--something" is a long option. */
    if (word[2] != '\0')
        return longopt_get_longopt(argc, argv, options, info_buf);

    /* A bare "--": skip it and report the end of the options. */
    ++info_buf->opt_index;
    return 0;
}


/* longopt_get_longopt: Find the option id of a long option.
 * Fill info_buf appropriately, and return the option id, or -1 (with
 * info_buf->opt_error set) if the option is not in options[].
 * argv[info_buf->opt_index] is guaranteed to have at least
 * three characters and start with --.
 *
 * For an option with OPTION_required_FLAG the argument is taken from
 * after the '=' ("--bar=arg") or, failing that, from the next argv
 * element ("--bar arg").  If there is no next element, opt_arg is
 * left NULL and opt_index stops at argc rather than running past it.
 */
static int
longopt_get_longopt(int argc, const char* argv[],
                    const struct longopt_opt_decl options[],
                    struct longopt_opt_info* info_buf)
{
    /* Number of slots in opt_long; bounds the alias scan in case a
     * table entry fills every slot and leaves no NULL terminator. */
    const int max_aliases =
        (int)(sizeof options[0].opt_long / sizeof options[0].opt_long[0]);
    int dex = info_buf->opt_index;
    int optlen = 0;
    const struct longopt_opt_decl* dptr;

    /* Length of the option name, including the leading dashes but
     * excluding any "=value" part. */
    while (argv[dex][optlen] != '\0' && argv[dex][optlen] != '=') {
        optlen++;
    }

    for (dptr = options; dptr->opt_id; dptr++) {
        int sptr;
        /* For each listed long option... */
        for (sptr = 0; sptr < max_aliases && dptr->opt_long[sptr]; sptr++) {
            const char* alias = dptr->opt_long[sptr];

            /* Require an exact match: same prefix and same length. */
            if (strncmp(alias, argv[dex], optlen) != 0
                || alias[optlen] != '\0')
                continue;

            /* Found it */
            info_buf->opt_id = dptr->opt_id;
            ++info_buf->opt_index;

            /* XXX: (LP) if a longopt is given an argument when it's
             * not expecting one, it is just ignored. Bad. */

            if (dptr->opt_flags & OPTION_required_FLAG) {
                if (argv[dex][optlen] == '=') {
                    info_buf->opt_arg = &argv[dex][optlen+1];
                }
                else if (dex + 1 < argc) {
                    /* The argument is the following word; consume it. */
                    info_buf->opt_arg = argv[dex+1];
                    ++info_buf->opt_index;
                }
                else {
                    /* No word left: do not read or step past argv. */
                    info_buf->opt_arg = NULL;
                }
            }

            return dptr->opt_id;
        }
    }

    /* Couldn't find it. */
    info_buf->opt_id = -1;
    snprintf(longopt_error_buffer, sizeof longopt_error_buffer,
             "Option %s not known", argv[dex]);
    info_buf->opt_error = longopt_error_buffer;
    return -1;
}

/* longopt_get_shortopt: Find the option id of the next short option.
 * This next short option may be in the middle of a bundle (-abcd),
 * and info_buf->_shortopt_pos maintains a pointer into that bundle.
 * argv[info_buf->opt_index] is guaranteed to be at least two characters
 * long and start with a dash.
 *
 * Returns the option id, or -1 (with info_buf->opt_error set) if the
 * character is not in options[].  On error the parsing position is
 * not advanced, so the caller should stop iterating.
 *
 * For an option with OPTION_required_FLAG the argument is the rest of
 * the word ("-barg") or, if the word ends there, the next argv
 * element ("-b arg"), which is then consumed as well.  If there is no
 * next element, opt_arg is left NULL.
 */
static int
longopt_get_shortopt(int argc, const char* argv[],
                     const struct longopt_opt_decl options[],
                     struct longopt_opt_info* info_buf)
{
    int dex = info_buf->opt_index;
    const struct longopt_opt_decl* dptr;
    const char* pos;

    /* Starting a new word: begin just after the leading dash. */
    if (!info_buf->_shortopt_pos)
        info_buf->_shortopt_pos = &argv[dex][1];
    pos = info_buf->_shortopt_pos;

    for (dptr = options; dptr->opt_id; dptr++) {
        if (dptr->opt_short != *pos)
            continue;

        /* Found it */
        info_buf->opt_id = dptr->opt_id;

        if (dptr->opt_flags & OPTION_required_FLAG) {
            /* An argument always finishes the current word. */
            info_buf->_shortopt_pos = NULL;
            ++info_buf->opt_index;

            if (*(pos + 1)) {
                /* The argument is the remainder of this word. */
                info_buf->opt_arg = pos + 1;
            }
            else if (dex + 1 < argc) {
                /* The argument is the following word.  Step over it
                 * too, otherwise the next call would mistake it for
                 * the first non-option and end option processing. */
                info_buf->opt_arg = argv[dex+1];
                ++info_buf->opt_index;
            }
            else {
                /* No word left: do not read or step past argv. */
                info_buf->opt_arg = NULL;
            }
        }
        else { /* No argument expected */
            if (! *(pos + 1)) {
                /* End of the bundle: move on to the next word. */
                info_buf->_shortopt_pos = NULL;
                ++info_buf->opt_index;
            }
            else {
                /* More options follow in the same word. */
                ++info_buf->_shortopt_pos;
            }
        }

        return dptr->opt_id;
    }

    /* Couldn't find it in the table */
    info_buf->opt_id = -1;
    snprintf(longopt_error_buffer, sizeof longopt_error_buffer,
             "Option -%c not known", *pos);
    info_buf->opt_error = longopt_error_buffer;
    return -1;
}


/*
* Local variables:
* c-indentation-style: bsd
* c-basic-offset: 4
* indent-tabs-mode: nil
* End:
*
* vim: expandtab shiftwidth=4:
*/


=========================
docs/dev/longopt.dev
=========================
=head1 TITLE

longopt.h / longopt.c

=head1 SUMMARY

These two files implement rudimentary long option parsing. They have
little to do with Parrot itself, other than that the parrot binary and
imcc both needed long options. So this gives it to them.

=head1 USAGE

To use longopt, you first need to #include "parrot/longopt.h"
(it comes with parrot/parrot.h, too). Then you need to set up
the options table, which is an array of C<struct longopt_opt_decl>s.

Each element of this array has four components: the short option,
the option id (generally the same as the short option), some flags,
and finally a list of up to nine long options (all for this one behavior),
terminated with a NULL pointer.

There is currently one possible flag: OPTION_required_FLAG, which
states that this option has a required argument. Optional arguments
are not supported, and they should be.

The array should be terminated with an element that has 0 for the
option id. So, for example:

struct longopt_opt_decl options[] = {
{ 'f', 'f', 0, { "--foo", NULL } },
{ 'b', 'b', OPTION_required_FLAG, { "--bar", NULL } },
{ 0, 128, 0, { "--baz", "--bazbar", NULL } },
{ 0, 0, 0, { NULL } }
};

This is a structure that specifies three options.

Some of the ways you could give these options on the command line follow:

program --baz --bar=arg --foo somefile
program --bar arg -f somefile
program -f -b arg --bazbar somefile
program -barg -f somefile
program -fbarg somefile

So it basically behaves the way most GNU programs do. A lone - is
accepted as a real (non-option) argument, while -- is not an argument
itself but signals that only non-options follow. Again, just like GNU.

No options can follow a non-option, however. This is because programs
that this is written for, like parrot, usually want to pass options
given after the file to the file they're executing.

=head1 BUGS

It won't complain if you don't give it an argument to an option expecting
one. It will just set the opt_arg pointer to NULL.

It won't complain if you give an argument to an option not expecting one.
It will just ignore it (this only applies to the --foo=bar style).

=head1 AUTHOR

Luke Palmer <fibo...@babylonia.flatirons.org>

========================
Here's the PATCH
========================
Index: MANIFEST
===================================================================
RCS file: /cvs/public/parrot/MANIFEST,v
retrieving revision 1.334
diff -u -r1.334 MANIFEST
--- MANIFEST 27 Apr 2003 07:42:20 -0000 1.334
+++ MANIFEST 28 Apr 2003 09:07:12 -0000
@@ -132,6 +132,7 @@
docs/dev/dod.dev
docs/dev/infant.dev
docs/dev/jit_i386.dev
+docs/dev/longopt.dev
docs/dev/rx.dev
docs/embed.pod
docs/faq.pod
@@ -1257,6 +1258,7 @@
include/parrot/jit.h
include/parrot/key.h
include/parrot/list.h
+include/parrot/longopt.h
include/parrot/memory.h
include/parrot/method_util.h
include/parrot/misc.h
@@ -1729,6 +1731,7 @@
lib/Text/Balanced.pm
libnci.def
list.c
+longopt.c
make.pl
malloc.c
malloc-trace.c
Index: MANIFEST.detailed
===================================================================
RCS file: /cvs/public/parrot/MANIFEST.detailed,v
retrieving revision 1.5
diff -u -r1.5 MANIFEST.detailed
--- MANIFEST.detailed 27 Apr 2003 07:42:20 -0000 1.5
+++ MANIFEST.detailed 28 Apr 2003 09:07:13 -0000
@@ -131,6 +131,7 @@
[devel]doc docs/dev/dod.dev
[devel]doc docs/dev/infant.dev
[main]doc docs/dev/jit_i386.dev
+[main]doc docs/dev/longopt.dev
[main]doc docs/dev/rx.dev
[main]doc docs/embed.pod
[main]doc docs/faq.pod
@@ -1256,6 +1257,7 @@
[devel]include include/parrot/jit.h
[devel]include include/parrot/key.h
[devel]include include/parrot/list.h
+[devel]include include/parrot/longopt.h
[devel]include include/parrot/memory.h
[devel]include include/parrot/method_util.h
[devel]include include/parrot/misc.h
@@ -1728,6 +1730,7 @@
[devel] lib/Text/Balanced.pm
[] libnci.def
[] list.c
+[] longopt.c
[] make.pl
[] malloc.c
[] malloc-trace.c
Index: test_main.c
===================================================================
RCS file: /cvs/public/parrot/test_main.c,v
retrieving revision 1.61
diff -u -r1.61 test_main.c
--- test_main.c 18 Mar 2003 11:57:57 -0000 1.61
+++ test_main.c 28 Apr 2003 09:07:13 -0000
@@ -10,6 +10,7 @@
* References:
*/
#include "parrot/embed.h"
+#include "parrot/longopt.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -19,6 +20,21 @@

char *parseflags(Parrot_Interp interpreter, int *argc, char **argv[]);

+static struct longopt_opt_decl options[] = {
+ { 'b', 'b', 0, { "--bounds-checks", NULL } },
+ { 'd', 'd', 0, { "--debugging", NULL } },
+ { 'h', 'h', 0, { "--help", NULL } },
+ { 'j', 'j', 0, { "--jit", NULL } },
+ { 'p', 'p', 0, { "--profiling", NULL } },
+ { 'P', 'P', 0, { "--predereferencing", NULL } },
+ { 'g', 'g', 0, { "--no-computed-goto", NULL } },
+ { 't', 't', 0, { "--tracing", NULL } },
+ { 'v', 'v', 0, { "--version", NULL } },
+ { '.', '.', 0, { "--wait", NULL } },
+ {'\0', 128, 0, { "--gc-debug", NULL } },
+ {'\0', 0, 0, { NULL } }
+};
+
static void usage(void);

static void version(void);
@@ -58,20 +74,28 @@
char *
parseflags(Parrot_Interp interpreter, int *argc, char **argv[])
{
+ struct longopt_opt_info opt = LONGOPT_OPT_INFO_INIT;
+
if (*argc == 1) {
usage();
}

- /* skip the program name arg */
- (*argc)--;
- (*argv)++;
+ ++*argv;
+ --*argc;
+ opt.opt_index = 0;
+

#ifdef HAVE_COMPUTED_GOTO
setopt(PARROT_CGOTO_FLAG);
#endif

- while ((*argc) && (*argv)[0][0] == '-') {
- switch ((*argv)[0][1]) {
+ while (longopt_get(*argc, *argv, options, &opt)) {
+ if (opt.opt_id == -1) {
+ fprintf(stderr, "parrot: %s\n", opt.opt_error);
+ Parrot_exit(1);
+ }
+
+ switch (opt.opt_id) {
case 'b':
setopt(PARROT_BOUNDS_FLAG);
break;
@@ -107,35 +131,27 @@
* attach a debuggger. */
fgetc(stdin);
break;
- case '-':
- if ((*argv)[0][2] == '\0') {
- (*argc)--;
- (*argv)++;
- goto OUT;
- } else if (strncmp((*argv)[0], "--gc-debug", 10) == 0) {
+ case 128:
#if DISABLE_GC_DEBUG
- Parrot_warn(interpreter, PARROT_WARNINGS_ALL_FLAG,
- "PARROT_GC_DEBUG is set but the binary was "
- "compiled with DISABLE_GC_DEBUG.");
+ Parrot_warn(interpreter, PARROT_WARNINGS_ALL_FLAG,
+ "PARROT_GC_DEBUG is set but the binary was "
+ "compiled with DISABLE_GC_DEBUG.");
#endif
- setopt(PARROT_GC_DEBUG_FLAG);
- break;
- }
- case '\0': /* bare '-' means read from stdin */
- goto OUT;
- default:
- fprintf(stderr, "parrot: Invalid flag %c used\n",
- (*argv)[0][1]);
- Parrot_exit(1);
+ setopt(PARROT_GC_DEBUG_FLAG);
+ break;
}
-
- (*argc)--;
- (*argv)++;
}
+ *argv += opt.opt_index;
+ *argc -= opt.opt_index;

OUT:

- return (*argv)[0];
+ if ((*argv)[0])
+ return (*argv)[0];
+ else {
+ usage();
+ return 0; /* This won't happen */
+ }
}

static void
@@ -149,17 +165,18 @@

fprintf(stderr,
"Usage: parrot [switches] [--] programfile [arguments]\n\
- -b Activate bounds checks\n\
- -d Activate debugging\n\
- -h Display this message\n\
- -j Activate Just-In-Time compiler\n\
- -p Activate profiling\n\
- -P Activate predereferencing\n\
- -g %s\n\
- -t Activate tracing\n\
- -v Display version information\n\
- -. Wait for a keypress (gives Windows users time to attach a debugger)\n\
- --gc-debug\n\
+ -b --bounds-checks Activate bounds checks\n\
+ -d --debugging Activate debugging\n\
+ -h --help Display this message\n\
+ -j --jit Activate Just-In-Time compiler\n\
+ -p --profiling Activate profiling\n\
+ -P --predereferencing Activate predereferencing\n\
+ -g --no-computed-goto %s\n\
+ -t --tracing Activate tracing\n\
+ -v --version Display version information\n\
+ -. --wait Wait for a keypress (gives Windows users\n\
+ time to attach a debugger)\n\
+ --gc-debug\n\
Enable garbage collection debugging mode. This may also be enabled\n\
by setting the environment variable $PARROT_GC_DEBUG to 1.\n\
\n",
Index: config/gen/makefiles/root.in
===================================================================
RCS file: /cvs/public/parrot/config/gen/makefiles/root.in,v
retrieving revision 1.74
diff -u -r1.74 root.in
--- config/gen/makefiles/root.in 27 Apr 2003 08:09:05 -0000 1.74
+++ config/gen/makefiles/root.in 28 Apr 2003 09:07:13 -0000
@@ -91,7 +91,7 @@
$(INC)/regfuncs.h $(INC)/string_funcs.h $(INC)/encoding.h \
$(INC)/chartype.h $(INC)/oplib.h $(INC)/sub.h $(INC)/unicode.h \
$(INC)/perltypes.h $(INC)/exit.h $(INC)/nci.h $(INC)/pobj.h \
- $(INC)/thread.h $(INC)/tsq.h
+ $(INC)/thread.h $(INC)/tsq.h $(INC)/longopt.h


ALL_H_FILES = $(GENERAL_H_FILES)
@@ -118,7 +118,7 @@
packout$(O) byteorder$(O) debug$(O) smallobject$(O) \
headers$(O) dod$(O) method_util$(O) exit$(O) \
misc$(O) spf_render$(O) spf_vtable$(O) datatypes$(O) fingerprint$(O) \
- nci$(O) cpu_dep$(O) ${asmfun_o} tsq$(O)
+ nci$(O) cpu_dep$(O) ${asmfun_o} tsq$(O) longopt$(O)

O_FILES = $(INTERP_O_FILES) $(IO_O_FILES) $(CLASS_O_FILES) \
$(ENCODING_O_FILES) $(CHARTYPE_O_FILES)
@@ -386,6 +386,8 @@
method_util$(O) : $(GENERAL_H_FILES)

string$(O) : $(GENERAL_H_FILES)
+
+longopt$(O) : $(GENERAL_H_FILES)

chartype$(O) : $(GENERAL_H_FILES)

Index: include/parrot/parrot.h
===================================================================
RCS file: /cvs/public/parrot/include/parrot/parrot.h,v
retrieving revision 1.61
diff -u -r1.61 parrot.h
--- include/parrot/parrot.h 15 Apr 2003 21:49:04 -0000 1.61
+++ include/parrot/parrot.h 28 Apr 2003 09:07:13 -0000
@@ -229,6 +229,7 @@
#include "parrot/nci.h"
#include "parrot/thread.h"
#include "parrot/tsq.h"
+#include "parrot/longopt.h"
#endif

/*

Steve Fink

unread,
May 10, 2003, 4:00:56 PM5/10/03
to Luke Palmer, perl6-i...@perl.org
On Apr-28, Luke Palmer wrote:
> This patch implements a new, um, subsystem which does long option
> processing. I also changed test_main.c to use it. I'll do imcc soon.

Should I infer from this patch that getopt_long() isn't standard? I
can easily believe that, but I just wanted to check before committing
this.

Garrett Rooney

unread,
May 10, 2003, 4:09:48 PM5/10/03
to Steve Fink, Luke Palmer, perl6-i...@perl.org

That's correct, getopt_long is non-standard.

-garrett

Steve Fink

unread,
May 10, 2003, 5:32:23 PM5/10/03
to Luke Palmer, perl6-i...@perl.org
On Apr-28, Luke Palmer wrote:
> This patch implements a new, um, subsystem which does long option
> processing. I also changed test_main.c to use it. I'll do imcc soon.

Thanks, applied.

> I added longopt.c, include/parrot/longopt.h, and docs/dev/longopt.dev.
> I added these to MANIFEST.detailed and root.in. I'm not sure how to
> send a patch with new files (if that's even possible), so I just
> included the new files before the patch itself.

If you're using straight diff, you can diff against /dev/null on unix.
Or you can cvs add them and then do a cvs diff -uN. The -N will handle
added and deleted files.

> I was reluctant to make it a subsystemish thing, but if more than one
> thing's going to use it, it needs a header. Is there a better place I
> could put these things? Or name these things?

I like it much better as a "subsystemish thing", if by that you mean
it gets its own .h and .c files. And the names seem fine to me.

> Oh, I'm not quite sure how to write tests for command invocations.
> Some help?

The tests would go into t/src. I'd have to do more digging to figure
out how to pass different arguments to the executables, but if you're
ok with manually constructing an argv[] array, you could do it that
way. Look at t/src/basic.t for an example.

One thing I don't understand about basic.t, and intlist.t and probably
others -- there are TODO {} things scattered around, but they don't
seem to be used ($TODO is never set to a defined value). I assume that
stuff can just be ignored; I'm guessing it's a remnant of when the
tests weren't compiling correctly under Windows. (I *think* they do
now?)

Bruce Gray

unread,
May 11, 2003, 6:40:58 PM5/11/03
to perl6-i...@perl.org, Steve Fink
On Sat, 10 May 2003 14:32:23 -0700, Steve Fink wrote:
>One thing I don't understand about basic.t, and intlist.t and probably
>others
Also:
t/src/exit.t
t/src/list.t
t/src/sprintf.t

> there are TODO {} things scattered around, but they don't
>seem to be used ($TODO is never set to a defined value). I assume that
>stuff can just be ignored; I'm guessing it's a remnant of when the
>tests weren't compiling correctly under Windows.

Correct. They were all being skipped on Win32 for want of a static
library. Mattia Barbon resolved the problem and removed the $TODO
defining statements. See:
http://bugs6.perl.org/cgi-bin/cvsmonitor/cvsmonitor.pl?cmd=viewBrowseChangeSet&module=perl_public.parrot&id=3202

>(I *think* they do now?)

Yes, all the t/src/ tests pass on Win32.

Removing the remaining TODO scaffolding in these tests might prevent
future confusion.

--
Hope this helps,
Bruce Gray

Steve Fink

unread,
May 14, 2003, 3:02:56 AM5/14/03
to Bruce Gray, perl6-i...@perl.org

Cool! Done.

0 new messages