Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

Would someone tell me if I'm on the wrong track?

26 views
Skip to first unread message

luser- -droog

unread,
May 10, 2011, 2:47:01 AM5/10/11
to
I'm trying to build a new memory allocation scheme for
my postscript interpreter to satisfy these requirements:
% pointer + offset + size + short tag <= 32bits
% 'garbage-collection'-ready (structure the heap for easy iteration)
% all memory is in one region for easy restore from disk

So I've written a prototype where the "pointer" is an index
into an extendable address table stored in the heap itself.
And it seems to work! But I don't trust it, you know?

I'd appreciate any comments the experts here could offer.

One apology. I'm trying to use mmap if available or malloc
as a fallback, but ifdefs, it seems, can't be made pretty.
Or, at least I have yet to discover how, unless it's just to
"put it in a file you don't need to look at". :)

569(1)01:27 AM:proto 0> make v
cc -g -Wall v.c -o v
570(1)01:27 AM:proto 0> v
put seven, got a 7
571(1)01:27 AM:proto 0> cat v.c
#define MMAP
#define _GNU_SOURCE

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#ifdef MMAP
#include <sys/mman.h>
#endif /* otherwise, use malloc/realloc/free */

void error(char *msg) {
fprintf(stderr, "%s\n", msg);
exit(EXIT_FAILURE);
}

unsigned pgsz /*= getpagesize()*/;

typedef struct {
unsigned char *base;
unsigned used;
unsigned max;
} mfile;

/* initialize the memory file */
void initmem(mfile *mem) {
#ifdef MMAP
mem->base = mmap(NULL, pgsz, PROT_READ|PROT_WRITE, MAP_SHARED
|MAP_ANONYMOUS,-1,0 );
if (mem->base == MAP_FAILED)
#else
mem->base = malloc(pgsz);
if (mem->base == NULL)
#endif
error("unable to initialize memory file");
mem->used = 0;
mem->max = pgsz;
}

/* destroy memory file */
void exitmem(mfile *mem) {
#ifdef MMAP
munmap(mem->base, mem->max);
#else
free(mem->base);
#endif
mem->base = NULL;
mem->used = 0;
mem->max = 0;
}

/* grow memory by sz */
void growmem(mfile *mem, unsigned sz) {
unsigned char *tmp;
if (sz < pgsz) sz = pgsz;
else sz = (sz/pgsz + 1) * pgsz;
sz += mem->max;
#ifdef MMAP
tmp = mremap(mem->base, mem->max, sz, MREMAP_MAYMOVE);
if (tmp == MAP_FAILED)
#else
tmp = realloc(mem->base, sz);
if (!tmp)
#endif
error("unable to grow memory");
mem->base = tmp;
mem->max = sz;
}

/* allocate memory, returns offset in memory file */
unsigned mfalloc(mfile *mem, unsigned sz) {
unsigned adr = mem->used;
if (sz + mem->used > mem->max) growmem(mem,sz);
mem->used += sz;
return adr;
}


#define TABSZ 1000
typedef struct {
unsigned nexttab;
unsigned nextent;
struct {
unsigned adr;
} tab[TABSZ];
} mtab;

/* allocate and initialize a new table */
unsigned initmtab(mfile *mem) {
unsigned adr;
adr = mfalloc(mem, sizeof(mtab)); /*create mtab at address 0*/
memset(mem->base + adr, 0, sizeof(mtab));
return adr;
}

/* allocate memory, returns table index */
unsigned mtalloc(mfile *mem, unsigned mtabloc, unsigned sz) {
mtab *tab = (void *)(mem->base + mtabloc);
if (tab->nextent >= TABSZ)
return mtalloc(mem, tab->nexttab, sz);
else {
unsigned ent = tab->nextent;
tab->nextent++;
tab->tab[ent].adr = mfalloc(mem, sz);
if (tab->nextent == TABSZ) {
tab->nexttab = initmtab(mem);
}
return ent;
}
}

/* fetch a value from a composite object */
void get(mfile *mem, unsigned ent, unsigned offset, unsigned sz, void
*dest) {
mtab *tab;
unsigned mtabloc = 0;
tab = (void *)(mem->base + mtabloc);
while (ent >= TABSZ) {
tab = (void *)(mem->base + mtabloc);
mtabloc = tab->nexttab;
ent -= TABSZ;
}
memcpy(dest, mem->base + tab->tab[ent].adr + offset*sz, sz);
}

/* put a value into a composite object */
void put(mfile *mem, unsigned ent, unsigned offset, unsigned sz, void
*src) {
mtab *tab;
unsigned mtabloc = 0;
tab = (void *)(mem->base + mtabloc);
while (ent >= TABSZ) {
tab = (void *)(mem->base + mtabloc);
mtabloc = tab->nexttab;
ent -= TABSZ;
}
memcpy(mem->base + tab->tab[ent].adr + offset*sz, src, sz);
}

mfile mem;

/* initialize everything */
void init(void) {
pgsz = getpagesize();
initmem(&mem);
(void)initmtab(&mem); /* init table zero */
}

/* destroy everything */
void xit(void) {
exitmem(&mem);
}


int main() {
init();
unsigned adr;
int seven = 7;
int ret;
adr = mtalloc(&mem, 0, sizeof seven);
put(&mem, adr, 0, sizeof seven, &seven);
get(&mem, adr, 0, sizeof seven, &ret);
printf("put seven, got a %d\n", ret);

xit();
return 0;
}

--
572(1)01:27 AM:proto 0> !ni
nice -n 19 firefox &
[3] 4978
573(2)01:27 AM:proto 0>

luser- -droog

unread,
May 10, 2011, 3:05:24 AM5/10/11
to
On May 10, 1:47 am, luser- -droog <mijo...@yahoo.com> wrote:
> I'm trying to build a new memory allocation scheme for
> my postscript interpreter to satisfy these requirements:
> % pointer + offset + size + short tag <= 32bits
> % 'garbage-collection'-ready (structure the heap for easy iteration)
> % all memory is in one region for easy restore from disk
>
> So I've written a prototype where the "pointer" is an index
> into an extendable address table stored in the heap itself.
> And it seems to work! But I don't trust it, you know?
[...]

A little more testing always helps.
It still looks good. I feel a little better.

581(2)02:02 AM:proto 0> tail -20 v.c


printf("put seven, got a %d\n", ret);

unsigned adr2;
adr2 = mtalloc(&mem, 0, 8*sizeof seven);
put(&mem, adr2, 6, sizeof seven, &seven);
get(&mem, adr2, 6, sizeof seven, &ret);
printf("put seven in slot 7, got a %d\n", ret);

unsigned adr3;
char str[] = "beads in buddha's necklace";
char sret[sizeof str];
adr3 = mtalloc(&mem, 0, strlen(str)+1);
put(&mem, adr3, 0, sizeof str, str);
get(&mem, adr3, 0, sizeof str, sret);
printf("stored and retrieved %s\n", sret);

xit();
return 0;
}

582(2)02:02 AM:proto 0> v


put seven, got a 7

put seven in slot 7, got a 7
stored and retrieved beads in buddha's necklace
583(2)02:02 AM:proto 0>

io_x

unread,
May 11, 2011, 2:31:13 AM5/11/11
to

"luser- -droog" <mij...@yahoo.com> ha scritto nel messaggio
news:70cf2865-5c4a-47a5...@k22g2000yqh.googlegroups.com...

what about if this mult above overflow?

> sz += mem->max;

what about if this sz += overflow?
i not understand well the remain (too much complex
for my little think)
but i think, it is better to see if sz overflow
and in input to write "if((int) sz <0) return -1;"
so that each function can fail.
never exist one function can not fail


luser- -droog

unread,
May 11, 2011, 2:46:53 AM5/11/11
to
On May 11, 1:31 am, "io_x" <a...@b.c.invalid> wrote:
> "luser- -droog" <mijo...@yahoo.com> ha scritto nel messaggionews:70cf2865-5c4a-47a5...@k22g2000yqh.googlegroups.com...

That shouldn't be a problem for my application as sizes
will be limited to 16bit quantities.

> >    sz += mem->max;
>
> what about if this sz += overflow?
> i not understand well the remain (too much complex
> for my little think)
> but i think, it is better to see if sz overflow
> and in input to write "if((int) sz <0)  return  -1;"
> so that each function can fail.
> never exist one function can not fail

Yeah, you're right. There should be a lot more checking
throughout.

luser- -droog

unread,
May 12, 2011, 2:05:45 AM5/12/11
to
On May 11, 1:31 am, "io_x" <a...@b.c.invalid> wrote:
> "luser- -droog" <mijo...@yahoo.com> ha scritto nel messaggionews:70cf2865-5c4a-47a5...@k22g2000yqh.googlegroups.com...

>
>
>
> > I'm trying to build a new memory allocation scheme for
> > my postscript interpreter to satisfy these requirements:
> > % pointer + offset + size + short tag <= 32bits
> > % 'garbage-collection'-ready (structure the heap for easy iteration)
> > % all memory is in one region for easy restore from disk
>
> > So I've written a prototype where the "pointer" is an index
> > into an extendable address table stored in the heap itself.
> > And it seems to work! But I don't trust it, you know?
>
> > I'd appreciate any comments the experts here could offer.
>
[...]

> > /* grow memory by sz */
> > void growmem(mfile *mem, unsigned sz) {
> >    unsigned char *tmp;
> >    if (sz < pgsz) sz = pgsz;
> >    else sz = (sz/pgsz + 1) * pgsz;
>
> what about if this mult above overflow?
>
> >    sz += mem->max;
>
> what about if this sz += overflow?
> i not understand well the remain (too much complex
> for my little think)
> but i think, it is better to see if sz overflow
> and in input to write "if((int) sz <0)  return  -1;"
> so that each function can fail.
> never exist one function can not fail

unsigned sz;
(int) sz < 0

Does this just check the high bit (assuming 2's complement)?

Seebs

unread,
May 12, 2011, 3:36:18 AM5/12/11
to
On 2011-05-12, luser- -droog <mij...@yahoo.com> wrote:
> unsigned sz;
> (int) sz < 0
>
> Does this just check the high bit (assuming 2's complement)?

Not necessarily.

If you're comfortable assuming 2s complement, I bet you can do better with:
sz > INT_MAX

-s
--
Copyright 2011, all wrongs reversed. Peter Seebach / usenet...@seebs.net
http://www.seebs.net/log/ <-- lawsuits, religion, and funny pictures
http://en.wikipedia.org/wiki/Fair_Game_(Scientology) <-- get educated!
I am not speaking for my employer, although they do rent some of my opinions.

pete

unread,
May 12, 2011, 7:51:10 AM5/12/11
to
Seebs wrote:
>
> On 2011-05-12, luser- -droog <mij...@yahoo.com> wrote:
> > unsigned sz;
> > (int) sz < 0
> >
> > Does this just check the high bit (assuming 2's complement)?
>
> Not necessarily.
>
> If you're comfortable assuming 2s complement,
> I bet you can do better with:
> sz > INT_MAX

I don't see anything unusual
about an unsigned int object
holding a value greater than INT_MAX.

--
pete

pete

unread,
May 12, 2011, 8:01:35 AM5/12/11
to

I don't know,
but I don't see the relevance of ((int) sz < 0).

If you want to check for wrap around after (sz += mem->max), then

if (sz - mem->max > sz) ThenItWrapped;

--
pete

Seebs

unread,
May 12, 2011, 11:57:53 AM5/12/11
to
On 2011-05-12, pete <pfi...@mindspring.com> wrote:
>> unsigned sz;
>> (int) sz < 0

>> Does this just check the high bit (assuming 2's complement)?

> I don't know,
> but I don't see the relevance of ((int) sz < 0).

The relevance was in that question there.

On a typical 2s complement system, it is quite likely that if you convert
a value greater than INT_MAX but no larger than UINT_MAX to a signed integer
type, you will end up with a negative value. Thus,
sz > INT_MAX
and
(int) sz < 0
are both fancy ways of saying something much like:

(sz & ((UNIT_MAX) & ~(UINT_MAX >> 1))) != 0

(unless my lack of morning coffee made me get that wrong...)

luser- -droog

unread,
May 12, 2011, 12:33:21 PM5/12/11
to

I think that's closer to what io_x was suggesting.
For my current purposes sz will be no more than 8*USHORTMAX (if there
is such a thing). But mem-max itself could conceivably get pretty big.
(~2 gigs, right? assuming 32-bit words).

If I approach this limit, I think I can squeeze a little more life out
of this if I make mem->max and mem->used 64bits (since they cover the
whole space), but change the adresses to be relative to the table
location (mtabloc, above) instead of mem->base.

I'm eager to start building on top of this module, but I want to get
as close to "right" as is practical.

pete

unread,
May 12, 2011, 6:04:45 PM5/12/11
to
Seebs wrote:
>
> On 2011-05-12, pete <pfi...@mindspring.com> wrote:
> >> unsigned sz;
> >> (int) sz < 0
>
> >> Does this just check the high bit (assuming 2's complement)?
>
> > I don't know,
> > but I don't see the relevance of ((int) sz < 0).
>
> The relevance was in that question there.
>
> On a typical 2s complement system,
> it is quite likely that if you convert
> a value greater than INT_MAX but no larger than UINT_MAX
> to a signed integer type,
> you will end up with a negative value. Thus,
> sz > INT_MAX
> and
> (int) sz < 0
> are both fancy ways of saying something much like:
>
> (sz & ((UNIT_MAX) & ~(UINT_MAX >> 1))) != 0
>
> (unless my lack of morning coffee made me get that wrong...)

But
why would you care about
whether or not an unsigned object
held a value which was greater than INT_MAX?

--
pete

Seebs

unread,
May 12, 2011, 6:18:07 PM5/12/11
to

Reasons might include:
* Want to check whether high bit is set
* Want to be sure it's well-defined to convert it to int

Peter Nilsson

unread,
May 13, 2011, 1:47:47 AM5/13/11
to
On May 12, 4:05 pm, luser- -droog <mijo...@yahoo.com> wrote:
> unsigned sz;
> (int) sz < 0

Conversion of an unsigned int value not in the range of int to
an int is unspecified in C90 and (to all intents and purposes)
undefined behaviour in C99.

Also note that UINT_MAX may be more than 2 * INT_MAX + 1.

Addition of two unsigned ints a and b will 'wrap' if and only
if (a > -1u - b) [i.e. mathematically a + b > UINT_MAX.]

--
Peter

luser- -droog

unread,
May 13, 2011, 2:05:38 AM5/13/11
to
On May 12, 5:04 pm, pete <pfil...@mindspring.com> wrote:
> Seebs wrote:
>

Well, I don't, really. But the only response my code got
was a cryptic message from io_x containing this bit
of c as an example. So it caught my eye and I wondered:
what the heck is this trying to do?

I think the gist of what io_x was trying to say is:
don't trust your input values in a function;
check that they are all in a meaningful range.

I think he recommended this code as a quick-and-dirty
failsafe check for the validity of the sz parameter.
Hitting the high-bit would mean attempting to allocate
more than 1 gigabyte in my delicate little suite of
functions.

But he may have picked that parameter arbitrarily.
I'd be much more worried about overflowing the mem->max
or mem->used values.

io_x

unread,
May 13, 2011, 4:21:40 AM5/13/11
to

"luser- -droog" <mij...@yahoo.com> ha scritto nel messaggio
news:a4c3ad37-8227-4a79...@n11g2000yqf.googlegroups.com...

....
----------
ok i mean above; but it is difficult to calculate the right
range, so because i'm a little lazy
so i use one standard way for index "parameter"<Unsigned_max/2
here == (int)"parameter" but not always;
the day i need all the unsigned as index i change


io_x

unread,
May 13, 2011, 4:21:48 AM5/13/11
to
---------------------
"Peter Nilsson" <ai...@acay.com.au> ha scritto nel messaggio
news:1143794b-7eeb-417a...@q12g2000prb.googlegroups.com...

--
Peter
-------------------------

you can program a set of manchines

you can to program one machine

if someone has to put one value in a register
he should know how many bits that
register has

the result of program one set of machines
instead of one machine only is code more bugger
or that begin soon difficult to understand
for humans
Buona Giornata

Ben Bacarisse

unread,
May 13, 2011, 10:34:59 PM5/13/11
to
Seebs <usenet...@seebs.net> writes:

> On 2011-05-12, luser- -droog <mij...@yahoo.com> wrote:
>> unsigned sz;
>> (int) sz < 0
>>
>> Does this just check the high bit (assuming 2's complement)?
>
> Not necessarily.
>
> If you're comfortable assuming 2s complement, I bet you can do better with:
> sz > INT_MAX

The assumption is not 2's complement as far as I can see. Your test
works in all of C's three permitted signed integer representations (how
can it not? -- it does not involve any signed integers). It fails if
unsigned int has more then one more value bit that signed int.

--
Ben.

pete

unread,
May 13, 2011, 11:19:40 PM5/13/11
to

Also if
unsigned int has less than one more value bit than signed int.

--
pete

Ben Bacarisse

unread,
May 14, 2011, 7:02:50 AM5/14/11
to
pete <pfi...@mindspring.com> writes:

Yes. It would have been simpler just to say that it fails unless
unsigned int has exactly one more value bit than signed int.

--
Ben.

Tim Rentsch

unread,
May 19, 2011, 1:21:13 AM5/19/11
to
Peter Nilsson <ai...@acay.com.au> writes:

> On May 12, 4:05 pm, luser- -droog <mijo...@yahoo.com> wrote:
>> unsigned sz;
>> (int) sz < 0
>
> Conversion of an unsigned int value not in the range of int to
> an int is unspecified in C90 and (to all intents and purposes)
> undefined behaviour in C99.

Correction: in both cases the behavior is implementation defined.
In C90 that is limited to producing an implementation-defined
value. C99 expands that just slightly to allow raising an
implementation-defined signal. This is different from UB both in
principle and in practice: in principle, because whatever occurs
must be documented; in practice, because the range of behaviors
that actually occur is (and always will be) much more well-behaved
than arbitrary undefined behavior. Invoking such behavior may not
be generally desirable, but it is not undefined behavior, not even
just to all intents and purposes.

Peter Nilsson

unread,
May 19, 2011, 2:47:56 AM5/19/11
to
Tim Rentsch <t...@alumni.caltech.edu> wrote:

> Peter Nilsson <ai...@acay.com.au> writes:
> > Conversion of an unsigned int value not in the range of int to
> > an int is unspecified in C90 and (to all intents and purposes)
> > undefined behaviour in C99.
>
> Correction:  in both cases the behavior is implementation defined.

So, unspecified but requiring the implementation to document its
choice.

> In C90 that is limited to producing an implementation-defined
> value.  C99 expands that just slightly to allow raising an
> implementation-defined signal.  This is different from UB both
> in principle

Perhaps.

> and in practice:

Well, I suppose it's possible to do...

void bye(int sig) { _Exit(0); }

int main(void)
{
for (int sig = 0; sig < INT_MAX; sig++)
signal(sig, bye);
signal(INT_MAX, bye);
...safe and sound...
}

But that's not exactly practical on a 64-bit int machine.

--
Peter

Tim Rentsch

unread,
May 19, 2011, 2:00:38 PM5/19/11
to
Peter Nilsson <ai...@acay.com.au> writes:

> Tim Rentsch <t...@alumni.caltech.edu> wrote:
>> Peter Nilsson <ai...@acay.com.au> writes:
>> > Conversion of an unsigned int value not in the range of int to
>> > an int is unspecified in C90 and (to all intents and purposes)
>> > undefined behaviour in C99.
>>
>> Correction: in both cases the behavior is implementation defined.
>
> So, unspecified but requiring the implementation to document its
> choice.

Yes, that follows from the definition of implementation-defined
behavior. It's still important to make the distinction.

>> In C90 that is limited to producing an implementation-defined
>> value. C99 expands that just slightly to allow raising an
>> implementation-defined signal. This is different from UB both
>> in principle
>
> Perhaps.
>
>> and in practice:
>
> Well, I suppose it's possible to do...
>
> void bye(int sig) { _Exit(0); }
>
> int main(void)
> {
> for (int sig = 0; sig < INT_MAX; sig++)
> signal(sig, bye);
> signal(INT_MAX, bye);
> ...safe and sound...
> }
>
> But that's not exactly practical on a 64-bit int machine.

Oh come on, I know you're smarter than that. Because
the signal in question is implementation-defined, it
must be documented, and an appropriate signal call
can be written. More to the point, such calls are
necessary only if the program is going to run on
implementations that do in fact raise a signal in
these cases, and almost none do; simply checking
the documentation for the implementations that are
to be supported will reveal which ones need to set
a signal handler, and the most likely case is that
none will.

You seem to be assuming that it only makes sense to write
code that is guaranteed to be 100% portable for any
imaginable conforming implementation. Obviously that isn't
realistically practical for a program of any significant
size -- there are bound to be some rough edges here and
there. As non-portable transgressions go, converting an
out-of-range value to a signed type is among the least
likely to be problematic.

0 new messages