Tuple-translation in C: my mistake or memory corruption?

118 views
Skip to first unread message

Tim Holy

unread,
May 3, 2015, 2:30:55 PM5/3/15
to juli...@googlegroups.com
Did I screw up the code below? This is in service of #10911, so I can compare
old and new versions of algorithms with code like:

int julia_subtype_le(args...)
{
result_old = jl_subtype_le_old(args...)
result_new = jl_subtype_le_new(translate(args...)...); // in julia-ish C
if (result_old != result_new)
complain_loudly();
end
return result_old;
}

where "translate" calls the code below.

I _think_ it's a memory corruption problem rather than a mistake, but I'm
really hoping it's a dumb mistake on my part and that someone publicly
humiliates me. Because it will save me a lot of time.

(This is approach number 4 for #10911; I've been working on this for days on
end, and were I twenty years younger I would have long ago burned my laptop to
a crisp and devoted the rest of my life to knitting. It's amazing how many
ways there are to break julia if you don't get subtype & intersection
relationships exactly correct. My admiration for Jeff knows no bounds.)

Best,
--Tim

// On-the-fly translation of NTuple{N,T} into Tuple{Vararg{T,N}}
// These are outrageously defensive in terms of GC behavior, because
// (1) who knows where these will be called from, and (2) I got more
// and more desperate to figure out the segfault.
static jl_datatype_t *ntuple_translate_tuple(jl_datatype_t *tt);
// Translate a single type value `v`:
//   - a tuple type is translated parameter-by-parameter through
//     ntuple_translate_tuple();
//   - an NTuple type is rebuilt with jl_wrap_vararg() on its two type
//     parameters, followed by jl_tupletype_fill(1, ...);
//   - anything else is handed back untouched.
// `v` and both temporaries stay in a GC frame for the whole call.
static jl_value_t *ntuple_translate(jl_value_t *v)
{
    jl_value_t *vararg = NULL, *out = NULL;
    JL_GC_PUSH3(&v, &vararg, &out);
    if (jl_is_tuple_type(v)) {
        out = (jl_value_t*)ntuple_translate_tuple((jl_datatype_t*)v);
    }
    else if (jl_is_ntuple_type(v)) {
        vararg = (jl_value_t*) jl_wrap_vararg(jl_tparam1(v), jl_tparam0(v));
        out = (jl_value_t*)jl_tupletype_fill(1, vararg);
    }
    else {
        // Neither a tuple type nor an NTuple: nothing to translate.
        out = v;
    }
    JL_GC_POP();
    return out;
}

// Build a new svec of length n whose i-th element is
// ntuple_translate(data[i]), and return it.
//
// A GC frame of 2*n+2 slots is pushed via JL_GC_PUSHARGS:
//   protected[0 .. n-1]   copies of data[i]
//   protected[n .. 2n-1]  NULL, then the elements read back from snew
//   protected[2n]         intended for `translated`
//   protected[2n+1]       intended for `snew`
//
// NOTE(review): the last two slots receive the *current values* of the
// locals (both NULL at that point). Later assignments to `translated` and
// `snew` do not update those slots, so the freshly allocated svec is never
// stored in the frame. This is the GC mistake identified in the reply that
// follows this post in the thread; presumably a collection triggered inside
// ntuple_translate() can then reclaim snew, which would explain the marked
// assertion failure -- confirm against the JL_GC_PUSHARGS definition.
static jl_svec_t *ntuple_translate_data(jl_value_t **data, int n)
{
int i;
jl_value_t **protected;
jl_value_t *translated = NULL;
jl_svec_t *snew = NULL;
JL_GC_PUSHARGS(protected, 2*n+2);
// Fill every slot of the frame before any call that might allocate.
for (i = 0; i < n; i++) {
protected[i] = data[i];
protected[i+n] = NULL;
}
// These two stores copy NULL into the slots; they do not tie the slots to
// the local variables (see NOTE(review) above).
protected[2*n] = translated;
protected[2*n+1] = (jl_value_t*)snew;
//snew = jl_alloc_svec_uninit(n);
snew = jl_alloc_svec(n);
assert(jl_is_svec(snew));
// Copy the new svec's current elements into the second group of slots.
for (i = 0; i < n; i++)
protected[n+i] = jl_svecref(snew, i);
// Translate each input element and store it into the new svec. The asserts
// bracket the ntuple_translate() call to detect snew being clobbered.
for (i = 0; i < n; i++) {
assert(jl_is_svec(snew));
translated = ntuple_translate(data[i]);
assert(jl_is_svec(snew)); // <----Failure occurs here
jl_svecset(snew, i, translated);
}
JL_GC_POP();
return snew;
}

// Translate every element of the svec `sv` by forwarding its data pointer
// and length to ntuple_translate_data(). Both `sv` and the translated svec
// are kept in a GC frame until just before returning.
static jl_svec_t *ntuple_translate_svec(jl_svec_t *sv)
{
    jl_svec_t *translated = NULL;
    JL_GC_PUSH2(&sv, &translated);
    translated = ntuple_translate_data(jl_svec_data(sv), jl_svec_len(sv));
    JL_GC_POP();
    return translated;
}

// Translate a tuple type: run its parameter svec through
// ntuple_translate_svec() and apply jl_apply_tuple_type() to the translated
// parameters. Asserts that `tt` is a tuple type.
static jl_datatype_t *ntuple_translate_tuple(jl_datatype_t *tt)
{
    assert(jl_is_tuple_type(tt));
    jl_svec_t *params = NULL;
    jl_datatype_t *translated;
    JL_GC_PUSH2(&tt, &params);
    params = ntuple_translate_svec(tt->parameters);
    // The frame is popped immediately after this call, so the result is
    // never placed in a frame slot of its own.
    translated = jl_apply_tuple_type(params);
    JL_GC_POP();
    return translated;
}


Backtrace:

Program received signal SIGABRT, Aborted.
0x00007ffff6733cc9 in __GI_raise (sig=sig@entry=6) at
../nptl/sysdeps/unix/sysv/linux/raise.c:56
56 ../nptl/sysdeps/unix/sysv/linux/raise.c: No such file or directory.
(gdb) bt
#0 0x00007ffff6733cc9 in __GI_raise (sig=sig@entry=6) at
../nptl/sysdeps/unix/sysv/linux/raise.c:56
#1 0x00007ffff67370d8 in __GI_abort () at abort.c:89
#2 0x00007ffff672cb86 in __assert_fail_base (fmt=0x7ffff687d830 "%s%s%s:%u:
%s%sAssertion `%s' failed.\n%n",
assertion=assertion@entry=0x7ffff7795a08 "(((jl_value_t*)(((jl_taggedvalue_t
*) ((char *)((snew)) - __builtin_offsetof (jl_taggedvalue_t, value)))-
>type_bits&~(size_t)3))==(jl_value_t*)(jl_simplevector_type))",
file=file@entry=0x7ffff77959e3 "jltypes.c",
line=line@entry=321, function=function@entry=0x7ffff77964c0
<__PRETTY_FUNCTION__.10413> "ntuple_translate_data") at assert.c:92
#3 0x00007ffff672cc32 in __GI___assert_fail (
assertion=0x7ffff7795a08 "(((jl_value_t*)(((jl_taggedvalue_t *) ((char *)
((snew)) - __builtin_offsetof (jl_taggedvalue_t, value)))-
>type_bits&~(size_t)3))==(jl_value_t*)(jl_simplevector_type))",
file=0x7ffff77959e3 "jltypes.c", line=321,
function=0x7ffff77964c0 <__PRETTY_FUNCTION__.10413> "ntuple_translate_data")
at assert.c:101
#4 0x00007ffff6da695c in ntuple_translate_data (data=0x7ffdf40a7698, n=2) at
jltypes.c:321
#5 0x00007ffff6daf2b6 in jl_tuple_subtype_ (child=0x7ffdf40a7698, cl=2,
pdt=0x7ffdf5cde0b0, ta=0, invariant=0) at jltypes.c:2480
#6 0x00007ffff6dafabf in jl_subtype_le (a=0x7ffdf40bdab0, b=0x7ffdf5cde0b0, ta=0,
invariant=0) at jltypes.c:2587
[truncated; line numbers definitely don't correspond to what's currently in
master]

Jameson Nash

unread,
May 3, 2015, 3:48:37 PM5/3/15
to juli...@googlegroups.com
I can't promise miracles, but there is a GC mistake here. I've written below what I think you meant to write in that part of the function:

static jl_svec_t *ntuple_translate_data(jl_value_t **data, int n)
{
    int i;
    jl_value_t **protected;
    jl_value_t **translated;
    jl_svec_t **snew;

    JL_GC_PUSHARGS(protected, 2*n+2);
    for (i = 0; i < n; i++) {
        protected[i] = data[i];
        protected[i+n] = NULL;
    }
    translated = &protected[2*n];
    snew = &protected[2*n+1];
...
    JL_GC_POP();
    return *snew;
}

not an error, but this function doesn't need a GC frame:
// Frame-less variant: forwards the svec's data pointer and length straight
// to ntuple_translate_data() and returns whatever it produces.
static jl_svec_t *ntuple_translate_svec(jl_svec_t *sv)
{
    jl_svec_t *translated = ntuple_translate_data(jl_svec_data(sv), jl_svec_len(sv));
    return translated;
}
because (a) the argument `sv` should already have been rooted by the caller, and (b) there is no GC collection point between the return value being assigned and the JL_GC_POP call on the next line

Tim Holy

unread,
May 3, 2015, 5:16:22 PM5/3/15
to juli...@googlegroups.com
Thanks a ton, Jameson---you were totally right about the error. For the
record, I think this:

// Revised version: build a new svec of length n whose i-th element is
// ntuple_translate(data[i]), and return it.
//
// The GC frame pushed via JL_GC_PUSHARGS has 2*n+1 slots:
//   protected[0 .. n-1]   copies of data[i]
//   protected[n .. 2n-1]  NULL, then the elements read back from snew
//   protected[2n]         NULL, then the new svec itself
// Unlike the earlier version, the allocated svec is written into its slot
// right after allocation instead of copying a NULL local beforehand.
static jl_svec_t *ntuple_translate_data(jl_value_t **data, int n)
{
int i;
jl_value_t **protected;
JL_GC_PUSHARGS(protected, 2*n+1);
// Every slot is given a value before the first allocating call below
// (presumably so the collector never scans an uninitialized slot -- confirm
// against the JL_GC_PUSHARGS definition).
for (i = 0; i < n; i++) {
protected[i] = data[i];
protected[i+n] = NULL; // just in case
}
protected[2*n] = NULL;
jl_svec_t *snew = jl_alloc_svec(n);
// Store the new svec in the frame's last slot.
protected[2*n] = (jl_value_t*) snew;
// Copy the new svec's current elements into the second group of slots.
for (i = 0; i < n; i++)
protected[i+n] = jl_svecref(snew, i);
assert(jl_is_svec(snew));
// Translate each input element and store it into the new svec.
// NOTE(review): `translated` lives only in a local until jl_svecset()
// stores it into snew; it is not written to a frame slot.
for (i = 0; i < n; i++) {
assert(jl_is_svec(snew));
jl_value_t *translated = ntuple_translate(data[i]);
assert(jl_is_svec(snew));
jl_svecset(snew, i, translated);
}
JL_GC_POP();
return snew;
}

also works (mostly) and is a little simpler. The good news is I can now run
all the tests if I run normally. The bad news is that I'm still getting a
segfault only when I run under gdb. (Both are with julia-debug, too. Strange.)
I'm trying valgrind and rr. But this feels like progress, so many thanks!

--Tim
Reply all
Reply to author
Forward
0 new messages