I won't have much time to look into it in the next few days, but I can
perhaps later in the week. This is a very fast algorithm, so it'd be
great to have a working implementation.
David
Darn. I'll probably have time to look at it next week too.
This is strange:
>> a = np.random.rand(100)
>> window = 1
>> ll = roly.linkedlist.move_median(a, window)
>> dh = roly.doubleheap.move_median(a, window)
>> (ll[window:] == dh[window:]).all()
True
>>
>> window = 2
>> ll = roly.linkedlist.move_median(a, window)
>> dh = roly.doubleheap.move_median(a, window)
>> (ll[window:] == dh[window:]).all()
True
>>
>> window = 3
>> ll = roly.linkedlist.move_median(a, window)
>> dh = roly.doubleheap.move_median(a, window)
>> (ll[window:] == dh[window:]).all()
True
>>
>> window = 4
>> ll = roly.linkedlist.move_median(a, window)
>> dh = roly.doubleheap.move_median(a, window)
>> (ll[window:] == dh[window:]).all()
True
>>
>> window = 5
>> ll = roly.linkedlist.move_median(a, window)
>> dh = roly.doubleheap.move_median(a, window)
>> (ll[window:] == dh[window:]).all()
True
>>
>> window = 6
>> ll = roly.linkedlist.move_median(a, window)
>> dh = roly.doubleheap.move_median(a, window)
>> (ll[window:] == dh[window:]).all()
True
>>
>> window = 7
>> ll = roly.linkedlist.move_median(a, window)
>> dh = roly.doubleheap.move_median(a, window)
>> (ll[window:] == dh[window:]).all()
False
>>
>> (ll[window:] == dh[window:])
array([ True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, False, False, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, False, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, False, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
False, False, False, True, True, True, True, True, True,
True, True, False, False, False, False, False, True, True,
True, True, True, True, True, True, True, True, True,
True, True, False], dtype=bool)
>> np.absolute(ll[window:] - dh[window:]).max()
0.1940714915596824
Am I converting window=7 to even?
Richard,
I think I introduced a bug in your double heap moving median code when
I tried to modify the API to fit the needs of roly.
Can you spot any bugs in the code below? Does it matter if I select
from the small or big heaps? Or that I select from both when the
window is even?
win_s *
init_winstruct(int nw)
{
win_s * w;
w = malloc(sizeof *w);
if (!w) {printf("malloc failed! Bye."); exit(EXIT_FAILURE);}
w->nodes = malloc(nw * sizeof *(w->nodes));
if (!w->nodes) {printf("malloc failed! Bye."); exit(EXIT_FAILURE);}
w->small = malloc(nw * sizeof *(w->small));
if (!w->small) {printf("malloc failed! Bye."); exit(EXIT_FAILURE);}
w->index = 0;
w->nw = nw;
w->nb = nw/2;
w->ns = nw - w->nb;
w->big = w->small + w->ns;
if (nw % 2 == 1){
w->odd = 1;
} else {
w->odd = 0;
}
return w;
}
double
get_median(win_s *w)
{
if(w->odd) {
return w->small[0]->value;
} else {
return (w->small[0]->value + w->big[0]->value) / 2.0;
}
}
I added some debugging print statements. In the following example the
buffer that holds the small values is not always sorted:
>> a = np.arange(10).astype('float64')
>> window = 7
>> import roly
>> roly.doubleheap.move_median(a, window)
# This one is good
Small Heap (length=4)
3.000000 2.000000 1.000000 0.000000
Big Heap (length=3)
4.000000 5.000000 6.000000
Median: 3.000000
# Note that 4.0 should be the first value of the small heap; output is wrong
Small Heap (length=4)
3.000000 2.000000 1.000000 4.000000
Big Heap (length=3)
5.000000 7.000000 6.000000
Median: 3.000000
# This one is good
Small Heap (length=4)
5.000000 2.000000 3.000000 4.000000
Big Heap (length=3)
6.000000 7.000000 8.000000
Median: 5.000000
# This one is good
Small Heap (length=4)
6.000000 5.000000 3.000000 4.000000
Big Heap (length=3)
7.000000 9.000000 8.000000
Median: 6.000000
array([ nan, nan, nan, nan, nan, nan, 3., 3., 5., 6.])