Patch 7.4.351

113 views
Skip to first unread message

Bram Moolenaar

unread,
Jul 2, 2014, 1:06:38 PM7/2/14
to vim...@googlegroups.com

Patch 7.4.351
Problem: sort() is not stable.
Solution: When the items are identical, compare the pointers.
Files: src/eval.c, src/testdir/test55.in, src/testdir/test55.ok


*** ../vim-7.4.350/src/eval.c 2014-06-25 17:31:04.942737863 +0200
--- src/eval.c 2014-07-02 18:52:19.102313288 +0200
***************
*** 17334,17339 ****
--- 17334,17340 ----
static char_u *item_compare_func;
static dict_T *item_compare_selfdict;
static int item_compare_func_err;
+ static int item_compare_keep_zero;
static void do_sort_uniq __ARGS((typval_T *argvars, typval_T *rettv, int sort));
#define ITEM_COMPARE_FAIL 999

***************
*** 17374,17379 ****
--- 17375,17386 ----
n2 = strtod((char *)p2, (char **)&p2);
res = n1 == n2 ? 0 : n1 > n2 ? 1 : -1;
}
+
+ /* When the result would be zero, compare the pointers themselves. Makes
+ * the sort stable. */
+ if (res == 0 && !item_compare_keep_zero)
+ res = s1 > s2 ? 1 : -1;
+
vim_free(tofree1);
vim_free(tofree2);
return res;
***************
*** 17396,17402 ****
if (item_compare_func_err)
return 0;

! /* copy the values. This is needed to be able to set v_lock to VAR_FIXED
* in the copy without changing the original list items. */
copy_tv(&(*(listitem_T **)s1)->li_tv, &argv[0]);
copy_tv(&(*(listitem_T **)s2)->li_tv, &argv[1]);
--- 17403,17409 ----
if (item_compare_func_err)
return 0;

! /* Copy the values. This is needed to be able to set v_lock to VAR_FIXED
* in the copy without changing the original list items. */
copy_tv(&(*(listitem_T **)s1)->li_tv, &argv[0]);
copy_tv(&(*(listitem_T **)s2)->li_tv, &argv[1]);
***************
*** 17415,17420 ****
--- 17422,17433 ----
if (item_compare_func_err)
res = ITEM_COMPARE_FAIL; /* return value has wrong type */
clear_tv(&rettv);
+
+ /* When the result would be zero, compare the pointers themselves. Makes
+ * the sort stable. */
+ if (res == 0 && !item_compare_keep_zero)
+ res = s1 > s2 ? 1 : -1;
+
return res;
}

***************
*** 17509,17514 ****
--- 17522,17528 ----
ptrs[i++] = li;

item_compare_func_err = FALSE;
+ item_compare_keep_zero = FALSE;
/* test the compare function */
if (item_compare_func != NULL
&& item_compare2((void *)&ptrs[0], (void *)&ptrs[1])
***************
*** 17536,17541 ****
--- 17550,17556 ----

/* f_uniq(): ptrs will be a stack of items to remove */
item_compare_func_err = FALSE;
+ item_compare_keep_zero = TRUE;
item_compare_func_ptr = item_compare_func
? item_compare2 : item_compare;

*** ../vim-7.4.350/src/testdir/test55.in 2014-06-26 22:33:47.850693627 +0200
--- src/testdir/test55.in 2014-07-02 19:00:09.238320492 +0200
***************
*** 332,340 ****
:$put =string(reverse(sort(l)))
:$put =string(sort(reverse(sort(l))))
:$put =string(uniq(sort(l)))
! :let l=[7, 9, 18, 12, 22, 10.0e-16, -1, 0xff, 0.22, 'foo']
:$put =string(sort(copy(l), 'n'))
! :let l=[7, 9, 18, 12, 22, 10.0e-16, -1, 0xff, 0, -0, 0.22, 'foo', 'FOOBAR',{}, []]
:$put =string(sort(copy(l), 1))
:$put =string(sort(copy(l), 'i'))
:$put =string(sort(copy(l)))
--- 332,340 ----
:$put =string(reverse(sort(l)))
:$put =string(sort(reverse(sort(l))))
:$put =string(uniq(sort(l)))
! :let l=[7, 9, 'one', 18, 12, 22, 'two', 10.0e-16, -1, 'three', 0xff, 0.22, 'four']
:$put =string(sort(copy(l), 'n'))
! :let l=[7, 9, 18, 12, 22, 10.0e-16, -1, 0xff, 0, -0, 0.22, 'bar', 'BAR', 'Bar', 'Foo', 'FOO', 'foo', 'FOOBAR', {}, []]
:$put =string(sort(copy(l), 1))
:$put =string(sort(copy(l), 'i'))
:$put =string(sort(copy(l)))
*** ../vim-7.4.350/src/testdir/test55.ok 2014-06-26 22:33:47.850693627 +0200
--- src/testdir/test55.ok 2014-07-02 19:00:57.078321225 +0200
***************
*** 101,110 ****
[[0, 1, 2], [0, 1, 2], 4, 2, 2, 1.5, 'xaaa', 'x8', 'foo6', 'foo', 'foo', 'A11', '-0']
['-0', 'A11', 'foo', 'foo', 'foo6', 'x8', 'xaaa', 1.5, 2, 2, 4, [0, 1, 2], [0, 1, 2]]
['-0', 'A11', 'foo', 'foo6', 'x8', 'xaaa', 1.5, 2, 4, [0, 1, 2]]
! [-1, 'foo', 1.0e-15, 0.22, 7, 9, 12, 18, 22, 255]
! ['foo', 'FOOBAR', -1, 0, 0, 0.22, 1.0e-15, 12, 18, 22, 255, 7, 9, [], {}]
! ['foo', 'FOOBAR', -1, 0, 0, 0.22, 1.0e-15, 12, 18, 22, 255, 7, 9, [], {}]
! ['FOOBAR', 'foo', -1, 0, 0, 0.22, 1.0e-15, 12, 18, 22, 255, 7, 9, [], {}]
['aa', 'bb']
['aa', 'bb']
['', 'aa', 'bb', '']
--- 101,110 ----
[[0, 1, 2], [0, 1, 2], 4, 2, 2, 1.5, 'xaaa', 'x8', 'foo6', 'foo', 'foo', 'A11', '-0']
['-0', 'A11', 'foo', 'foo', 'foo6', 'x8', 'xaaa', 1.5, 2, 2, 4, [0, 1, 2], [0, 1, 2]]
['-0', 'A11', 'foo', 'foo6', 'x8', 'xaaa', 1.5, 2, 4, [0, 1, 2]]
! [-1, 'one', 'two', 'three', 'four', 1.0e-15, 0.22, 7, 9, 12, 18, 22, 255]
! ['bar', 'BAR', 'Bar', 'Foo', 'FOO', 'foo', 'FOOBAR', -1, 0, 0, 0.22, 1.0e-15, 12, 18, 22, 255, 7, 9, [], {}]
! ['bar', 'BAR', 'Bar', 'Foo', 'FOO', 'foo', 'FOOBAR', -1, 0, 0, 0.22, 1.0e-15, 12, 18, 22, 255, 7, 9, [], {}]
! ['BAR', 'Bar', 'FOO', 'FOOBAR', 'Foo', 'bar', 'foo', -1, 0, 0, 0.22, 1.0e-15, 12, 18, 22, 255, 7, 9, [], {}]
['aa', 'bb']
['aa', 'bb']
['', 'aa', 'bb', '']
*** ../vim-7.4.350/src/version.c 2014-07-02 18:27:44.662290695 +0200
--- src/version.c 2014-07-02 18:46:38.230308065 +0200
***************
*** 736,737 ****
--- 736,739 ----
{ /* Add new patch number below this line */
+ /**/
+ 351,
/**/

--
The early bird gets the worm. If you want something else for
breakfast, get up later.

/// Bram Moolenaar -- Br...@Moolenaar.net -- http://www.Moolenaar.net \\\
/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\ an exciting new programming language -- http://www.Zimbu.org ///
\\\ help me help AIDS victims -- http://ICCF-Holland.org ///

Jun T.

unread,
Jul 3, 2014, 3:39:30 AM7/3/14
to vim...@googlegroups.com
On 2014/07/03, at 2:06, Bram Moolenaar <Br...@Moolenaar.net> wrote:

> + static int item_compare_keep_zero;

This variable is set to FALSE for the sort() function, and only set to TRUE for
uniq(). Is this intentional?

Even if I set this variable to TRUE for sort(), the sort is still not stable
and test55 fails on my Mac:

--- test55.ok 2014-07-03 12:22:11.000000000 +0900
+++ test55.failed 2014-07-03 14:56:53.000000000 +0900
@@ -101,9 +101,9 @@
[[0, 1, 2], [0, 1, 2], 4, 2, 2, 1.5, 'xaaa', 'x8', 'foo6', 'foo', 'foo', 'A11', '-0']
['-0', 'A11', 'foo', 'foo', 'foo6', 'x8', 'xaaa', 1.5, 2, 2, 4, [0, 1, 2], [0, 1, 2]]
['-0', 'A11', 'foo', 'foo6', 'x8', 'xaaa', 1.5, 2, 4, [0, 1, 2]]
-[-1, 'one', 'two', 'three', 'four', 1.0e-15, 0.22, 7, 9, 12, 18, 22, 255]
-['bar', 'BAR', 'Bar', 'Foo', 'FOO', 'foo', 'FOOBAR', -1, 0, 0, 0.22, 1.0e-15, 12, 18, 22, 255, 7, 9, [], {}]
-['bar', 'BAR', 'Bar', 'Foo', 'FOO', 'foo', 'FOOBAR', -1, 0, 0, 0.22, 1.0e-15, 12, 18, 22, 255, 7, 9, [], {}]
+[-1, 'four', 'two', 'three', 'one', 1.0e-15, 0.22, 7, 9, 12, 18, 22, 255]
+['bar', 'Bar', 'BAR', 'FOO', 'foo', 'Foo', 'FOOBAR', -1, 0, 0, 0.22, 1.0e-15, 12, 18, 22, 255, 7, 9, [], {}]
+['bar', 'Bar', 'BAR', 'FOO', 'foo', 'Foo', 'FOOBAR', -1, 0, 0, 0.22, 1.0e-15, 12, 18, 22, 255, 7, 9, [], {}]
['BAR', 'Bar', 'FOO', 'FOOBAR', 'Foo', 'bar', 'foo', -1, 0, 0, 0.22, 1.0e-15, 12, 18, 22, 255, 7, 9, [], {}]
['aa', 'bb']
['aa', 'bb']


> + /* When the result would be zero, compare the pointers themselves. Makes
> + * the sort stable. */
> + if (res == 0 && !item_compare_keep_zero)
> + res = s1 > s2 ? 1 : -1;
> +

This doesn't work, it seems.
During the sort process the qsort() function swaps the data in the array ptrs[].
Comparing s1 and s2 is to compare the current position in the array,
which may have been already modified (swapped) by qsort().

I think we need to save the original position along with the data.
See the patch in my previous post (26 June, Re: Patch 7.4.341, the 4th post in
https://groups.google.com/forum/#!topic/vim_dev/RUByys6B3Yk). In this patch,
the original position is saved in the member 'i' of the struct sdata_T.

I think my patch works, but I guess it will make the sort somewhat slower,
because there are now two function calls per comparison. If this is a problem,
then we could modify item_compare() itself instead of using a wrapper function
item_compare_stable().

PS.
What is the best way of referring to a previous post in this mailing list?


Ben Fritz

unread,
Jul 3, 2014, 9:03:35 AM7/3/14
to vim...@googlegroups.com, takim...@kba.biglobe.ne.jp
On Thursday, July 3, 2014 2:39:30 AM UTC-5, Jun T. wrote:
>
> PS.
>
> What is the best way of referring to a previous post in this mailing list?

Probably, finding it on the Google Groups interface, and grabbing the link.

The drop-down menu next to the "reply" button on every list post has a "link" option, for example the link to the post I am responding to now is https://groups.google.com/d/msg/vim_dev/nLGEPTMo1YU/59T7WyTcavEJ

Bram Moolenaar

unread,
Jul 3, 2014, 4:54:29 PM7/3/14
to Jun T., vim...@googlegroups.com

Jun T wrote:

> On 2014/07/03, at 2:06, Bram Moolenaar <Br...@Moolenaar.net> wrote:
>
> > + static int item_compare_keep_zero;
>
> This variable is set to FALSE for the sort() function, and only set to
> TRUE for uniq(). Is this intentional?

Yes, uniq() needs the zero to decide elements are equal.

> Even if I set this variable to TRUE for sort(), the sort is still not stable
> and test55 fails on my Mac:

Setting it to TRUE has the opposite effect.

> > + /* When the result would be zero, compare the pointers
> > themselves. Makes
> > + * the sort stable. */
> > + if (res == 0 && !item_compare_keep_zero)
> > + res = s1 > s2 ? 1 : -1;
> > +
>
> This doesn't work, it seems. During the sort process the qsort()
> function swaps the data in the array ptrs[].
> Comparing s1 and s2 is to compare the current position in the array,
> which may have been already modified (swapped) by qsort().

Right, qsort may move pointers over a larger distance. It did work for
the tests I did, but I suppose that's because they start off being next
to each other and all undergo the same "large move". It will fail when
starting in another order.

> I think we need to save the original position along with the data.
> See the patch in my previous post (26 June, Re: Patch 7.4.341, the 4th post in
> https://groups.google.com/forum/#!topic/vim_dev/RUByys6B3Yk). In this patch,
> the original position is saved in the member 'i' of the struct sdata_T.
>
> I think my patch works, but I guess it will make the sort somewhat slower,
> because there are now two function calls per comparison. If this is a problem,
> then we could modify item_compare() itself instead of using a wrapper function
> item_compare_stable().

Hmm, we need to fill an array with pointers, might as well add an int
right after the pointer. Effectively this means sorting this struct:

{
void *ptr; /* the list element to be sorted */
int idx; /* original position in the list */
}

The array will take more space, qsort has larger elements to move
around, but otherwise the cost is quite low, we don't need another
function call.

> PS.
> What is the best way of referring to a previous post in this mailing list?

A URL to groups.google.com probably works best.

--
If your life is a hard drive,
Christ can be your backup.

Jun T.

unread,
Jul 4, 2014, 12:47:30 AM7/4/14
to vim...@googlegroups.com
On 2014/07/04, at 5:54, Bram Moolenaar <Br...@Moolenaar.net> wrote:
> Hmm, we need to fill an array with pointers, might as well add an int
> right after the pointer. Effectively this means sorting this struct:
>
> {
> void *ptr; /* the list element to be sorted */
> int idx; /* original position in the list */
> }

Yes, that is exactly what I did in my patch (struct sdata_T).

Jun T.

unread,
Jul 8, 2014, 12:50:26 PM7/8/14
to vim...@googlegroups.com
I did two speed tests of sort() with and without my patch
on my Linux box:

original patched ratio
test1 78.81 sec 81.06 sec 1.03
test2 5.87 sec 6.08 sec 1.04

where 'original' is the revision adc4a84f72eb, and
'patched' is with my patch for eval.c.

It seems the overhead by the wrapper function
item_compare_stable() is just a few percent.

The two tests are:

"----- test1.vim -----
let l = []
py <<END
import vim
import random
import time
rmax = 10000000
rnum = 10000000
l = vim.bindeval('l')
l += [ random.randint(0,rmax) for i in range(rnum) ]
t0 = time.time()
END
call sort(l)
py print time.time() - t0

"----- test2.vim -----
let l = []
py <<END
import vim
import time
l = vim.bindeval('l')
for dat in open('big.txt'):
l += dat.strip().split()
t0 = time.time()
END
call sort(l)
py print time.time() - t0

Here, big.txt is created by
$ cat vim/runtime/doc/*.txt > big.txt
big.txt contains 843083 words, and the list l has the same
number of string elements. Test result may change if I use
other text files.

For test1, I did a few more test with different values for rmax,
and for test2 I also tried sort(l,'i'); the results were similar
(a few percent slowdown) on the Linux box.

On my Mac, on the other hand, the results were somewhat different.
For test1, the ratio is about 1.05 for rmax = 10000000, but
the ratio becomes more than 2.0 for rmax = 100 (i.e., lots of
equivalent data).
For test2, the ratio is about 1.7, i.e., the one with my patch
is about 70% slower.
I believe these slowdown is not due to the overhead of
item_compare_stable() but due to that, in the case of qsort() on
Mac (or BSD), the number of swap is larger for the 'stable' sort
case. I think this can't be avoided by tuning the code in eval.c.



Reply all
Reply to author
Forward
0 new messages