Patch 8.2.1933
Problem: Cannot sort using locale ordering.
Solution: Add a flag for :sort and sort() to use the locale. (Dominique
Pellé, closes #7237)
Files: runtime/doc/change.txt, runtime/doc/eval.txt, src/ex_cmds.c,
src/list.c, src/testdir/test_sort.vim
*** ../vim-8.2.1932/runtime/doc/change.txt 2020-06-04 18:21:56.046395485 +0200
--- runtime/doc/change.txt 2020-11-01 13:53:59.996703330 +0100
***************
*** 1799,1805 ****
found here: |sort()|, |uniq()|.
*:sor* *:sort*
! :[range]sor[t][!] [b][f][i][n][o][r][u][x] [/{pattern}/]
Sort lines in [range]. When no range is given all
lines are sorted.
--- 1801,1807 ----
found here: |sort()|, |uniq()|.
*:sor* *:sort*
! :[range]sor[t][!] [b][f][i][l][n][o][r][u][x] [/{pattern}/]
Sort lines in [range]. When no range is given all
lines are sorted.
***************
*** 1807,1812 ****
--- 1809,1822 ----
With [i] case is ignored.
+ With [l] sort uses the current locale. See
+ `:language collate` to check or set the locale used
+ for ordering. For example, with "en_US.UTF8",
+ Ö will be ordered after O and before P,
+ whereas with the Swedish locale "sv_SE.UTF8",
+ it will be after Z.
+ Case is typically ignored by the locale.
+
Options [n][f][x][o][b] are mutually exclusive.
With [n] sorting is done on the first decimal number
***************
*** 1873,1880 ****
Note that using `:sort` with `:global` doesn't sort the matching lines, it's
quite useless.
! The details about sorting depend on the library function used. There is no
! guarantee that sorting obeys the current locale. You will have to try it out.
Vim does do a "stable" sort.
The sorting can be interrupted, but if you interrupt it too late in the
--- 1883,1889 ----
Note that using `:sort` with `:global` doesn't sort the matching lines, it's
quite useless.
! `:sort` does not use the current locale unless the [l] flag is used.
Vim does do a "stable" sort.
The sorting can be interrupted, but if you interrupt it too late in the
*** ../vim-8.2.1932/runtime/doc/eval.txt 2020-10-23 16:49:30.112311448 +0200
--- runtime/doc/eval.txt 2020-11-01 13:54:00.000703318 +0100
***************
*** 9632,9637 ****
--- 9700,9712 ----
When {func} is given and it is '1' or 'i' then case is
ignored.
+ When {func} is given and it is 'l' then the current locale
+ is used for ordering. See `:language collate` to check or set
+ the locale used for ordering. For example, with "en_US.UTF8",
+ Ö will be ordered after O and before P, whereas with the
+ Swedish locale "sv_SE.UTF8", it will be after Z.
+ Case is typically ignored by the locale.
+
When {func} is given and it is 'n' then all items will be
sorted numerical (Implementation detail: This uses the
strtod() function to parse numbers, Strings, Lists, Dicts and
*** ../vim-8.2.1932/src/ex_cmds.c 2020-10-25 17:09:46.217011625 +0100
--- src/ex_cmds.c 2020-11-01 13:54:00.000703318 +0100
***************
*** 277,282 ****
--- 277,283 ----
static char_u *sortbuf1;
static char_u *sortbuf2;
+ static int sort_lc; // sort using locale
static int sort_ic; // ignore case
static int sort_nr; // sort on number
static int sort_rx; // sort on regex instead of skipping it
***************
*** 307,313 ****
} st_u;
} sorti_T;
! static int sort_compare(const void *s1, const void *s2);
static int
sort_compare(const void *s1, const void *s2)
--- 308,320 ----
} st_u;
} sorti_T;
! static int
! string_compare(const void *s1, const void *s2)
! {
! if (sort_lc)
! return strcoll((char *)s1, (char *)s2);
! return sort_ic ? STRICMP(s1, s2) : STRCMP(s1, s2);
! }
static int
sort_compare(const void *s1, const void *s2)
***************
*** 350,357 ****
l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr + 1);
sortbuf2[l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr] = 0;
! result = sort_ic ? STRICMP(sortbuf1, sortbuf2)
! : STRCMP(sortbuf1, sortbuf2);
}
// If two lines have the same value, preserve the original line order.
--- 357,363 ----
l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr + 1);
sortbuf2[l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr] = 0;
! result = string_compare(sortbuf1, sortbuf2);
}
// If two lines have the same value, preserve the original line order.
***************
*** 398,404 ****
if (nrs == NULL)
goto sortend;
! sort_abort = sort_ic = sort_rx = sort_nr = 0;
#ifdef FEAT_FLOAT
sort_flt = 0;
#endif
--- 404,410 ----
if (nrs == NULL)
goto sortend;
! sort_abort = sort_ic = sort_lc = sort_rx = sort_nr = 0;
#ifdef FEAT_FLOAT
sort_flt = 0;
#endif
***************
*** 409,414 ****
--- 415,422 ----
;
else if (*p == 'i')
sort_ic = TRUE;
+ else if (*p == 'l')
+ sort_lc = TRUE;
else if (*p == 'r')
sort_rx = TRUE;
else if (*p == 'n')
***************
*** 614,621 ****
change_occurred = TRUE;
s = ml_get(get_lnum);
! if (!unique || i == 0
! || (sort_ic ? STRICMP(s, sortbuf1) : STRCMP(s, sortbuf1)) != 0)
{
// Copy the line into a buffer, it may become invalid in
// ml_append(). And it's needed for "unique".
--- 622,628 ----
change_occurred = TRUE;
s = ml_get(get_lnum);
! if (!unique || i == 0 || string_compare(s, sortbuf1) != 0)
{
// Copy the line into a buffer, it may become invalid in
// ml_append(). And it's needed for "unique".
*** ../vim-8.2.1932/src/list.c 2020-10-15 22:29:13.566726912 +0200
--- src/list.c 2020-11-01 13:54:00.000703318 +0100
***************
*** 1516,1521 ****
--- 1516,1522 ----
typedef struct
{
int item_compare_ic;
+ int item_compare_lc;
int item_compare_numeric;
int item_compare_numbers;
#ifdef FEAT_FLOAT
***************
*** 1594,1603 ****
p2 = (char_u *)"";
if (!sortinfo->item_compare_numeric)
{
! if (sortinfo->item_compare_ic)
! res = STRICMP(p1, p2);
else
! res = STRCMP(p1, p2);
}
else
{
--- 1595,1604 ----
p2 = (char_u *)"";
if (!sortinfo->item_compare_numeric)
{
! if (sortinfo->item_compare_lc)
! res = strcoll((char *)p1, (char *)p2);
else
! res = sortinfo->item_compare_ic ? STRICMP(p1, p2): STRCMP(p1, p2);
}
else
{
***************
*** 1706,1711 ****
--- 1707,1713 ----
goto theend; // short list sorts pretty quickly
info.item_compare_ic = FALSE;
+ info.item_compare_lc = FALSE;
info.item_compare_numeric = FALSE;
info.item_compare_numbers = FALSE;
#ifdef FEAT_FLOAT
***************
*** 1773,1778 ****
--- 1775,1785 ----
info.item_compare_func = NULL;
info.item_compare_ic = TRUE;
}
+ else if (STRCMP(info.item_compare_func, "l") == 0)
+ {
+ info.item_compare_func = NULL;
+ info.item_compare_lc = TRUE;
+ }
}
}
*** ../vim-8.2.1932/src/testdir/test_sort.vim 2020-09-23 22:38:01.507927503 +0200
--- src/testdir/test_sort.vim 2020-11-01 13:54:00.000703318 +0100
***************
*** 15,20 ****
--- 15,39 ----
" numbers compared as strings
call assert_equal([1, 2, 3], sort([3, 2, 1]))
call assert_equal([13, 28, 3], sort([3, 28, 13]))
+
+ call assert_equal(['A', 'O', 'P', 'a', 'o', 'p', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ'],
+ \ sort(['A', 'O', 'P', 'a', 'o', 'p', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ']))
+
+ call assert_equal(['A', 'a', 'o', 'O', 'p', 'P', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ'],
+ \ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'i'))
+
+ let lc = execute('language collate')
+ " With the following locales, the accented letters are ordered
+ " similarly to the non-accented letters...
+ if lc =~? '"\(en\|es\|de\|fr\|it\|nl\).*\.utf-\?8"'
+ call assert_equal(['a', 'A', 'ä', 'Ä', 'o', 'O', 'ô', 'Ô', 'œ', 'œ', 'p', 'P'],
+ \ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'l'))
+ " ... whereas with a Swedish locale, the accented letters are ordered
+ " after Z.
+ elseif lc =~? '"sv.*utf-\?8"'
+ call assert_equal(['a', 'A', 'o', 'O', 'p', 'P', 'ä', 'Ä', 'œ', 'œ', 'ô', 'Ô'],
+ \ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'l'))
+ endif
endfunc
func Test_sort_numeric()
***************
*** 1204,1209 ****
--- 1223,1279 ----
\ },
\ ]
+ " With the following locales, the accented letters are ordered
+ " similarly to the non-accented letters...
+ let lc = execute('language collate')
+ if lc =~? '"\(en\|es\|de\|fr\|it\|nl\).*\.utf-\?8"'
+ let tests += [
+ \ {
+ \ 'name' : 'sort with locale',
+ \ 'cmd' : '%sort l',
+ \ 'input' : [
+ \ 'A',
+ \ 'E',
+ \ 'O',
+ \ 'À',
+ \ 'È',
+ \ 'É',
+ \ 'Ô',
+ \ 'Œ',
+ \ 'Z',
+ \ 'a',
+ \ 'e',
+ \ 'o',
+ \ 'à',
+ \ 'è',
+ \ 'é',
+ \ 'ô',
+ \ 'œ',
+ \ 'z'
+ \ ],
+ \ 'expected' : [
+ \ 'a',
+ \ 'A',
+ \ 'à',
+ \ 'À',
+ \ 'e',
+ \ 'E',
+ \ 'é',
+ \ 'É',
+ \ 'è',
+ \ 'È',
+ \ 'o',
+ \ 'O',
+ \ 'ô',
+ \ 'Ô',
+ \ 'œ',
+ \ 'Œ',
+ \ 'z',
+ \ 'Z'
+ \ ]
+ \ },
+ \ ]
+ endif
if has('float')
let tests += [
\ {
*** ../vim-8.2.1932/src/version.c 2020-11-01 13:33:44.496700978 +0100
--- src/version.c 2020-11-01 13:55:51.496337061 +0100
***************
*** 752,753 ****
--- 752,755 ----
{ /* Add new patch number below this line */
+ /**/
+ 1933,
/**/
--
hundred-and-one symptoms of being an internet addict:
171. You invent another person and chat with yourself in empty chat rooms.
/// Bram Moolenaar -- Br...@Moolenaar.net --
http://www.Moolenaar.net \\\
/// sponsor Vim, vote for features --
http://www.Vim.org/sponsor/ \\\
\\\ an exciting new programming language --
http://www.Zimbu.org ///
\\\ help me help AIDS victims --
http://ICCF-Holland.org ///