vim-patch:8.2.1933: cannot sort using locale ordering

Problem:    Cannot sort using locale ordering.
Solution:   Add a flag for :sort and sort() to use the locale. (Dominique
            Pellé, closes vim/vim#7237)
55e29611d2
This commit is contained in:
Sean Dewar
2021-04-19 19:57:19 +01:00
parent 1d72b6e4cd
commit 6a0b8cbd81
5 changed files with 109 additions and 11 deletions

View File

@@ -9166,6 +9166,7 @@ static void f_sockconnect(typval_T *argvars, typval_T *rettv, FunPtr fptr)
/// struct storing information about current sort
typedef struct {
int item_compare_ic;
bool item_compare_lc;
bool item_compare_numeric;
bool item_compare_numbers;
bool item_compare_float;
@@ -9240,10 +9241,10 @@ static int item_compare(const void *s1, const void *s2, bool keep_zero)
p2 = "";
}
if (!sortinfo->item_compare_numeric) {
if (sortinfo->item_compare_ic) {
res = STRICMP(p1, p2);
if (sortinfo->item_compare_lc) {
res = strcoll(p1, p2);
} else {
res = STRCMP(p1, p2);
res = sortinfo->item_compare_ic ? STRICMP(p1, p2): STRCMP(p1, p2);
}
} else {
double n1, n2;
@@ -9378,6 +9379,7 @@ static void do_sort_uniq(typval_T *argvars, typval_T *rettv, bool sort)
}
info.item_compare_ic = false;
info.item_compare_lc = false;
info.item_compare_numeric = false;
info.item_compare_numbers = false;
info.item_compare_float = false;
@@ -9422,6 +9424,9 @@ static void do_sort_uniq(typval_T *argvars, typval_T *rettv, bool sort)
} else if (strcmp(info.item_compare_func, "i") == 0) {
info.item_compare_func = NULL;
info.item_compare_ic = true;
} else if (strcmp(info.item_compare_func, "l") == 0) {
info.item_compare_func = NULL;
info.item_compare_lc = true;
}
}
}

View File

@@ -358,6 +358,7 @@ static int linelen(int *has_tab)
static char_u *sortbuf1;
static char_u *sortbuf2;
static int sort_lc; ///< sort using locale
static int sort_ic; ///< ignore case
static int sort_nr; ///< sort on number
static int sort_rx; ///< sort on regex instead of skipping it
@@ -381,6 +382,13 @@ typedef struct {
} st_u;
} sorti_T;
/// Compare two NUL-terminated strings with the ordering selected by the
/// file-scope sort flags.
///
/// If `sort_lc` is set, the strings are compared with strcoll(), i.e. using
/// the collation order of the current locale; `sort_ic` is not consulted in
/// that case.  Otherwise `sort_ic` chooses between STRICMP() and STRCMP().
///
/// @param s1  first string to compare
/// @param s2  second string to compare
///
/// @return  the result of the selected comparison; zero means the two
///          strings compare equal under that ordering.
static int string_compare(const void *s1, const void *s2) FUNC_ATTR_NONNULL_ALL
{
  if (sort_lc) {
    // strcoll() takes `const char *`: keep the qualifier instead of casting
    // it away.
    return strcoll((const char *)s1, (const char *)s2);
  }
  return sort_ic ? STRICMP(s1, s2) : STRCMP(s1, s2);
}
static int sort_compare(const void *s1, const void *s2)
{
@@ -424,8 +432,7 @@ static int sort_compare(const void *s1, const void *s2)
l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr + 1);
sortbuf2[l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr] = NUL;
result = sort_ic ? STRICMP(sortbuf1, sortbuf2)
: STRCMP(sortbuf1, sortbuf2);
result = string_compare(sortbuf1, sortbuf2);
}
/* If two lines have the same value, preserve the original line order. */
@@ -466,7 +473,7 @@ void ex_sort(exarg_T *eap)
regmatch.regprog = NULL;
sorti_T *nrs = xmalloc(count * sizeof(sorti_T));
sort_abort = sort_ic = sort_rx = sort_nr = sort_flt = 0;
sort_abort = sort_ic = sort_lc = sort_rx = sort_nr = sort_flt = 0;
size_t format_found = 0;
bool change_occurred = false; // Buffer contents changed.
@@ -474,6 +481,8 @@ void ex_sort(exarg_T *eap)
if (ascii_iswhite(*p)) {
} else if (*p == 'i') {
sort_ic = true;
} else if (*p == 'l') {
sort_lc = true;
} else if (*p == 'r') {
sort_rx = true;
} else if (*p == 'n') {
@@ -645,8 +654,7 @@ void ex_sort(exarg_T *eap)
s = ml_get(get_lnum);
size_t bytelen = STRLEN(s) + 1; // include EOL in bytelen
old_count += bytelen;
if (!unique || i == 0
|| (sort_ic ? STRICMP(s, sortbuf1) : STRCMP(s, sortbuf1)) != 0) {
if (!unique || i == 0 || string_compare(s, sortbuf1) != 0) {
// Copy the line into a buffer, it may become invalid in
// ml_append(). And it's needed for "unique".
STRCPY(sortbuf1, s);

View File

@@ -13,6 +13,25 @@ func Test_sort_strings()
" numbers compared as strings
call assert_equal([1, 2, 3], sort([3, 2, 1]))
call assert_equal([13, 28, 3], sort([3, 28, 13]))
call assert_equal(['A', 'O', 'P', 'a', 'o', 'p', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ'],
\ sort(['A', 'O', 'P', 'a', 'o', 'p', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ']))
call assert_equal(['A', 'a', 'o', 'O', 'p', 'P', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ'],
\ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'i'))
let lc = execute('language collate')
" With the following locales, the accented letters are ordered
" similarly to the non-accented letters...
if lc =~? '"\(en\|es\|de\|fr\|it\|nl\).*\.utf-\?8"'
call assert_equal(['a', 'A', 'ä', 'Ä', 'o', 'O', 'ô', 'Ô', 'œ', 'œ', 'p', 'P'],
\ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'l'))
" ... whereas with a Swedish locale, the accented letters are ordered
" after Z.
elseif lc =~? '"sv.*utf-\?8"'
call assert_equal(['a', 'A', 'o', 'O', 'p', 'P', 'ä', 'Ä', 'œ', 'œ', 'ô', 'Ô'],
\ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'l'))
endif
endfunc
func Test_sort_numeric()
@@ -1223,6 +1242,58 @@ func Test_sort_cmd()
\ },
\ ]
" With the following locales, the accented letters are ordered
" similarly to the non-accented letters...
let lc = execute('language collate')
if lc =~? '"\(en\|es\|de\|fr\|it\|nl\).*\.utf-\?8"'
let tests += [
\ {
\ 'name' : 'sort with locale',
\ 'cmd' : '%sort l',
\ 'input' : [
\ 'A',
\ 'E',
\ 'O',
\ 'À',
\ 'È',
\ 'É',
\ 'Ô',
\ 'Œ',
\ 'Z',
\ 'a',
\ 'e',
\ 'o',
\ 'à',
\ 'è',
\ 'é',
\ 'ô',
\ 'œ',
\ 'z'
\ ],
\ 'expected' : [
\ 'a',
\ 'A',
\ 'à',
\ 'À',
\ 'e',
\ 'E',
\ 'é',
\ 'É',
\ 'è',
\ 'È',
\ 'o',
\ 'O',
\ 'ô',
\ 'Ô',
\ 'œ',
\ 'Œ',
\ 'z',
\ 'Z'
\ ]
\ },
\ ]
endif
for t in tests
enew!
call append(0, t.input)