vim-patch:8.1.1122: char2nr() does not handle composing characters

Problem:    char2nr() does not handle composing characters.
Solution:   Add str2list() and list2str(). (Ozaki Kiichi, closes vim/vim#4190)
9d40128afd

The optional 'utf8' parameter is accepted but is a no-op, unlike in Vim.
This commit is contained in:
Jan Edmund Lazo
2020-02-20 22:53:38 -05:00
parent 13b6f7a806
commit 7ed4837298
5 changed files with 119 additions and 2 deletions

View File

@@ -214,6 +214,7 @@ return {
line={args=1},
line2byte={args=1},
lispindent={args=1},
list2str={args={1, 2}},
localtime={},
log={args=1, func="float_op_wrapper", data="&log"},
log10={args=1, func="float_op_wrapper", data="&log10"},
@@ -321,6 +322,7 @@ return {
sqrt={args=1, func="float_op_wrapper", data="&sqrt"},
stdpath={args=1},
str2float={args=1},
str2list={args={1, 2}},
str2nr={args={1, 2}},
strcharpart={args={2, 3}},
strchars={args={1,2}},

View File

@@ -5246,6 +5246,36 @@ static void f_lispindent(typval_T *argvars, typval_T *rettv, FunPtr fptr)
}
}
// "list2str()" function
//
// Converts the List in argvars[0] into a string: every item is read as a
// number, encoded with utf_char2bytes() and appended to the result.
// argvars[1] is not read by this function.
//
// The result is stored in rettv as a VAR_STRING.  It stays NULL when
// argvars[0] is not a List (after reporting e_invarg) or when the List
// pointer is NULL.
static void f_list2str(typval_T *argvars, typval_T *rettv, FunPtr fptr)
{
  // Default result, returned as-is by every early exit below.
  rettv->v_type = VAR_STRING;
  rettv->vval.v_string = NULL;

  if (argvars[0].v_type != VAR_LIST) {
    EMSG(_(e_invarg));
    return;
  }

  list_T *const items = argvars[0].vval.v_list;
  if (items == NULL) {
    return;  // a NULL list is handled like an empty one: result stays NULL
  }

  garray_T out;
  ga_init(&out, 1, 80);

  // Scratch space for one encoded character plus its terminating NUL.
  char_u encoded[MB_MAXBYTES + 1];

  TV_LIST_ITER_CONST(items, item, {
    const int nbytes = utf_char2bytes(
        tv_get_number(TV_LIST_ITEM_TV(item)), encoded);
    // Terminate the bytes so ga_concat() can treat them as a C string.
    encoded[nbytes] = NUL;
    ga_concat(&out, encoded);
  });
  ga_append(&out, NUL);

  // Hand the accumulated buffer over to the return value.
  rettv->vval.v_string = out.ga_data;
}
/*
* "localtime()" function
*/
@@ -9353,6 +9383,17 @@ static void f_str2float(typval_T *argvars, typval_T *rettv, FunPtr fptr)
rettv->v_type = VAR_FLOAT;
}
// "str2list()" function
//
// Stores a new List in rettv and fills it with numbers taken from the string
// value of argvars[0]: at each position utf_ptr2char() supplies the number
// and utf_ptr2len() supplies how far to advance, until the NUL terminator.
// argvars[1] is not read by this function.
static void f_str2list(typval_T *argvars, typval_T *rettv, FunPtr fptr)
{
  tv_list_alloc_ret(rettv, kListLenUnknown);

  const char_u *cur = (const char_u *)tv_get_string(&argvars[0]);
  while (*cur != NUL) {
    tv_list_append_number(rettv->vval.v_list, utf_ptr2char(cur));
    cur += utf_ptr2len(cur);
  }
}
// "str2nr()" function
static void f_str2nr(typval_T *argvars, typval_T *rettv, FunPtr fptr)
{

View File

@@ -60,3 +60,46 @@ func Test_getvcol()
call assert_equal(2, virtcol("'["))
call assert_equal(2, virtcol("']"))
endfunc
func Test_list2str_str2list_utf8()
  " Each case pairs a string with the list of numbers expected for it:
  "   1. one Unicode codepoint per character
  "   2. a sequence that includes a composing character (U+3099)
  let cases = [
        \ ["\u3042\u3044", [0x3042, 0x3044]],
        \ ["\u304b\u3099\u3044", [0x304b, 0x3099, 0x3044]],
        \ ]

  for [text, codepoints] in cases
    " Round trip with the second argument given explicitly.
    call assert_equal(codepoints, str2list(text, 1))
    call assert_equal(text, list2str(codepoints, 1))

    " Under utf-8 the second argument must not change the result.
    if &enc ==# 'utf-8'
      call assert_equal(str2list(text), str2list(text, 1))
      call assert_equal(list2str(codepoints), list2str(codepoints, 1))
    endif
  endfor

  " An empty list gives an empty string.
  call assert_equal('', list2str([]))
  " The null-list variant of the check above is disabled:
  " call assert_equal('', list2str(test_null_list()))
endfunc
func Test_list2str_str2list_latin1()
  " The utf-8 string has to be created while 'encoding' is still utf-8; the
  " conversions are then meant to work on it even when 'encoding' is not
  " multi-byte.
  let expected_list = [0x3042, 0x3044]
  let expected_str = "\u3042\u3044"

  let saved_enc = &encoding
  " The switch to latin1 is commented out, so the save/restore around the
  " conversions currently re-assigns the value 'encoding' already has.
  " set encoding=latin1
  let got_list = str2list(expected_str, 1)
  let got_str = list2str(expected_list, 1)
  let &encoding = saved_enc

  " Compare only after 'encoding' has been restored.
  call assert_equal(expected_list, got_list)
  call assert_equal(expected_str, got_str)
endfunc