vim-patch:9.0.1485: no functions for converting from/to UTF-16 index (#23318)

Problem:    no functions for converting from/to UTF-16 index.
Solution:   Add UTF-16 flag to existing functions and add strutf16len() and
            utf16idx(). (Yegappan Lakshmanan, closes vim/vim#12216)

67672ef097

Co-authored-by: Yegappan Lakshmanan <yegappan@yahoo.com>
This commit is contained in:
zeertzjq
2023-04-26 09:50:37 +08:00
committed by GitHub
parent 8af97ecefa
commit 191e8b4062
6 changed files with 665 additions and 45 deletions

View File

@@ -1504,22 +1504,44 @@ char *strrep(const char *src, const char *what, const char *rep)
static void byteidx(typval_T *argvars, typval_T *rettv, int comp)
{
rettv->vval.v_number = -1;
const char *const str = tv_get_string_chk(&argvars[0]);
varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
rettv->vval.v_number = -1;
if (str == NULL || idx < 0) {
return;
}
varnumber_T utf16idx = false;
if (argvars[2].v_type != VAR_UNKNOWN) {
utf16idx = tv_get_bool(&argvars[2]);
if (utf16idx < 0 || utf16idx > 1) {
semsg(_(e_using_number_as_bool_nr), utf16idx);
return;
}
}
int (*ptr2len)(const char *);
if (comp) {
ptr2len = utf_ptr2len;
} else {
ptr2len = utfc_ptr2len;
}
const char *t = str;
for (; idx > 0; idx--) {
if (*t == NUL) { // EOL reached.
return;
}
if (comp) {
t += utf_ptr2len(t);
} else {
t += utfc_ptr2len(t);
if (utf16idx) {
const int clen = ptr2len(t);
const int c = (clen > 1) ? utf_ptr2char(t) : *t;
if (c > 0xFFFF) {
idx--;
}
}
if (idx > 0) {
t += ptr2len(t);
}
}
rettv->vval.v_number = (varnumber_T)(t - str);
@@ -1542,24 +1564,27 @@ void f_charidx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
{
rettv->vval.v_number = -1;
if ((tv_check_for_string_arg(argvars, 0) == FAIL
|| tv_check_for_number_arg(argvars, 1) == FAIL
|| tv_check_for_opt_bool_arg(argvars, 2) == FAIL)) {
if (tv_check_for_string_arg(argvars, 0) == FAIL
|| tv_check_for_number_arg(argvars, 1) == FAIL
|| tv_check_for_opt_bool_arg(argvars, 2) == FAIL
|| (argvars[2].v_type != VAR_UNKNOWN
&& tv_check_for_opt_bool_arg(argvars, 3) == FAIL)) {
return;
}
const char *str = tv_get_string_chk(&argvars[0]);
const char *const str = tv_get_string_chk(&argvars[0]);
varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
if (str == NULL || idx < 0) {
return;
}
int countcc = 0;
varnumber_T countcc = false;
varnumber_T utf16idx = false;
if (argvars[2].v_type != VAR_UNKNOWN) {
countcc = (int)tv_get_number(&argvars[2]);
}
if (countcc < 0 || countcc > 1) {
semsg(_(e_using_number_as_bool_nr), countcc);
return;
countcc = tv_get_bool(&argvars[2]);
if (argvars[3].v_type != VAR_UNKNOWN) {
utf16idx = tv_get_bool(&argvars[3]);
}
}
int (*ptr2len)(const char *);
@@ -1571,10 +1596,18 @@ void f_charidx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
const char *p;
int len;
for (p = str, len = 0; p <= str + idx; len++) {
for (p = str, len = 0; utf16idx ? idx >= 0 : p <= str + idx; len++) {
if (*p == NUL) {
return;
}
if (utf16idx) {
idx--;
const int clen = ptr2len(p);
const int c = (clen > 1) ? utf_ptr2char(p) : *p;
if (c > 0xFFFF) {
idx--;
}
}
p += ptr2len(p);
}
@@ -1743,6 +1776,36 @@ void f_strchars(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
}
}
/// "strutf16len()" function
///
/// Counts how many UTF-16 code units the string in argvars[0] occupies and
/// stores that count in `rettv`.  A character whose code point is above 0xFFFF
/// contributes two units, every other character contributes one.
///
/// @param[in]  argvars  argvars[0] is the string; argvars[1] is an optional
///                      bool ("countcc") that selects which character-advance
///                      helper is used.
/// @param[out] rettv    Receives the count, or -1 when argument checking fails.
/// @param      fptr     Not used.
void f_strutf16len(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
{
  // Result reported when the arguments are rejected below.
  rettv->vval.v_number = -1;

  if (tv_check_for_string_arg(argvars, 0) == FAIL
      || tv_check_for_opt_bool_arg(argvars, 1) == FAIL) {
    return;
  }

  const varnumber_T countcc = (argvars[1].v_type != VAR_UNKNOWN)
                              ? tv_get_bool(&argvars[1])
                              : false;

  // NOTE(review): presumably mb_cptr2char_adv() steps over one code point at
  // a time (so composing characters are counted on their own) while
  // mb_ptr2char_adv() also skips trailing composing characters -- confirm
  // against their definitions.
  int (*next_char)(const char **pp) = mb_ptr2char_adv;
  if (countcc) {
    next_char = mb_cptr2char_adv;
  }

  varnumber_T units = 0;
  for (const char *cursor = tv_get_string(&argvars[0]); *cursor != NUL;) {
    const int codepoint = next_char(&cursor);
    // Code points beyond the BMP are encoded as a surrogate pair in UTF-16.
    units += (codepoint > 0xFFFF) ? 2 : 1;
  }

  rettv->vval.v_number = units;
}
/// "strdisplaywidth()" function
void f_strdisplaywidth(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
{
@@ -1914,6 +1977,61 @@ void f_strtrans(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
rettv->vval.v_string = transstr(tv_get_string(&argvars[0]), true);
}
/// "utf16idx()" function
///
/// Converts an index into the string in argvars[0] to a UTF-16 code unit
/// index.  The string is walked character by character; each character whose
/// code point is above 0xFFFF accounts for two UTF-16 code units, every other
/// character for one.  The result is the number of code units in all
/// characters up to and including the one that contains the index, minus one.
///
/// @param[in]  argvars  argvars[0]: the string.
///                      argvars[1]: the index, must not be negative.
///                      argvars[2]: optional bool "countcc"; selects
///                                  utf_ptr2len() instead of utfc_ptr2len()
///                                  as the per-character step.
///                      argvars[3]: optional bool "charidx", only read when
///                                  argvars[2] is present; when true the
///                                  index counts characters (steps of the
///                                  selected length function), otherwise it
///                                  is a byte offset.
/// @param[out] rettv    Receives the UTF-16 index, or -1 when the arguments
///                      are rejected, the index is negative, or the end of
///                      the string is reached before the index is.
/// @param      fptr     Not used.
void f_utf16idx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
{
  // Result reported on every early return below.
  rettv->vval.v_number = -1;

  if (tv_check_for_string_arg(argvars, 0) == FAIL
      || tv_check_for_opt_number_arg(argvars, 1) == FAIL
      || tv_check_for_opt_bool_arg(argvars, 2) == FAIL
      || (argvars[2].v_type != VAR_UNKNOWN
          && tv_check_for_opt_bool_arg(argvars, 3) == FAIL)) {
    return;
  }

  const char *const str = tv_get_string_chk(&argvars[0]);
  varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
  if (str == NULL || idx < 0) {
    return;
  }

  varnumber_T countcc = false;
  varnumber_T charidx = false;
  if (argvars[2].v_type != VAR_UNKNOWN) {
    countcc = tv_get_bool(&argvars[2]);
    if (argvars[3].v_type != VAR_UNKNOWN) {
      charidx = tv_get_bool(&argvars[3]);
    }
  }

  // NOTE(review): presumably utfc_ptr2len() includes trailing composing
  // characters in the length while utf_ptr2len() covers a single code point
  // -- confirm against their definitions.
  int (*ptr2len)(const char *);
  if (countcc) {
    ptr2len = utf_ptr2len;
  } else {
    ptr2len = utfc_ptr2len;
  }

  const char *p = str;
  int len = 0;
  // In byte-index mode compare the offset walked so far with "idx" instead
  // of forming the pointer "str + idx": "idx" is caller supplied and may be
  // far larger than the string, and computing a pointer beyond one past the
  // end of an object is undefined behaviour (it can wrap and make the loop
  // terminate early with a bogus result).
  while (charidx ? idx >= 0 : (varnumber_T)(p - str) <= idx) {
    if (*p == NUL) {
      // Ran off the end of the string before reaching the index.
      return;
    }
    const int clen = ptr2len(p);
    const int c = (clen > 1) ? utf_ptr2char(p) : *p;
    // Code points beyond the BMP need a surrogate pair: two code units.
    len += (c > 0xFFFF) ? 2 : 1;
    p += clen;
    if (charidx) {
      idx--;
    }
  }

  // "len" counts code units through the target character; convert the count
  // to a zero-based index.
  rettv->vval.v_number = len > 0 ? len - 1 : 0;
}
/// "tolower(string)" function
void f_tolower(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
{