mirror of
https://github.com/neovim/neovim.git
synced 2025-09-16 08:18:17 +00:00
vim-patch:8.2.2233: cannot convert a byte index into a character index (#13978)
Problem: Cannot convert a byte index into a character index.
Solution: Add charidx(). (Yegappan Lakshmanan, closes vim/vim#7561)
17793ef23a
This commit is contained in:
@@ -2077,6 +2077,8 @@ changenr() Number current change number
|
|||||||
chanclose({id}[, {stream}]) Number Closes a channel or one of its streams
|
chanclose({id}[, {stream}]) Number Closes a channel or one of its streams
|
||||||
chansend({id}, {data}) Number Writes {data} to channel
|
chansend({id}, {data}) Number Writes {data} to channel
|
||||||
char2nr({expr}[, {utf8}]) Number ASCII/UTF8 value of first char in {expr}
|
char2nr({expr}[, {utf8}]) Number ASCII/UTF8 value of first char in {expr}
|
||||||
|
charidx({string}, {idx} [, {countcc}])
|
||||||
|
Number char index of byte {idx} in {string}
|
||||||
cindent({lnum}) Number C indent for line {lnum}
|
cindent({lnum}) Number C indent for line {lnum}
|
||||||
clearmatches([{win}]) none clear all matches
|
clearmatches([{win}]) none clear all matches
|
||||||
col({expr}) Number column nr of cursor or mark
|
col({expr}) Number column nr of cursor or mark
|
||||||
@@ -3027,6 +3029,29 @@ char2nr({expr} [, {utf8}]) *char2nr()*
|
|||||||
A combining character is a separate character.
|
A combining character is a separate character.
|
||||||
|nr2char()| does the opposite.
|
|nr2char()| does the opposite.
|
||||||
|
|
||||||
|
*charidx()*
|
||||||
|
charidx({string}, {idx} [, {countcc}])
|
||||||
|
Return the character index of the byte at {idx} in {string}.
|
||||||
|
The index of the first character is zero.
|
||||||
|
If there are no multibyte characters the returned value is
|
||||||
|
equal to {idx}.
|
||||||
|
When {countcc} is omitted or zero, then composing characters
|
||||||
|
are not counted separately; their byte length is added to the
|
||||||
|
preceding base character.
|
||||||
|
When {countcc} is set to 1, then composing characters are
|
||||||
|
counted as separate characters.
|
||||||
|
Returns -1 if the arguments are invalid or if {idx} is greater
|
||||||
|
than the index of the last byte in {string}. An error is
|
||||||
|
given if the first argument is not a string, the second
|
||||||
|
argument is not a number or when the third argument is present
|
||||||
|
and is not zero or one.
|
||||||
|
See |byteidx()| and |byteidxcomp()| for getting the byte index
|
||||||
|
from the character index.
|
||||||
|
Examples: >
|
||||||
|
echo charidx('áb́ć', 3) returns 1
|
||||||
|
echo charidx('áb́ć', 6, 1) returns 4
|
||||||
|
echo charidx('áb́ć', 16) returns -1
|
||||||
|
|
||||||
cindent({lnum}) *cindent()*
|
cindent({lnum}) *cindent()*
|
||||||
Get the amount of indent for line {lnum} according to the C
|
Get the amount of indent for line {lnum} according to the C
|
||||||
indenting rules, as with 'cindent'.
|
indenting rules, as with 'cindent'.
|
||||||
|
@@ -613,6 +613,7 @@ String manipulation: *string-functions*
|
|||||||
iconv() convert text from one encoding to another
|
iconv() convert text from one encoding to another
|
||||||
byteidx() byte index of a character in a string
|
byteidx() byte index of a character in a string
|
||||||
byteidxcomp() like byteidx() but count composing characters
|
byteidxcomp() like byteidx() but count composing characters
|
||||||
|
charidx() character index of a byte in a string
|
||||||
repeat() repeat a string multiple times
|
repeat() repeat a string multiple times
|
||||||
eval() evaluate a string expression
|
eval() evaluate a string expression
|
||||||
execute() execute an Ex command and get the output
|
execute() execute an Ex command and get the output
|
||||||
|
@@ -63,6 +63,7 @@ return {
|
|||||||
chanclose={args={1, 2}},
|
chanclose={args={1, 2}},
|
||||||
chansend={args=2},
|
chansend={args=2},
|
||||||
char2nr={args={1, 2}},
|
char2nr={args={1, 2}},
|
||||||
|
charidx={args={2, 3}},
|
||||||
cindent={args=1},
|
cindent={args=1},
|
||||||
clearmatches={args={0, 1}},
|
clearmatches={args={0, 1}},
|
||||||
col={args=1},
|
col={args=1},
|
||||||
|
@@ -940,6 +940,52 @@ static void f_char2nr(typval_T *argvars, typval_T *rettv, FunPtr fptr)
|
|||||||
(const char_u *)tv_get_string(&argvars[0]));
|
(const char_u *)tv_get_string(&argvars[0]));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// "charidx()" function
|
||||||
|
/// Implements the "charidx({string}, {idx} [, {countcc}])" function:
/// stores in "rettv" the character index of the byte at {idx} in {string}.
///
/// The result is -1 when {idx} is negative or lies beyond the last byte of
/// {string}.  E474 (e_invarg) is given when {string} is not a String, {idx}
/// is not a Number, or {countcc} is present and is not the Number 0 or 1.
///
/// @param[in]   argvars  Arguments: {string}, {idx} and optional {countcc}.
/// @param[out]  rettv    Receives the character index, or -1 on failure.
/// @param       fptr     Not used.
static void f_charidx(typval_T *argvars, typval_T *rettv, FunPtr fptr)
{
  // Every early return below reports failure through this value.
  rettv->vval.v_number = -1;

  if (argvars[0].v_type != VAR_STRING
      || argvars[1].v_type != VAR_NUMBER
      || (argvars[2].v_type != VAR_UNKNOWN
          && argvars[2].v_type != VAR_NUMBER)) {
    EMSG(_(e_invarg));
    return;
  }

  const char *const str = tv_get_string_chk(&argvars[0]);
  const varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
  if (str == NULL || idx < 0) {
    return;
  }

  // Validate {countcc} at full width: narrowing to "int" before the range
  // check would let values such as 4294967296 slip through as zero.
  varnumber_T countcc = 0;
  if (argvars[2].v_type != VAR_UNKNOWN) {
    countcc = tv_get_number(&argvars[2]);
  }
  if (countcc < 0 || countcc > 1) {
    EMSG(_(e_invarg));
    return;
  }

  // With {countcc} each composing character is stepped over on its own,
  // otherwise it is consumed together with the preceding base character.
  int (*const ptr2len)(const char_u *) = countcc ? utf_ptr2len : utfc_ptr2len;

  // Compare byte offsets rather than pointers: "str + idx" may point far
  // outside the string (undefined behaviour) when {idx} is too large, while
  // "p - str" always stays inside it because the loop stops at the NUL.
  const char *p = str;
  varnumber_T nchars = 0;
  while ((varnumber_T)(p - str) <= idx) {
    if (*p == NUL) {
      return;  // {idx} is past the last byte of {string}.
    }
    p += ptr2len((const char_u *)p);
    nchars++;
  }

  // "idx" is not negative, so the loop consumed at least one character; the
  // last one consumed is the character that contains byte {idx}.
  rettv->vval.v_number = nchars - 1;
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* "cindent(lnum)" function
|
* "cindent(lnum)" function
|
||||||
*/
|
*/
|
||||||
|
@@ -833,6 +833,31 @@ func Test_byte2line_line2byte()
|
|||||||
bw!
|
bw!
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
|
" Test for charidx()
|
||||||
|
func Test_charidx()
" Checks charidx() without and with {countcc}, for byte indexes inside and
" just past the string, for an empty string, and for invalid arguments.
" The expected values below imply that the test string is 8 bytes long
" (indexes 0-7) and that two of its four characters carry one composing
" character each.
|
||||||
|
let a = 'xáb́y'
|
||||||
|
call assert_equal(0, charidx(a, 0))
|
||||||
|
call assert_equal(1, charidx(a, 3))
|
||||||
|
call assert_equal(2, charidx(a, 4))
|
||||||
|
call assert_equal(3, charidx(a, 7))
|
||||||
|
call assert_equal(-1, charidx(a, 8))
|
||||||
|
call assert_equal(-1, charidx('', 0))
|
||||||
|
|
||||||
|
" With {countcc} set to 1 every composing character is counted as a
" character of its own, so the same byte indexes map to larger results.
|
||||||
|
call assert_equal(0, charidx(a, 0, 1))
|
||||||
|
call assert_equal(2, charidx(a, 2, 1))
|
||||||
|
call assert_equal(3, charidx(a, 4, 1))
|
||||||
|
call assert_equal(5, charidx(a, 7, 1))
|
||||||
|
call assert_equal(-1, charidx(a, 8, 1))
|
||||||
|
call assert_equal(-1, charidx('', 0, 1))
|
||||||
|
|
||||||
|
" Arguments of the wrong type and a {countcc} other than 0 or 1 must all
" fail with E474.
call assert_fails('let x = charidx([], 1)', 'E474:')
|
||||||
|
call assert_fails('let x = charidx("abc", [])', 'E474:')
|
||||||
|
call assert_fails('let x = charidx("abc", 1, [])', 'E474:')
|
||||||
|
call assert_fails('let x = charidx("abc", 1, -1)', 'E474:')
|
||||||
|
call assert_fails('let x = charidx("abc", 1, 2)', 'E474:')
|
||||||
|
endfunc
|
||||||
|
|
||||||
func Test_count()
|
func Test_count()
|
||||||
let l = ['a', 'a', 'A', 'b']
|
let l = ['a', 'a', 'A', 'b']
|
||||||
call assert_equal(2, count(l, 'a'))
|
call assert_equal(2, count(l, 'a'))
|
||||||
|
Reference in New Issue
Block a user