vim-patch:8.2.2233: cannot convert a byte index into a character index (#13978)

Problem: Cannot convert a byte index into a character index. Solution: Add charidx(). (Yegappan Lakshmanan, closes vim/vim#7561) 17793ef23a
2025-09-16 08:18:17 +00:00 · 2021-02-23 10:13:14 +09:00
parent 9d5f842807
commit 0450e155d4
5 changed files with 98 additions and 0 deletions
--- a/runtime/doc/eval.txt
+++ b/runtime/doc/eval.txt
@@ -2077,6 +2077,8 @@ changenr()			Number	current change number
 chanclose({id}[, {stream}])	Number	Closes a channel or one of its streams
 chansend({id}, {data})		Number	Writes {data} to channel
 char2nr({expr}[, {utf8}])	Number	ASCII/UTF8 value of first char in {expr}
 charidx({string}, {idx} [, {countcc}])
 				Number  char index of byte {idx} in {string}
 cindent({lnum})		Number	C indent for line {lnum}
 clearmatches([{win}])		none	clear all matches
 col({expr})			Number	column nr of cursor or mark
@@ -3027,6 +3029,29 @@ char2nr({expr} [, {utf8}])					*char2nr()*
 		A combining character is a separate character.
 		|nr2char()| does the opposite.
 							*charidx()*
 charidx({string}, {idx} [, {countcc}])
 		Return the character index of the byte at {idx} in {string}.
 		The index of the first character is zero.
 		If there are no multibyte characters the returned value is
 		equal to {idx}.
 		When {countcc} is omitted or zero, then composing characters
 		are not counted separately; their byte length is added to the
 		preceding base character.
 		When {countcc} is set to 1, then composing characters are
 		counted as separate characters.
 		Returns -1 if the arguments are invalid or if {idx} is greater
 		than the index of the last byte in {string}.  An error is
 		given if the first argument is not a string, the second
 		argument is not a number or when the third argument is present
 		and is not zero or one.
 		See |byteidx()| and |byteidxcomp()| for getting the byte index
 		from the character index.
 		Examples: >
 			echo charidx('áb́ć', 3)		returns 1
 			echo charidx('áb́ć', 6, 1)	returns 4
 			echo charidx('áb́ć', 16)		returns -1
 cindent({lnum})						*cindent()*
 		Get the amount of indent for line {lnum} according to the C
 		indenting rules, as with 'cindent'.
--- a/runtime/doc/usr_41.txt
+++ b/runtime/doc/usr_41.txt
@@ -613,6 +613,7 @@ String manipulation:					*string-functions*
 	iconv()			convert text from one encoding to another
 	byteidx()		byte index of a character in a string
 	byteidxcomp()		like byteidx() but count composing characters
 	charidx()		character index of a byte in a string
 	repeat()		repeat a string multiple times
 	eval()			evaluate a string expression
 	execute()		execute an Ex command and get the output
--- a/src/nvim/eval.lua
+++ b/src/nvim/eval.lua
@@ -63,6 +63,7 @@ return {
    chanclose={args={1, 2}},
    chansend={args=2},
    char2nr={args={1, 2}},
    charidx={args={2, 3}},
    cindent={args=1},
    clearmatches={args={0, 1}},
    col={args=1},
--- a/src/nvim/eval/funcs.c
+++ b/src/nvim/eval/funcs.c
@@ -940,6 +940,52 @@ static void f_char2nr(typval_T *argvars, typval_T *rettv, FunPtr fptr)
      (const char_u *)tv_get_string(&argvars[0]));
 }
 // "charidx()" function
 //
 // Sets rettv to the character index of the byte at index argvars[1] in the
 // string argvars[0], or leaves it at -1 when that byte index is negative or
 // at/after the terminating NUL.
 //
 // argvars[0]: String to scan.
 // argvars[1]: Number, byte index into the string.
 // argvars[2]: optional Number, must be 0 or 1.  When 1 the string is walked
 //             with utf_ptr2len(), otherwise with utfc_ptr2len().
 //
 // E474 (e_invarg) is given when an argument has the wrong type or when the
 // third argument is neither 0 nor 1.
 static void f_charidx(typval_T *argvars, typval_T *rettv, FunPtr fptr)
 {
  // Default result; every early return below reports "no such character".
  rettv->vval.v_number = -1;

  if (argvars[0].v_type != VAR_STRING
      || argvars[1].v_type != VAR_NUMBER
      || (argvars[2].v_type != VAR_UNKNOWN
          && argvars[2].v_type != VAR_NUMBER)) {
    EMSG(_(e_invarg));
    return;
  }

  const char *str = tv_get_string_chk(&argvars[0]);
  varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
  if (str == NULL || idx < 0) {
    return;
  }

  // Range-check the flag at full width.  Narrowing to int first would let a
  // value such as 4294967296 wrap to 0 and slip past the 0/1 check.
  varnumber_T countcc = 0;
  if (argvars[2].v_type != VAR_UNKNOWN) {
    countcc = tv_get_number(&argvars[2]);
  }
  if (countcc < 0 || countcc > 1) {
    EMSG(_(e_invarg));
    return;
  }

  // Step function used to advance from one counted character to the next.
  int (*ptr2len)(const char_u *) = countcc ? utf_ptr2len : utfc_ptr2len;

  // Walk the string until the character containing byte "idx" has been
  // passed.  The offset is compared instead of forming "str + idx", which
  // would be undefined behaviour when idx points far beyond the string.
  const char *p = str;
  varnumber_T nchars = 0;
  while ((varnumber_T)(p - str) <= idx) {
    if (*p == NUL) {
      // Byte "idx" lies at or after the terminating NUL: keep -1.
      return;
    }
    p += ptr2len((const char_u *)p);
    nchars++;
  }

  // idx >= 0 guarantees at least one iteration, so nchars >= 1 here; the
  // last character stepped over is the one containing byte "idx".
  rettv->vval.v_number = nchars - 1;
 }
 /*
 * "cindent(lnum)" function
 */
--- a/src/nvim/testdir/test_functions.vim
+++ b/src/nvim/testdir/test_functions.vim
@@ -833,6 +833,31 @@ func Test_byte2line_line2byte()
  bw!
 endfunc
 " Test for charidx()
 func Test_charidx()
  let a = 'xáb́y'

  " Each pair is [expected character index, byte index].  Without the third
  " argument a composing character is part of the preceding base character.
  for [expected, idx] in [[0, 0], [1, 3], [2, 4], [3, 7], [-1, 8]]
    call assert_equal(expected, charidx(a, idx))
  endfor
  call assert_equal(-1, charidx('', 0))

  " count composing characters
  for [expected, idx] in [[0, 0], [2, 2], [3, 4], [5, 7], [-1, 8]]
    call assert_equal(expected, charidx(a, idx, 1))
  endfor
  call assert_equal(-1, charidx('', 0, 1))

  " Wrong argument types and a {countcc} other than 0 or 1 must give E474.
  let bad_calls = [
        \ 'let x = charidx([], 1)',
        \ 'let x = charidx("abc", [])',
        \ 'let x = charidx("abc", 1, [])',
        \ 'let x = charidx("abc", 1, -1)',
        \ 'let x = charidx("abc", 1, 2)',
        \ ]
  for cmd in bad_calls
    call assert_fails(cmd, 'E474:')
  endfor
 endfunc
 func Test_count()
  let l = ['a', 'a', 'A', 'b']
  call assert_equal(2, count(l, 'a'))