mirror of
https://github.com/neovim/neovim.git
synced 2025-10-06 09:56:31 +00:00
vim-patch:9.0.1485: no functions for converting from/to UTF-16 index (#23318)
Problem: no functions for converting from/to UTF-16 index.
Solution: Add UTF-16 flag to existing functions and add strutf16len() and
utf16idx(). (Yegappan Lakshmanan, closes vim/vim#12216)
67672ef097
Co-authored-by: Yegappan Lakshmanan <yegappan@yahoo.com>
This commit is contained in:
@@ -69,8 +69,10 @@ bufnr([{buf} [, {create}]]) Number Number of the buffer {buf}
|
|||||||
bufwinid({buf}) Number window ID of buffer {buf}
|
bufwinid({buf}) Number window ID of buffer {buf}
|
||||||
bufwinnr({buf}) Number window number of buffer {buf}
|
bufwinnr({buf}) Number window number of buffer {buf}
|
||||||
byte2line({byte}) Number line number at byte count {byte}
|
byte2line({byte}) Number line number at byte count {byte}
|
||||||
byteidx({expr}, {nr}) Number byte index of {nr}th char in {expr}
|
byteidx({expr}, {nr} [, {utf16}])
|
||||||
byteidxcomp({expr}, {nr}) Number byte index of {nr}th char in {expr}
|
Number byte index of {nr}th char in {expr}
|
||||||
|
byteidxcomp({expr}, {nr} [, {utf16}])
|
||||||
|
Number byte index of {nr}th char in {expr}
|
||||||
call({func}, {arglist} [, {dict}])
|
call({func}, {arglist} [, {dict}])
|
||||||
any call {func} with arguments {arglist}
|
any call {func} with arguments {arglist}
|
||||||
ceil({expr}) Float round {expr} up
|
ceil({expr}) Float round {expr} up
|
||||||
@@ -80,7 +82,7 @@ chansend({id}, {data}) Number Writes {data} to channel
|
|||||||
char2nr({expr} [, {utf8}]) Number ASCII/UTF-8 value of first char in {expr}
|
char2nr({expr} [, {utf8}]) Number ASCII/UTF-8 value of first char in {expr}
|
||||||
charclass({string}) Number character class of {string}
|
charclass({string}) Number character class of {string}
|
||||||
charcol({expr} [, {winid}]) Number column number of cursor or mark
|
charcol({expr} [, {winid}]) Number column number of cursor or mark
|
||||||
charidx({string}, {idx} [, {countcc}])
|
charidx({string}, {idx} [, {countcc} [, {utf16}]])
|
||||||
Number char index of byte {idx} in {string}
|
Number char index of byte {idx} in {string}
|
||||||
chdir({dir}) String change current working directory
|
chdir({dir}) String change current working directory
|
||||||
cindent({lnum}) Number C indent for line {lnum}
|
cindent({lnum}) Number C indent for line {lnum}
|
||||||
@@ -501,6 +503,8 @@ strptime({format}, {timestring})
|
|||||||
strridx({haystack}, {needle} [, {start}])
|
strridx({haystack}, {needle} [, {start}])
|
||||||
Number last index of {needle} in {haystack}
|
Number last index of {needle} in {haystack}
|
||||||
strtrans({expr}) String translate string to make it printable
|
strtrans({expr}) String translate string to make it printable
|
||||||
|
strutf16len({string} [, {countcc}])
|
||||||
|
Number number of UTF-16 code units in {string}
|
||||||
strwidth({expr}) Number display cell length of the String {expr}
|
strwidth({expr}) Number display cell length of the String {expr}
|
||||||
submatch({nr} [, {list}]) String or List
|
submatch({nr} [, {list}]) String or List
|
||||||
specific match in ":s" or substitute()
|
specific match in ":s" or substitute()
|
||||||
@@ -545,6 +549,8 @@ undofile({name}) String undo file name for {name}
|
|||||||
undotree() List undo file tree
|
undotree() List undo file tree
|
||||||
uniq({list} [, {func} [, {dict}]])
|
uniq({list} [, {func} [, {dict}]])
|
||||||
List remove adjacent duplicates from a list
|
List remove adjacent duplicates from a list
|
||||||
|
utf16idx({string}, {idx} [, {countcc} [, {charidx}]])
|
||||||
|
Number UTF-16 index of byte {idx} in {string}
|
||||||
values({dict}) List values in {dict}
|
values({dict}) List values in {dict}
|
||||||
virtcol({expr} [, {list}]) Number or List
|
virtcol({expr} [, {list}]) Number or List
|
||||||
screen column of cursor or mark
|
screen column of cursor or mark
|
||||||
@@ -982,7 +988,7 @@ byte2line({byte}) *byte2line()*
|
|||||||
Can also be used as a |method|: >
|
Can also be used as a |method|: >
|
||||||
GetOffset()->byte2line()
|
GetOffset()->byte2line()
|
||||||
|
|
||||||
byteidx({expr}, {nr}) *byteidx()*
|
byteidx({expr}, {nr} [, {utf16}]) *byteidx()*
|
||||||
Return byte index of the {nr}th character in the String
|
Return byte index of the {nr}th character in the String
|
||||||
{expr}. Use zero for the first character, it then returns
|
{expr}. Use zero for the first character, it then returns
|
||||||
zero.
|
zero.
|
||||||
@@ -992,6 +998,13 @@ byteidx({expr}, {nr}) *byteidx()*
|
|||||||
length is added to the preceding base character. See
|
length is added to the preceding base character. See
|
||||||
|byteidxcomp()| below for counting composing characters
|
|byteidxcomp()| below for counting composing characters
|
||||||
separately.
|
separately.
|
||||||
|
When {utf16} is present and TRUE, {nr} is used as the UTF-16
|
||||||
|
index in the String {expr} instead of as the character index.
|
||||||
|
The UTF-16 index is the index in the string when it is encoded
|
||||||
|
with 16-bit words. If the specified UTF-16 index is in the
|
||||||
|
middle of a character (e.g. in a 4-byte character), then the
|
||||||
|
byte index of the first byte in the character is returned.
|
||||||
|
Refer to |string-offset-encoding| for more information.
|
||||||
Example : >
|
Example : >
|
||||||
echo matchstr(str, ".", byteidx(str, 3))
|
echo matchstr(str, ".", byteidx(str, 3))
|
||||||
< will display the fourth character. Another way to do the
|
< will display the fourth character. Another way to do the
|
||||||
@@ -1003,11 +1016,17 @@ byteidx({expr}, {nr}) *byteidx()*
|
|||||||
If there are less than {nr} characters -1 is returned.
|
If there are less than {nr} characters -1 is returned.
|
||||||
If there are exactly {nr} characters the length of the string
|
If there are exactly {nr} characters the length of the string
|
||||||
in bytes is returned.
|
in bytes is returned.
|
||||||
|
See |charidx()| and |utf16idx()| for getting the character and
|
||||||
|
UTF-16 index respectively from the byte index.
|
||||||
|
Examples: >
|
||||||
|
echo byteidx('a😊😊', 2) returns 5
|
||||||
|
echo byteidx('a😊😊', 2, 1) returns 1
|
||||||
|
echo byteidx('a😊😊', 3, 1) returns 5
|
||||||
|
<
|
||||||
Can also be used as a |method|: >
|
Can also be used as a |method|: >
|
||||||
GetName()->byteidx(idx)
|
GetName()->byteidx(idx)
|
||||||
|
|
||||||
byteidxcomp({expr}, {nr}) *byteidxcomp()*
|
byteidxcomp({expr}, {nr} [, {utf16}]) *byteidxcomp()*
|
||||||
Like byteidx(), except that a composing character is counted
|
Like byteidx(), except that a composing character is counted
|
||||||
as a separate character. Example: >
|
as a separate character. Example: >
|
||||||
let s = 'e' .. nr2char(0x301)
|
let s = 'e' .. nr2char(0x301)
|
||||||
@@ -1131,27 +1150,36 @@ charcol({expr} [, {winid}]) *charcol()*
|
|||||||
GetPos()->col()
|
GetPos()->col()
|
||||||
<
|
<
|
||||||
*charidx()*
|
*charidx()*
|
||||||
charidx({string}, {idx} [, {countcc}])
|
charidx({string}, {idx} [, {countcc} [, {utf16}]])
|
||||||
Return the character index of the byte at {idx} in {string}.
|
Return the character index of the byte at {idx} in {string}.
|
||||||
The index of the first character is zero.
|
The index of the first character is zero.
|
||||||
If there are no multibyte characters the returned value is
|
If there are no multibyte characters the returned value is
|
||||||
equal to {idx}.
|
equal to {idx}.
|
||||||
|
|
||||||
When {countcc} is omitted or |FALSE|, then composing characters
|
When {countcc} is omitted or |FALSE|, then composing characters
|
||||||
are not counted separately, their byte length is
|
are not counted separately, their byte length is added to the
|
||||||
added to the preceding base character.
|
preceding base character.
|
||||||
When {countcc} is |TRUE|, then composing characters are
|
When {countcc} is |TRUE|, then composing characters are
|
||||||
counted as separate characters.
|
counted as separate characters.
|
||||||
|
|
||||||
|
When {utf16} is present and TRUE, {idx} is used as the UTF-16
|
||||||
|
index in the String {string} instead of as the byte index.
|
||||||
|
|
||||||
Returns -1 if the arguments are invalid or if {idx} is greater
|
Returns -1 if the arguments are invalid or if {idx} is greater
|
||||||
than the index of the last byte in {string}. An error is
|
than the index of the last byte in {string}. An error is
|
||||||
given if the first argument is not a string, the second
|
given if the first argument is not a string, the second
|
||||||
argument is not a number or when the third argument is present
|
argument is not a number or when the third argument is present
|
||||||
and is not zero or one.
|
and is not zero or one.
|
||||||
|
|
||||||
See |byteidx()| and |byteidxcomp()| for getting the byte index
|
See |byteidx()| and |byteidxcomp()| for getting the byte index
|
||||||
from the character index.
|
from the character index and |utf16idx()| for getting the
|
||||||
|
UTF-16 index from the character index.
|
||||||
|
Refer to |string-offset-encoding| for more information.
|
||||||
Examples: >
|
Examples: >
|
||||||
echo charidx('áb́ć', 3) returns 1
|
echo charidx('áb́ć', 3) returns 1
|
||||||
echo charidx('áb́ć', 6, 1) returns 4
|
echo charidx('áb́ć', 6, 1) returns 4
|
||||||
echo charidx('áb́ć', 16) returns -1
|
echo charidx('áb́ć', 16) returns -1
|
||||||
|
echo charidx('a😊😊', 4, 0, 1) returns 2
|
||||||
<
|
<
|
||||||
Can also be used as a |method|: >
|
Can also be used as a |method|: >
|
||||||
GetName()->charidx(idx)
|
GetName()->charidx(idx)
|
||||||
@@ -8332,6 +8360,28 @@ strtrans({string}) *strtrans()*
|
|||||||
Can also be used as a |method|: >
|
Can also be used as a |method|: >
|
||||||
GetString()->strtrans()
|
GetString()->strtrans()
|
||||||
|
|
||||||
|
strutf16len({string} [, {countcc}]) *strutf16len()*
|
||||||
|
The result is a Number, which is the number of UTF-16 code
|
||||||
|
units in String {string} (after converting it to UTF-16).
|
||||||
|
|
||||||
|
When {countcc} is TRUE, composing characters are counted
|
||||||
|
separately.
|
||||||
|
When {countcc} is omitted or FALSE, composing characters are
|
||||||
|
ignored.
|
||||||
|
|
||||||
|
Returns zero on error.
|
||||||
|
|
||||||
|
Also see |strlen()| and |strcharlen()|.
|
||||||
|
Examples: >
|
||||||
|
echo strutf16len('a') returns 1
|
||||||
|
echo strutf16len('©') returns 1
|
||||||
|
echo strutf16len('😊') returns 2
|
||||||
|
echo strutf16len('ą́') returns 1
|
||||||
|
echo strutf16len('ą́', v:true) returns 3
|
||||||
|
|
||||||
|
Can also be used as a |method|: >
|
||||||
|
GetText()->strutf16len()
|
||||||
|
<
|
||||||
strwidth({string}) *strwidth()*
|
strwidth({string}) *strwidth()*
|
||||||
The result is a Number, which is the number of display cells
|
The result is a Number, which is the number of display cells
|
||||||
String {string} occupies. A Tab character is counted as one
|
String {string} occupies. A Tab character is counted as one
|
||||||
@@ -9063,6 +9113,34 @@ uniq({list} [, {func} [, {dict}]]) *uniq()* *E882*
|
|||||||
|
|
||||||
Can also be used as a |method|: >
|
Can also be used as a |method|: >
|
||||||
mylist->uniq()
|
mylist->uniq()
|
||||||
|
<
|
||||||
|
*utf16idx()*
|
||||||
|
utf16idx({string}, {idx} [, {countcc} [, {charidx}]])
|
||||||
|
Same as |charidx()| but returns the UTF-16 index of the byte
|
||||||
|
at {idx} in {string} (after converting it to UTF-16).
|
||||||
|
|
||||||
|
When {charidx} is present and TRUE, {idx} is used as the
|
||||||
|
character index in the String {string} instead of as the byte
|
||||||
|
index.
|
||||||
|
An {idx} in the middle of a UTF-8 sequence is rounded upwards
|
||||||
|
to the end of that sequence.
|
||||||
|
|
||||||
|
See |byteidx()| and |byteidxcomp()| for getting the byte index
|
||||||
|
from the UTF-16 index and |charidx()| for getting the
|
||||||
|
character index from the UTF-16 index.
|
||||||
|
Refer to |string-offset-encoding| for more information.
|
||||||
|
Examples: >
|
||||||
|
echo utf16idx('a😊😊', 3) returns 2
|
||||||
|
echo utf16idx('a😊😊', 7) returns 4
|
||||||
|
echo utf16idx('a😊😊', 1, 0, 1) returns 2
|
||||||
|
echo utf16idx('a😊😊', 2, 0, 1) returns 4
|
||||||
|
echo utf16idx('aą́c', 6) returns 2
|
||||||
|
echo utf16idx('aą́c', 6, 1) returns 4
|
||||||
|
echo utf16idx('a😊😊', 9) returns -1
|
||||||
|
<
|
||||||
|
Can also be used as a |method|: >
|
||||||
|
GetName()->utf16idx(idx)
|
||||||
|
|
||||||
|
|
||||||
values({dict}) *values()*
|
values({dict}) *values()*
|
||||||
Return a |List| with all the values of {dict}. The |List| is
|
Return a |List| with all the values of {dict}. The |List| is
|
||||||
|
@@ -1433,6 +1433,32 @@ Examples: >
|
|||||||
echo $"The square root of {{9}} is {sqrt(9)}"
|
echo $"The square root of {{9}} is {sqrt(9)}"
|
||||||
< The square root of {9} is 3.0 ~
|
< The square root of {9} is 3.0 ~
|
||||||
|
|
||||||
|
*string-offset-encoding*
|
||||||
|
A string consists of multiple characters. UTF-8 uses one byte for ASCII
|
||||||
|
characters, two bytes for other Latin characters and more bytes for other
|
||||||
|
characters.
|
||||||
|
|
||||||
|
A string offset can count characters or bytes. Other programs may use
|
||||||
|
UTF-16 encoding (16-bit words) and an offset of UTF-16 words. Some functions
|
||||||
|
use byte offsets, usually for UTF-8 encoding. Other functions use character
|
||||||
|
offsets, in which case the encoding doesn't matter.
|
||||||
|
|
||||||
|
The different offsets for the string "a©😊" are below:
|
||||||
|
|
||||||
|
UTF-8 offsets:
|
||||||
|
[0]: 61, [1]: C2, [2]: A9, [3]: F0, [4]: 9F, [5]: 98, [6]: 8A
|
||||||
|
UTF-16 offsets:
|
||||||
|
[0]: 0061, [1]: 00A9, [2]: D83D, [3]: DE0A
|
||||||
|
UTF-32 (character) offsets:
|
||||||
|
[0]: 00000061, [1]: 000000A9, [2]: 0001F60A
|
||||||
|
|
||||||
|
You can use the "g8" and "ga" commands on a character to see the
|
||||||
|
decimal/hex/octal values.
|
||||||
|
|
||||||
|
The functions |byteidx()|, |utf16idx()| and |charidx()| can be used to convert
|
||||||
|
between these indices. The functions |strlen()|, |strutf16len()| and
|
||||||
|
|strcharlen()| return the number of bytes, UTF-16 code units and characters in
|
||||||
|
a string respectively.
|
||||||
|
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
option *expr-option* *E112* *E113*
|
option *expr-option* *E112* *E113*
|
||||||
|
@@ -621,6 +621,7 @@ String manipulation: *string-functions*
|
|||||||
strlen() length of a string in bytes
|
strlen() length of a string in bytes
|
||||||
strcharlen() length of a string in characters
|
strcharlen() length of a string in characters
|
||||||
strchars() number of characters in a string
|
strchars() number of characters in a string
|
||||||
|
strutf16len() number of UTF-16 code units in a string
|
||||||
strwidth() size of string when displayed
|
strwidth() size of string when displayed
|
||||||
strdisplaywidth() size of string when displayed, deals with tabs
|
strdisplaywidth() size of string when displayed, deals with tabs
|
||||||
setcellwidths() set character cell width overrides
|
setcellwidths() set character cell width overrides
|
||||||
@@ -636,6 +637,7 @@ String manipulation: *string-functions*
|
|||||||
byteidx() byte index of a character in a string
|
byteidx() byte index of a character in a string
|
||||||
byteidxcomp() like byteidx() but count composing characters
|
byteidxcomp() like byteidx() but count composing characters
|
||||||
charidx() character index of a byte in a string
|
charidx() character index of a byte in a string
|
||||||
|
utf16idx() UTF-16 index of a byte in a string
|
||||||
repeat() repeat a string multiple times
|
repeat() repeat a string multiple times
|
||||||
eval() evaluate a string expression
|
eval() evaluate a string expression
|
||||||
execute() execute an Ex command and get the output
|
execute() execute an Ex command and get the output
|
||||||
|
@@ -65,8 +65,8 @@ return {
|
|||||||
bufwinid={args=1, base=1},
|
bufwinid={args=1, base=1},
|
||||||
bufwinnr={args=1, base=1},
|
bufwinnr={args=1, base=1},
|
||||||
byte2line={args=1, base=1},
|
byte2line={args=1, base=1},
|
||||||
byteidx={args=2, base=1, fast=true},
|
byteidx={args={2, 3}, base=1, fast=true},
|
||||||
byteidxcomp={args=2, base=1, fast=true},
|
byteidxcomp={args={2, 3}, base=1, fast=true},
|
||||||
call={args={2, 3}, base=1},
|
call={args={2, 3}, base=1},
|
||||||
ceil={args=1, base=1, float_func="ceil"},
|
ceil={args=1, base=1, float_func="ceil"},
|
||||||
changenr={},
|
changenr={},
|
||||||
@@ -75,7 +75,7 @@ return {
|
|||||||
char2nr={args={1, 2}, base=1, fast=true},
|
char2nr={args={1, 2}, base=1, fast=true},
|
||||||
charclass={args=1, base=1},
|
charclass={args=1, base=1},
|
||||||
charcol={args={1, 2}, base=1},
|
charcol={args={1, 2}, base=1},
|
||||||
charidx={args={2, 3}, base=1},
|
charidx={args={2, 4}, base=1},
|
||||||
chdir={args=1, base=1},
|
chdir={args=1, base=1},
|
||||||
cindent={args=1, base=1},
|
cindent={args=1, base=1},
|
||||||
clearmatches={args={0, 1}, base=1},
|
clearmatches={args={0, 1}, base=1},
|
||||||
@@ -397,6 +397,7 @@ return {
|
|||||||
strptime={args=2, base=1},
|
strptime={args=2, base=1},
|
||||||
strridx={args={2, 3}, base=1},
|
strridx={args={2, 3}, base=1},
|
||||||
strtrans={args=1, base=1, fast=true},
|
strtrans={args=1, base=1, fast=true},
|
||||||
|
strutf16len={args={1, 2}, base=1},
|
||||||
strwidth={args=1, base=1, fast=true},
|
strwidth={args=1, base=1, fast=true},
|
||||||
submatch={args={1, 2}, base=1},
|
submatch={args={1, 2}, base=1},
|
||||||
substitute={args=4, base=1},
|
substitute={args=4, base=1},
|
||||||
@@ -435,6 +436,7 @@ return {
|
|||||||
undofile={args=1, base=1},
|
undofile={args=1, base=1},
|
||||||
undotree={},
|
undotree={},
|
||||||
uniq={args={1, 3}, base=1},
|
uniq={args={1, 3}, base=1},
|
||||||
|
utf16idx={args={2, 4}, base=1},
|
||||||
values={args=1, base=1},
|
values={args=1, base=1},
|
||||||
virtcol={args={1, 2}, base=1},
|
virtcol={args={1, 2}, base=1},
|
||||||
virtcol2col={args=3, base=1},
|
virtcol2col={args=3, base=1},
|
||||||
|
@@ -1504,22 +1504,44 @@ char *strrep(const char *src, const char *what, const char *rep)
|
|||||||
|
|
||||||
static void byteidx(typval_T *argvars, typval_T *rettv, int comp)
|
static void byteidx(typval_T *argvars, typval_T *rettv, int comp)
|
||||||
{
|
{
|
||||||
|
rettv->vval.v_number = -1;
|
||||||
|
|
||||||
const char *const str = tv_get_string_chk(&argvars[0]);
|
const char *const str = tv_get_string_chk(&argvars[0]);
|
||||||
varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
|
varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
|
||||||
rettv->vval.v_number = -1;
|
|
||||||
if (str == NULL || idx < 0) {
|
if (str == NULL || idx < 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
varnumber_T utf16idx = false;
|
||||||
|
if (argvars[2].v_type != VAR_UNKNOWN) {
|
||||||
|
utf16idx = tv_get_bool(&argvars[2]);
|
||||||
|
if (utf16idx < 0 || utf16idx > 1) {
|
||||||
|
semsg(_(e_using_number_as_bool_nr), utf16idx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int (*ptr2len)(const char *);
|
||||||
|
if (comp) {
|
||||||
|
ptr2len = utf_ptr2len;
|
||||||
|
} else {
|
||||||
|
ptr2len = utfc_ptr2len;
|
||||||
|
}
|
||||||
|
|
||||||
const char *t = str;
|
const char *t = str;
|
||||||
for (; idx > 0; idx--) {
|
for (; idx > 0; idx--) {
|
||||||
if (*t == NUL) { // EOL reached.
|
if (*t == NUL) { // EOL reached.
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (comp) {
|
if (utf16idx) {
|
||||||
t += utf_ptr2len(t);
|
const int clen = ptr2len(t);
|
||||||
} else {
|
const int c = (clen > 1) ? utf_ptr2char(t) : *t;
|
||||||
t += utfc_ptr2len(t);
|
if (c > 0xFFFF) {
|
||||||
|
idx--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (idx > 0) {
|
||||||
|
t += ptr2len(t);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
rettv->vval.v_number = (varnumber_T)(t - str);
|
rettv->vval.v_number = (varnumber_T)(t - str);
|
||||||
@@ -1542,24 +1564,27 @@ void f_charidx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
|
|||||||
{
|
{
|
||||||
rettv->vval.v_number = -1;
|
rettv->vval.v_number = -1;
|
||||||
|
|
||||||
if ((tv_check_for_string_arg(argvars, 0) == FAIL
|
if (tv_check_for_string_arg(argvars, 0) == FAIL
|
||||||
|| tv_check_for_number_arg(argvars, 1) == FAIL
|
|| tv_check_for_number_arg(argvars, 1) == FAIL
|
||||||
|| tv_check_for_opt_bool_arg(argvars, 2) == FAIL)) {
|
|| tv_check_for_opt_bool_arg(argvars, 2) == FAIL
|
||||||
|
|| (argvars[2].v_type != VAR_UNKNOWN
|
||||||
|
&& tv_check_for_opt_bool_arg(argvars, 3) == FAIL)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *str = tv_get_string_chk(&argvars[0]);
|
const char *const str = tv_get_string_chk(&argvars[0]);
|
||||||
varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
|
varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
|
||||||
if (str == NULL || idx < 0) {
|
if (str == NULL || idx < 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
int countcc = 0;
|
|
||||||
|
varnumber_T countcc = false;
|
||||||
|
varnumber_T utf16idx = false;
|
||||||
if (argvars[2].v_type != VAR_UNKNOWN) {
|
if (argvars[2].v_type != VAR_UNKNOWN) {
|
||||||
countcc = (int)tv_get_number(&argvars[2]);
|
countcc = tv_get_bool(&argvars[2]);
|
||||||
|
if (argvars[3].v_type != VAR_UNKNOWN) {
|
||||||
|
utf16idx = tv_get_bool(&argvars[3]);
|
||||||
}
|
}
|
||||||
if (countcc < 0 || countcc > 1) {
|
|
||||||
semsg(_(e_using_number_as_bool_nr), countcc);
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int (*ptr2len)(const char *);
|
int (*ptr2len)(const char *);
|
||||||
@@ -1571,10 +1596,18 @@ void f_charidx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
|
|||||||
|
|
||||||
const char *p;
|
const char *p;
|
||||||
int len;
|
int len;
|
||||||
for (p = str, len = 0; p <= str + idx; len++) {
|
for (p = str, len = 0; utf16idx ? idx >= 0 : p <= str + idx; len++) {
|
||||||
if (*p == NUL) {
|
if (*p == NUL) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (utf16idx) {
|
||||||
|
idx--;
|
||||||
|
const int clen = ptr2len(p);
|
||||||
|
const int c = (clen > 1) ? utf_ptr2char(p) : *p;
|
||||||
|
if (c > 0xFFFF) {
|
||||||
|
idx--;
|
||||||
|
}
|
||||||
|
}
|
||||||
p += ptr2len(p);
|
p += ptr2len(p);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1743,6 +1776,36 @@ void f_strchars(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// "strutf16len()" function
|
||||||
|
void f_strutf16len(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
|
||||||
|
{
|
||||||
|
rettv->vval.v_number = -1;
|
||||||
|
|
||||||
|
if (tv_check_for_string_arg(argvars, 0) == FAIL
|
||||||
|
|| tv_check_for_opt_bool_arg(argvars, 1) == FAIL) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
varnumber_T countcc = false;
|
||||||
|
if (argvars[1].v_type != VAR_UNKNOWN) {
|
||||||
|
countcc = tv_get_bool(&argvars[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *s = tv_get_string(&argvars[0]);
|
||||||
|
varnumber_T len = 0;
|
||||||
|
int (*func_mb_ptr2char_adv)(const char **pp);
|
||||||
|
|
||||||
|
func_mb_ptr2char_adv = countcc ? mb_cptr2char_adv : mb_ptr2char_adv;
|
||||||
|
while (*s != NUL) {
|
||||||
|
const int ch = func_mb_ptr2char_adv(&s);
|
||||||
|
if (ch > 0xFFFF) {
|
||||||
|
len++;
|
||||||
|
}
|
||||||
|
len++;
|
||||||
|
}
|
||||||
|
rettv->vval.v_number = len;
|
||||||
|
}
|
||||||
|
|
||||||
/// "strdisplaywidth()" function
|
/// "strdisplaywidth()" function
|
||||||
void f_strdisplaywidth(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
|
void f_strdisplaywidth(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
|
||||||
{
|
{
|
||||||
@@ -1914,6 +1977,61 @@ void f_strtrans(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
|
|||||||
rettv->vval.v_string = transstr(tv_get_string(&argvars[0]), true);
|
rettv->vval.v_string = transstr(tv_get_string(&argvars[0]), true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// "utf16idx()" function
|
||||||
|
void f_utf16idx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
|
||||||
|
{
|
||||||
|
rettv->vval.v_number = -1;
|
||||||
|
|
||||||
|
if (tv_check_for_string_arg(argvars, 0) == FAIL
|
||||||
|
|| tv_check_for_opt_number_arg(argvars, 1) == FAIL
|
||||||
|
|| tv_check_for_opt_bool_arg(argvars, 2) == FAIL
|
||||||
|
|| (argvars[2].v_type != VAR_UNKNOWN
|
||||||
|
&& tv_check_for_opt_bool_arg(argvars, 3) == FAIL)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *const str = tv_get_string_chk(&argvars[0]);
|
||||||
|
varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
|
||||||
|
if (str == NULL || idx < 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
varnumber_T countcc = false;
|
||||||
|
varnumber_T charidx = false;
|
||||||
|
if (argvars[2].v_type != VAR_UNKNOWN) {
|
||||||
|
countcc = tv_get_bool(&argvars[2]);
|
||||||
|
if (argvars[3].v_type != VAR_UNKNOWN) {
|
||||||
|
charidx = tv_get_bool(&argvars[3]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int (*ptr2len)(const char *);
|
||||||
|
if (countcc) {
|
||||||
|
ptr2len = utf_ptr2len;
|
||||||
|
} else {
|
||||||
|
ptr2len = utfc_ptr2len;
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *p;
|
||||||
|
int len;
|
||||||
|
for (p = str, len = 0; charidx ? idx >= 0 : p <= str + idx; len++) {
|
||||||
|
if (*p == NUL) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const int clen = ptr2len(p);
|
||||||
|
const int c = (clen > 1) ? utf_ptr2char(p) : *p;
|
||||||
|
if (c > 0xFFFF) {
|
||||||
|
len++;
|
||||||
|
}
|
||||||
|
p += ptr2len(p);
|
||||||
|
if (charidx) {
|
||||||
|
idx--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rettv->vval.v_number = len > 0 ? len - 1 : 0;
|
||||||
|
}
|
||||||
|
|
||||||
/// "tolower(string)" function
|
/// "tolower(string)" function
|
||||||
void f_tolower(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
|
void f_tolower(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
|
||||||
{
|
{
|
||||||
|
@@ -1064,19 +1064,14 @@ func Test_byte2line_line2byte()
|
|||||||
bw!
|
bw!
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
" Test for byteidx() and byteidxcomp() functions
|
" Test for byteidx() using a character index
|
||||||
func Test_byteidx()
|
func Test_byteidx()
|
||||||
let a = '.é.' " one char of two bytes
|
let a = '.é.' " one char of two bytes
|
||||||
call assert_equal(0, byteidx(a, 0))
|
call assert_equal(0, byteidx(a, 0))
|
||||||
call assert_equal(0, byteidxcomp(a, 0))
|
|
||||||
call assert_equal(1, byteidx(a, 1))
|
call assert_equal(1, byteidx(a, 1))
|
||||||
call assert_equal(1, byteidxcomp(a, 1))
|
|
||||||
call assert_equal(3, byteidx(a, 2))
|
call assert_equal(3, byteidx(a, 2))
|
||||||
call assert_equal(3, byteidxcomp(a, 2))
|
|
||||||
call assert_equal(4, byteidx(a, 3))
|
call assert_equal(4, byteidx(a, 3))
|
||||||
call assert_equal(4, byteidxcomp(a, 3))
|
|
||||||
call assert_equal(-1, byteidx(a, 4))
|
call assert_equal(-1, byteidx(a, 4))
|
||||||
call assert_equal(-1, byteidxcomp(a, 4))
|
|
||||||
|
|
||||||
let b = '.é.' " normal e with composing char
|
let b = '.é.' " normal e with composing char
|
||||||
call assert_equal(0, b->byteidx(0))
|
call assert_equal(0, b->byteidx(0))
|
||||||
@@ -1084,18 +1079,184 @@ func Test_byteidx()
|
|||||||
call assert_equal(4, b->byteidx(2))
|
call assert_equal(4, b->byteidx(2))
|
||||||
call assert_equal(5, b->byteidx(3))
|
call assert_equal(5, b->byteidx(3))
|
||||||
call assert_equal(-1, b->byteidx(4))
|
call assert_equal(-1, b->byteidx(4))
|
||||||
call assert_fails("call byteidx([], 0)", 'E730:')
|
|
||||||
|
|
||||||
|
" string with multiple composing characters
|
||||||
|
let str = '-ą́-ą́'
|
||||||
|
call assert_equal(0, byteidx(str, 0))
|
||||||
|
call assert_equal(1, byteidx(str, 1))
|
||||||
|
call assert_equal(6, byteidx(str, 2))
|
||||||
|
call assert_equal(7, byteidx(str, 3))
|
||||||
|
call assert_equal(12, byteidx(str, 4))
|
||||||
|
call assert_equal(-1, byteidx(str, 5))
|
||||||
|
|
||||||
|
" empty string
|
||||||
|
call assert_equal(0, byteidx('', 0))
|
||||||
|
call assert_equal(-1, byteidx('', 1))
|
||||||
|
|
||||||
|
" error cases
|
||||||
|
call assert_fails("call byteidx([], 0)", 'E730:')
|
||||||
|
call assert_fails("call byteidx('abc', [])", 'E745:')
|
||||||
|
endfunc
|
||||||
|
|
||||||
|
" Test for byteidxcomp() using a character index
|
||||||
|
func Test_byteidxcomp()
|
||||||
|
let a = '.é.' " one char of two bytes
|
||||||
|
call assert_equal(0, byteidxcomp(a, 0))
|
||||||
|
call assert_equal(1, byteidxcomp(a, 1))
|
||||||
|
call assert_equal(3, byteidxcomp(a, 2))
|
||||||
|
call assert_equal(4, byteidxcomp(a, 3))
|
||||||
|
call assert_equal(-1, byteidxcomp(a, 4))
|
||||||
|
|
||||||
|
let b = '.é.' " normal e with composing char
|
||||||
call assert_equal(0, b->byteidxcomp(0))
|
call assert_equal(0, b->byteidxcomp(0))
|
||||||
call assert_equal(1, b->byteidxcomp(1))
|
call assert_equal(1, b->byteidxcomp(1))
|
||||||
call assert_equal(2, b->byteidxcomp(2))
|
call assert_equal(2, b->byteidxcomp(2))
|
||||||
call assert_equal(4, b->byteidxcomp(3))
|
call assert_equal(4, b->byteidxcomp(3))
|
||||||
call assert_equal(5, b->byteidxcomp(4))
|
call assert_equal(5, b->byteidxcomp(4))
|
||||||
call assert_equal(-1, b->byteidxcomp(5))
|
call assert_equal(-1, b->byteidxcomp(5))
|
||||||
|
|
||||||
|
" string with multiple composing characters
|
||||||
|
let str = '-ą́-ą́'
|
||||||
|
call assert_equal(0, byteidxcomp(str, 0))
|
||||||
|
call assert_equal(1, byteidxcomp(str, 1))
|
||||||
|
call assert_equal(2, byteidxcomp(str, 2))
|
||||||
|
call assert_equal(4, byteidxcomp(str, 3))
|
||||||
|
call assert_equal(6, byteidxcomp(str, 4))
|
||||||
|
call assert_equal(7, byteidxcomp(str, 5))
|
||||||
|
call assert_equal(8, byteidxcomp(str, 6))
|
||||||
|
call assert_equal(10, byteidxcomp(str, 7))
|
||||||
|
call assert_equal(12, byteidxcomp(str, 8))
|
||||||
|
call assert_equal(-1, byteidxcomp(str, 9))
|
||||||
|
|
||||||
|
" empty string
|
||||||
|
call assert_equal(0, byteidxcomp('', 0))
|
||||||
|
call assert_equal(-1, byteidxcomp('', 1))
|
||||||
|
|
||||||
|
" error cases
|
||||||
call assert_fails("call byteidxcomp([], 0)", 'E730:')
|
call assert_fails("call byteidxcomp([], 0)", 'E730:')
|
||||||
|
call assert_fails("call byteidxcomp('abc', [])", 'E745:')
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
" Test for charidx()
|
" Test for byteidx() using a UTF-16 index
|
||||||
|
func Test_byteidx_from_utf16_index()
|
||||||
|
" string with single byte characters
|
||||||
|
let str = "abc"
|
||||||
|
for i in range(3)
|
||||||
|
call assert_equal(i, byteidx(str, i, v:true))
|
||||||
|
endfor
|
||||||
|
call assert_equal(3, byteidx(str, 3, v:true))
|
||||||
|
call assert_equal(-1, byteidx(str, 4, v:true))
|
||||||
|
|
||||||
|
" string with two byte characters
|
||||||
|
let str = "a©©b"
|
||||||
|
call assert_equal(0, byteidx(str, 0, v:true))
|
||||||
|
call assert_equal(1, byteidx(str, 1, v:true))
|
||||||
|
call assert_equal(3, byteidx(str, 2, v:true))
|
||||||
|
call assert_equal(5, byteidx(str, 3, v:true))
|
||||||
|
call assert_equal(6, byteidx(str, 4, v:true))
|
||||||
|
call assert_equal(-1, byteidx(str, 5, v:true))
|
||||||
|
|
||||||
|
" string with two byte characters
|
||||||
|
let str = "a😊😊b"
|
||||||
|
call assert_equal(0, byteidx(str, 0, v:true))
|
||||||
|
call assert_equal(1, byteidx(str, 1, v:true))
|
||||||
|
call assert_equal(1, byteidx(str, 2, v:true))
|
||||||
|
call assert_equal(5, byteidx(str, 3, v:true))
|
||||||
|
call assert_equal(5, byteidx(str, 4, v:true))
|
||||||
|
call assert_equal(9, byteidx(str, 5, v:true))
|
||||||
|
call assert_equal(10, byteidx(str, 6, v:true))
|
||||||
|
call assert_equal(-1, byteidx(str, 7, v:true))
|
||||||
|
|
||||||
|
" string with composing characters
|
||||||
|
let str = '-á-b́'
|
||||||
|
call assert_equal(0, byteidx(str, 0, v:true))
|
||||||
|
call assert_equal(1, byteidx(str, 1, v:true))
|
||||||
|
call assert_equal(4, byteidx(str, 2, v:true))
|
||||||
|
call assert_equal(5, byteidx(str, 3, v:true))
|
||||||
|
call assert_equal(8, byteidx(str, 4, v:true))
|
||||||
|
call assert_equal(-1, byteidx(str, 5, v:true))
|
||||||
|
|
||||||
|
" string with multiple composing characters
|
||||||
|
let str = '-ą́-ą́'
|
||||||
|
call assert_equal(0, byteidx(str, 0, v:true))
|
||||||
|
call assert_equal(1, byteidx(str, 1, v:true))
|
||||||
|
call assert_equal(6, byteidx(str, 2, v:true))
|
||||||
|
call assert_equal(7, byteidx(str, 3, v:true))
|
||||||
|
call assert_equal(12, byteidx(str, 4, v:true))
|
||||||
|
call assert_equal(-1, byteidx(str, 5, v:true))
|
||||||
|
|
||||||
|
" empty string
|
||||||
|
call assert_equal(0, byteidx('', 0, v:true))
|
||||||
|
call assert_equal(-1, byteidx('', 1, v:true))
|
||||||
|
|
||||||
|
" error cases
|
||||||
|
call assert_fails('call byteidx(str, 0, [])', 'E745:')
|
||||||
|
endfunc
|
||||||
|
|
||||||
|
" Test for byteidxcomp() using a UTF-16 index
|
||||||
|
func Test_byteidxcomp_from_utf16_index()
|
||||||
|
" string with single byte characters
|
||||||
|
let str = "abc"
|
||||||
|
for i in range(3)
|
||||||
|
call assert_equal(i, byteidxcomp(str, i, v:true))
|
||||||
|
endfor
|
||||||
|
call assert_equal(3, byteidxcomp(str, 3, v:true))
|
||||||
|
call assert_equal(-1, byteidxcomp(str, 4, v:true))
|
||||||
|
|
||||||
|
" string with two byte characters
|
||||||
|
let str = "a©©b"
|
||||||
|
call assert_equal(0, byteidxcomp(str, 0, v:true))
|
||||||
|
call assert_equal(1, byteidxcomp(str, 1, v:true))
|
||||||
|
call assert_equal(3, byteidxcomp(str, 2, v:true))
|
||||||
|
call assert_equal(5, byteidxcomp(str, 3, v:true))
|
||||||
|
call assert_equal(6, byteidxcomp(str, 4, v:true))
|
||||||
|
call assert_equal(-1, byteidxcomp(str, 5, v:true))
|
||||||
|
|
||||||
|
" string with four byte characters
|
||||||
|
let str = "a😊😊b"
|
||||||
|
call assert_equal(0, byteidxcomp(str, 0, v:true))
|
||||||
|
call assert_equal(1, byteidxcomp(str, 1, v:true))
|
||||||
|
call assert_equal(1, byteidxcomp(str, 2, v:true))
|
||||||
|
call assert_equal(5, byteidxcomp(str, 3, v:true))
|
||||||
|
call assert_equal(5, byteidxcomp(str, 4, v:true))
|
||||||
|
call assert_equal(9, byteidxcomp(str, 5, v:true))
|
||||||
|
call assert_equal(10, byteidxcomp(str, 6, v:true))
|
||||||
|
call assert_equal(-1, byteidxcomp(str, 7, v:true))
|
||||||
|
|
||||||
|
" string with composing characters
|
||||||
|
let str = '-á-b́'
|
||||||
|
call assert_equal(0, byteidxcomp(str, 0, v:true))
|
||||||
|
call assert_equal(1, byteidxcomp(str, 1, v:true))
|
||||||
|
call assert_equal(2, byteidxcomp(str, 2, v:true))
|
||||||
|
call assert_equal(4, byteidxcomp(str, 3, v:true))
|
||||||
|
call assert_equal(5, byteidxcomp(str, 4, v:true))
|
||||||
|
call assert_equal(6, byteidxcomp(str, 5, v:true))
|
||||||
|
call assert_equal(8, byteidxcomp(str, 6, v:true))
|
||||||
|
call assert_equal(-1, byteidxcomp(str, 7, v:true))
|
||||||
|
call assert_fails('call byteidxcomp(str, 0, [])', 'E745:')
|
||||||
|
|
||||||
|
" string with multiple composing characters
|
||||||
|
let str = '-ą́-ą́'
|
||||||
|
call assert_equal(0, byteidxcomp(str, 0, v:true))
|
||||||
|
call assert_equal(1, byteidxcomp(str, 1, v:true))
|
||||||
|
call assert_equal(2, byteidxcomp(str, 2, v:true))
|
||||||
|
call assert_equal(4, byteidxcomp(str, 3, v:true))
|
||||||
|
call assert_equal(6, byteidxcomp(str, 4, v:true))
|
||||||
|
call assert_equal(7, byteidxcomp(str, 5, v:true))
|
||||||
|
call assert_equal(8, byteidxcomp(str, 6, v:true))
|
||||||
|
call assert_equal(10, byteidxcomp(str, 7, v:true))
|
||||||
|
call assert_equal(12, byteidxcomp(str, 8, v:true))
|
||||||
|
call assert_equal(-1, byteidxcomp(str, 9, v:true))
|
||||||
|
|
||||||
|
" empty string
|
||||||
|
call assert_equal(0, byteidxcomp('', 0, v:true))
|
||||||
|
call assert_equal(-1, byteidxcomp('', 1, v:true))
|
||||||
|
|
||||||
|
" error cases
|
||||||
|
call assert_fails('call byteidxcomp(str, 0, [])', 'E745:')
|
||||||
|
endfunc
|
||||||
|
|
||||||
|
" Test for charidx() using a byte index
|
||||||
func Test_charidx()
|
func Test_charidx()
|
||||||
let a = 'xáb́y'
|
let a = 'xáb́y'
|
||||||
call assert_equal(0, charidx(a, 0))
|
call assert_equal(0, charidx(a, 0))
|
||||||
@@ -1104,17 +1265,20 @@ func Test_charidx()
|
|||||||
call assert_equal(3, charidx(a, 7))
|
call assert_equal(3, charidx(a, 7))
|
||||||
call assert_equal(-1, charidx(a, 8))
|
call assert_equal(-1, charidx(a, 8))
|
||||||
call assert_equal(-1, charidx(a, -1))
|
call assert_equal(-1, charidx(a, -1))
|
||||||
call assert_equal(-1, charidx('', 0))
|
|
||||||
call assert_equal(-1, charidx(v:_null_string, 0))
|
|
||||||
|
|
||||||
" count composing characters
|
" count composing characters
|
||||||
call assert_equal(0, charidx(a, 0, 1))
|
call assert_equal(0, a->charidx(0, 1))
|
||||||
call assert_equal(2, charidx(a, 2, 1))
|
call assert_equal(2, a->charidx(2, 1))
|
||||||
call assert_equal(3, charidx(a, 4, 1))
|
call assert_equal(3, a->charidx(4, 1))
|
||||||
call assert_equal(5, charidx(a, 7, 1))
|
call assert_equal(5, a->charidx(7, 1))
|
||||||
call assert_equal(-1, charidx(a, 8, 1))
|
call assert_equal(-1, a->charidx(8, 1))
|
||||||
|
|
||||||
|
" empty string
|
||||||
|
call assert_equal(-1, charidx('', 0))
|
||||||
call assert_equal(-1, charidx('', 0, 1))
|
call assert_equal(-1, charidx('', 0, 1))
|
||||||
|
|
||||||
|
" error cases
|
||||||
|
call assert_equal(-1, charidx(v:_null_string, 0))
|
||||||
call assert_fails('let x = charidx([], 1)', 'E1174:')
|
call assert_fails('let x = charidx([], 1)', 'E1174:')
|
||||||
call assert_fails('let x = charidx("abc", [])', 'E1210:')
|
call assert_fails('let x = charidx("abc", [])', 'E1210:')
|
||||||
call assert_fails('let x = charidx("abc", 1, [])', 'E1212:')
|
call assert_fails('let x = charidx("abc", 1, [])', 'E1212:')
|
||||||
@@ -1122,6 +1286,237 @@ func Test_charidx()
|
|||||||
call assert_fails('let x = charidx("abc", 1, 2)', 'E1212:')
|
call assert_fails('let x = charidx("abc", 1, 2)', 'E1212:')
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
|
" Test for charidx() using a UTF-16 index
|
||||||
|
func Test_charidx_from_utf16_index()
|
||||||
|
" string with single byte characters
|
||||||
|
let str = "abc"
|
||||||
|
for i in range(3)
|
||||||
|
call assert_equal(i, charidx(str, i, v:false, v:true))
|
||||||
|
endfor
|
||||||
|
call assert_equal(-1, charidx(str, 3, v:false, v:true))
|
||||||
|
|
||||||
|
" string with two byte characters
|
||||||
|
let str = "a©©b"
|
||||||
|
call assert_equal(0, charidx(str, 0, v:false, v:true))
|
||||||
|
call assert_equal(1, charidx(str, 1, v:false, v:true))
|
||||||
|
call assert_equal(2, charidx(str, 2, v:false, v:true))
|
||||||
|
call assert_equal(3, charidx(str, 3, v:false, v:true))
|
||||||
|
call assert_equal(-1, charidx(str, 4, v:false, v:true))
|
||||||
|
|
||||||
|
" string with four byte characters
|
||||||
|
let str = "a😊😊b"
|
||||||
|
call assert_equal(0, charidx(str, 0, v:false, v:true))
|
||||||
|
call assert_equal(1, charidx(str, 1, v:false, v:true))
|
||||||
|
call assert_equal(1, charidx(str, 2, v:false, v:true))
|
||||||
|
call assert_equal(2, charidx(str, 3, v:false, v:true))
|
||||||
|
call assert_equal(2, charidx(str, 4, v:false, v:true))
|
||||||
|
call assert_equal(3, charidx(str, 5, v:false, v:true))
|
||||||
|
call assert_equal(-1, charidx(str, 6, v:false, v:true))
|
||||||
|
|
||||||
|
" string with composing characters
|
||||||
|
let str = '-á-b́'
|
||||||
|
for i in str->strcharlen()->range()
|
||||||
|
call assert_equal(i, charidx(str, i, v:false, v:true))
|
||||||
|
endfor
|
||||||
|
call assert_equal(-1, charidx(str, 4, v:false, v:true))
|
||||||
|
for i in str->strchars()->range()
|
||||||
|
call assert_equal(i, charidx(str, i, v:true, v:true))
|
||||||
|
endfor
|
||||||
|
call assert_equal(-1, charidx(str, 6, v:true, v:true))
|
||||||
|
|
||||||
|
" string with multiple composing characters
|
||||||
|
let str = '-ą́-ą́'
|
||||||
|
for i in str->strcharlen()->range()
|
||||||
|
call assert_equal(i, charidx(str, i, v:false, v:true))
|
||||||
|
endfor
|
||||||
|
call assert_equal(-1, charidx(str, 4, v:false, v:true))
|
||||||
|
for i in str->strchars()->range()
|
||||||
|
call assert_equal(i, charidx(str, i, v:true, v:true))
|
||||||
|
endfor
|
||||||
|
call assert_equal(-1, charidx(str, 8, v:true, v:true))
|
||||||
|
|
||||||
|
" empty string
|
||||||
|
call assert_equal(-1, charidx('', 0, v:false, v:true))
|
||||||
|
call assert_equal(-1, charidx('', 0, v:true, v:true))
|
||||||
|
|
||||||
|
" error cases
|
||||||
|
call assert_equal(-1, charidx('', 0, v:false, v:true))
|
||||||
|
call assert_equal(-1, charidx('', 0, v:true, v:true))
|
||||||
|
call assert_equal(-1, charidx(v:_null_string, 0, v:false, v:true))
|
||||||
|
call assert_fails('let x = charidx("abc", 1, v:false, [])', 'E1212:')
|
||||||
|
call assert_fails('let x = charidx("abc", 1, v:true, [])', 'E1212:')
|
||||||
|
endfunc
|
||||||
|
|
||||||
|
" Test for utf16idx() using a byte index
|
||||||
|
func Test_utf16idx_from_byteidx()
|
||||||
|
" UTF-16 index of a string with single byte characters
|
||||||
|
let str = "abc"
|
||||||
|
for i in range(3)
|
||||||
|
call assert_equal(i, utf16idx(str, i))
|
||||||
|
endfor
|
||||||
|
call assert_equal(-1, utf16idx(str, 3))
|
||||||
|
|
||||||
|
" UTF-16 index of a string with two byte characters
|
||||||
|
let str = 'a©©b'
|
||||||
|
call assert_equal(0, str->utf16idx(0))
|
||||||
|
call assert_equal(1, str->utf16idx(1))
|
||||||
|
call assert_equal(1, str->utf16idx(2))
|
||||||
|
call assert_equal(2, str->utf16idx(3))
|
||||||
|
call assert_equal(2, str->utf16idx(4))
|
||||||
|
call assert_equal(3, str->utf16idx(5))
|
||||||
|
call assert_equal(-1, str->utf16idx(6))
|
||||||
|
|
||||||
|
" UTF-16 index of a string with four byte characters
|
||||||
|
let str = 'a😊😊b'
|
||||||
|
call assert_equal(0, utf16idx(str, 0))
|
||||||
|
call assert_equal(2, utf16idx(str, 1))
|
||||||
|
call assert_equal(2, utf16idx(str, 2))
|
||||||
|
call assert_equal(2, utf16idx(str, 3))
|
||||||
|
call assert_equal(2, utf16idx(str, 4))
|
||||||
|
call assert_equal(4, utf16idx(str, 5))
|
||||||
|
call assert_equal(4, utf16idx(str, 6))
|
||||||
|
call assert_equal(4, utf16idx(str, 7))
|
||||||
|
call assert_equal(4, utf16idx(str, 8))
|
||||||
|
call assert_equal(5, utf16idx(str, 9))
|
||||||
|
call assert_equal(-1, utf16idx(str, 10))
|
||||||
|
|
||||||
|
" UTF-16 index of a string with composing characters
|
||||||
|
let str = '-á-b́'
|
||||||
|
call assert_equal(0, utf16idx(str, 0))
|
||||||
|
call assert_equal(1, utf16idx(str, 1))
|
||||||
|
call assert_equal(1, utf16idx(str, 2))
|
||||||
|
call assert_equal(1, utf16idx(str, 3))
|
||||||
|
call assert_equal(2, utf16idx(str, 4))
|
||||||
|
call assert_equal(3, utf16idx(str, 5))
|
||||||
|
call assert_equal(3, utf16idx(str, 6))
|
||||||
|
call assert_equal(3, utf16idx(str, 7))
|
||||||
|
call assert_equal(-1, utf16idx(str, 8))
|
||||||
|
call assert_equal(0, utf16idx(str, 0, v:true))
|
||||||
|
call assert_equal(1, utf16idx(str, 1, v:true))
|
||||||
|
call assert_equal(2, utf16idx(str, 2, v:true))
|
||||||
|
call assert_equal(2, utf16idx(str, 3, v:true))
|
||||||
|
call assert_equal(3, utf16idx(str, 4, v:true))
|
||||||
|
call assert_equal(4, utf16idx(str, 5, v:true))
|
||||||
|
call assert_equal(5, utf16idx(str, 6, v:true))
|
||||||
|
call assert_equal(5, utf16idx(str, 7, v:true))
|
||||||
|
call assert_equal(-1, utf16idx(str, 8, v:true))
|
||||||
|
|
||||||
|
" string with multiple composing characters
|
||||||
|
let str = '-ą́-ą́'
|
||||||
|
call assert_equal(0, utf16idx(str, 0))
|
||||||
|
call assert_equal(1, utf16idx(str, 1))
|
||||||
|
call assert_equal(1, utf16idx(str, 2))
|
||||||
|
call assert_equal(1, utf16idx(str, 3))
|
||||||
|
call assert_equal(1, utf16idx(str, 4))
|
||||||
|
call assert_equal(1, utf16idx(str, 5))
|
||||||
|
call assert_equal(2, utf16idx(str, 6))
|
||||||
|
call assert_equal(3, utf16idx(str, 7))
|
||||||
|
call assert_equal(3, utf16idx(str, 8))
|
||||||
|
call assert_equal(3, utf16idx(str, 9))
|
||||||
|
call assert_equal(3, utf16idx(str, 10))
|
||||||
|
call assert_equal(3, utf16idx(str, 11))
|
||||||
|
call assert_equal(-1, utf16idx(str, 12))
|
||||||
|
call assert_equal(0, utf16idx(str, 0, v:true))
|
||||||
|
call assert_equal(1, utf16idx(str, 1, v:true))
|
||||||
|
call assert_equal(2, utf16idx(str, 2, v:true))
|
||||||
|
call assert_equal(2, utf16idx(str, 3, v:true))
|
||||||
|
call assert_equal(3, utf16idx(str, 4, v:true))
|
||||||
|
call assert_equal(3, utf16idx(str, 5, v:true))
|
||||||
|
call assert_equal(4, utf16idx(str, 6, v:true))
|
||||||
|
call assert_equal(5, utf16idx(str, 7, v:true))
|
||||||
|
call assert_equal(6, utf16idx(str, 8, v:true))
|
||||||
|
call assert_equal(6, utf16idx(str, 9, v:true))
|
||||||
|
call assert_equal(7, utf16idx(str, 10, v:true))
|
||||||
|
call assert_equal(7, utf16idx(str, 11, v:true))
|
||||||
|
call assert_equal(-1, utf16idx(str, 12, v:true))
|
||||||
|
|
||||||
|
" empty string
|
||||||
|
call assert_equal(-1, utf16idx('', 0))
|
||||||
|
call assert_equal(-1, utf16idx('', 0, v:true))
|
||||||
|
|
||||||
|
" error cases
|
||||||
|
call assert_equal(-1, utf16idx("", 0))
|
||||||
|
call assert_equal(-1, utf16idx("abc", -1))
|
||||||
|
call assert_equal(-1, utf16idx(v:_null_string, 0))
|
||||||
|
call assert_fails('let l = utf16idx([], 0)', 'E1174:')
|
||||||
|
call assert_fails('let l = utf16idx("ab", [])', 'E1210:')
|
||||||
|
call assert_fails('let l = utf16idx("ab", 0, [])', 'E1212:')
|
||||||
|
endfunc
|
||||||
|
|
||||||
|
" Test for utf16idx() using a character index
|
||||||
|
func Test_utf16idx_from_charidx()
|
||||||
|
let str = "abc"
|
||||||
|
for i in str->strcharlen()->range()
|
||||||
|
call assert_equal(i, utf16idx(str, i, v:false, v:true))
|
||||||
|
endfor
|
||||||
|
call assert_equal(-1, utf16idx(str, 3, v:false, v:true))
|
||||||
|
|
||||||
|
" UTF-16 index of a string with two byte characters
|
||||||
|
let str = "a©©b"
|
||||||
|
for i in str->strcharlen()->range()
|
||||||
|
call assert_equal(i, utf16idx(str, i, v:false, v:true))
|
||||||
|
endfor
|
||||||
|
call assert_equal(-1, utf16idx(str, 4, v:false, v:true))
|
||||||
|
|
||||||
|
" UTF-16 index of a string with four byte characters
|
||||||
|
let str = "a😊😊b"
|
||||||
|
call assert_equal(0, utf16idx(str, 0, v:false, v:true))
|
||||||
|
call assert_equal(2, utf16idx(str, 1, v:false, v:true))
|
||||||
|
call assert_equal(4, utf16idx(str, 2, v:false, v:true))
|
||||||
|
call assert_equal(5, utf16idx(str, 3, v:false, v:true))
|
||||||
|
call assert_equal(-1, utf16idx(str, 4, v:false, v:true))
|
||||||
|
|
||||||
|
" UTF-16 index of a string with composing characters
|
||||||
|
let str = '-á-b́'
|
||||||
|
for i in str->strcharlen()->range()
|
||||||
|
call assert_equal(i, utf16idx(str, i, v:false, v:true))
|
||||||
|
endfor
|
||||||
|
call assert_equal(-1, utf16idx(str, 4, v:false, v:true))
|
||||||
|
for i in str->strchars()->range()
|
||||||
|
call assert_equal(i, utf16idx(str, i, v:true, v:true))
|
||||||
|
endfor
|
||||||
|
call assert_equal(-1, utf16idx(str, 6, v:true, v:true))
|
||||||
|
|
||||||
|
" string with multiple composing characters
|
||||||
|
let str = '-ą́-ą́'
|
||||||
|
for i in str->strcharlen()->range()
|
||||||
|
call assert_equal(i, utf16idx(str, i, v:false, v:true))
|
||||||
|
endfor
|
||||||
|
call assert_equal(-1, utf16idx(str, 4, v:false, v:true))
|
||||||
|
for i in str->strchars()->range()
|
||||||
|
call assert_equal(i, utf16idx(str, i, v:true, v:true))
|
||||||
|
endfor
|
||||||
|
call assert_equal(-1, utf16idx(str, 8, v:true, v:true))
|
||||||
|
|
||||||
|
" empty string
|
||||||
|
call assert_equal(-1, utf16idx('', 0, v:false, v:true))
|
||||||
|
call assert_equal(-1, utf16idx('', 0, v:true, v:true))
|
||||||
|
|
||||||
|
" error cases
|
||||||
|
call assert_equal(-1, utf16idx(v:_null_string, 0, v:true, v:true))
|
||||||
|
call assert_fails('let l = utf16idx("ab", 0, v:false, [])', 'E1212:')
|
||||||
|
endfunc
|
||||||
|
|
||||||
|
" Test for strutf16len()
|
||||||
|
func Test_strutf16len()
|
||||||
|
call assert_equal(3, strutf16len('abc'))
|
||||||
|
call assert_equal(3, 'abc'->strutf16len(v:true))
|
||||||
|
call assert_equal(4, strutf16len('a©©b'))
|
||||||
|
call assert_equal(4, strutf16len('a©©b', v:true))
|
||||||
|
call assert_equal(6, strutf16len('a😊😊b'))
|
||||||
|
call assert_equal(6, strutf16len('a😊😊b', v:true))
|
||||||
|
call assert_equal(4, strutf16len('-á-b́'))
|
||||||
|
call assert_equal(6, strutf16len('-á-b́', v:true))
|
||||||
|
call assert_equal(4, strutf16len('-ą́-ą́'))
|
||||||
|
call assert_equal(8, strutf16len('-ą́-ą́', v:true))
|
||||||
|
call assert_equal(0, strutf16len(''))
|
||||||
|
|
||||||
|
" error cases
|
||||||
|
call assert_fails('let l = strutf16len([])', 'E1174:')
|
||||||
|
call assert_fails('let l = strutf16len("a", [])', 'E1212:')
|
||||||
|
call assert_equal(0, strutf16len(v:_null_string))
|
||||||
|
endfunc
|
||||||
|
|
||||||
func Test_count()
|
func Test_count()
|
||||||
let l = ['a', 'a', 'A', 'b']
|
let l = ['a', 'a', 'A', 'b']
|
||||||
call assert_equal(2, count(l, 'a'))
|
call assert_equal(2, count(l, 'a'))
|
||||||
@@ -2644,5 +3039,4 @@ func Test_delfunc_while_listing()
|
|||||||
call StopVimInTerminal(buf)
|
call StopVimInTerminal(buf)
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
|
|
||||||
" vim: shiftwidth=2 sts=2 expandtab
|
" vim: shiftwidth=2 sts=2 expandtab
|
||||||
|
Reference in New Issue
Block a user