feat(stdlib): overload vim.str_byteindex, vim.str_utfindex #30735

PROBLEM:
There are several limitations to vim.str_byteindex, vim.str_utfindex:
1. They throw when given out-of-range indexes. An invalid (often user/LSP-provided)
   index doesn't feel exceptional and should be handled by the caller.
   `:help dev-error-patterns` suggests that `retval, errmsg` is the preferred
   way to handle this kind of failure.
2. They cannot accept an encoding. So LSP needs wrapper functions. #25272
3. The current signatures are not extensible.
    * Calling: The function currently uses a fairly opaque boolean value to
      identify the encoding.
    * Returns: Because the function can throw, callers must wrap it in pcall.
4. The current name doesn't follow suggestions in `:h dev-naming` and I think
   `get` would be suitable.

SOLUTION:
- Because these are performance-sensitive, don't introduce `opts`.
- Introduce an "overload" that accepts `encoding:string` and
  `strict_indexing:bool` params.

```lua
local col = vim.str_utfindex(line, encoding, [index, [strict_indexing]])
```

Support the old versions by dispatching on the type of argument 2, and
deprecate that form.

```lua
vim.str_utfindex(line)                             -- (utf-32 length, utf-16 length), deprecated
vim.str_utfindex(line, index)                      -- (utf-32 index, utf-16 index), deprecated
vim.str_utfindex(line, 'utf-16')                   -- utf-16 length
vim.str_utfindex(line, 'utf-16', index)            -- utf-16 index
vim.str_utfindex(line, 'utf-16', math.huge)        -- error: index out of range
vim.str_utfindex(line, 'utf-16', math.huge, false) -- utf-16 length
```
This commit is contained in:
Tristan Knight
2024-10-23 14:33:57 +01:00
committed by GitHub
parent 3a86b60032
commit 230b0c7f02
5 changed files with 283 additions and 68 deletions

View File

@@ -312,21 +312,106 @@ describe('lua stdlib', function()
49,
51,
}
local indices8 = {
[0] = 0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
}
for i, k in pairs(indices32) do
eq(k, exec_lua('return vim.str_byteindex(_G.test_text, ...)', i), i)
eq(k, exec_lua('return vim.str_byteindex(_G.test_text, ..., false)', i), i)
eq(k, exec_lua('return vim.str_byteindex(_G.test_text, "utf-32", ...)', i), i)
end
for i, k in pairs(indices16) do
eq(k, exec_lua('return vim.str_byteindex(_G.test_text, ..., true)', i), i)
eq(k, exec_lua('return vim.str_byteindex(_G.test_text, "utf-16", ...)', i), i)
end
eq(
for i, k in pairs(indices8) do
eq(k, exec_lua('return vim.str_byteindex(_G.test_text, "utf-8", ...)', i), i)
end
matches(
'index out of range',
pcall_err(exec_lua, 'return vim.str_byteindex(_G.test_text, ...)', #indices32 + 1)
)
eq(
matches(
'index out of range',
pcall_err(exec_lua, 'return vim.str_byteindex(_G.test_text, ..., true)', #indices16 + 1)
)
local i32, i16 = 0, 0
matches(
'index out of range',
pcall_err(exec_lua, 'return vim.str_byteindex(_G.test_text, "utf-16", ...)', #indices16 + 1)
)
matches(
'index out of range',
pcall_err(exec_lua, 'return vim.str_byteindex(_G.test_text, "utf-32", ...)', #indices32 + 1)
)
matches(
'invalid encoding',
pcall_err(exec_lua, 'return vim.str_byteindex("hello", "madeupencoding", 1)')
)
eq(
indices32[#indices32],
exec_lua('return vim.str_byteindex(_G.test_text, "utf-32", 99999, false)')
)
eq(
indices16[#indices16],
exec_lua('return vim.str_byteindex(_G.test_text, "utf-16", 99999, false)')
)
eq(
indices8[#indices8],
exec_lua('return vim.str_byteindex(_G.test_text, "utf-8", 99999, false)')
)
eq(2, exec_lua('return vim.str_byteindex("é", "utf-16", 2, false)'))
local i32, i16, i8 = 0, 0, 0
local len = 51
for k = 0, len do
if indices32[i32] < k then
@@ -338,9 +423,29 @@ describe('lua stdlib', function()
i16 = i16 + 1
end
end
if indices8[i8] < k then
i8 = i8 + 1
end
eq({ i32, i16 }, exec_lua('return {vim.str_utfindex(_G.test_text, ...)}', k), k)
eq({ i32 }, exec_lua('return {vim.str_utfindex(_G.test_text, "utf-32", ...)}', k), k)
eq({ i16 }, exec_lua('return {vim.str_utfindex(_G.test_text, "utf-16", ...)}', k), k)
eq({ i8 }, exec_lua('return {vim.str_utfindex(_G.test_text, "utf-8", ...)}', k), k)
end
eq(
eq({ #indices32, #indices16 }, exec_lua('return {vim.str_utfindex(_G.test_text)}'))
eq(#indices32, exec_lua('return vim.str_utfindex(_G.test_text, "utf-32", math.huge, false)'))
eq(#indices16, exec_lua('return vim.str_utfindex(_G.test_text, "utf-16", math.huge, false)'))
eq(#indices8, exec_lua('return vim.str_utfindex(_G.test_text, "utf-8", math.huge, false)'))
eq(#indices32, exec_lua('return vim.str_utfindex(_G.test_text, "utf-32")'))
eq(#indices16, exec_lua('return vim.str_utfindex(_G.test_text, "utf-16")'))
eq(#indices8, exec_lua('return vim.str_utfindex(_G.test_text, "utf-8")'))
matches(
'invalid encoding',
pcall_err(exec_lua, 'return vim.str_utfindex(_G.test_text, "madeupencoding", ...)', 1)
)
matches(
'index out of range',
pcall_err(exec_lua, 'return vim.str_utfindex(_G.test_text, ...)', len + 1)
)