fix(lua): make vim.str_utfindex and vim.str_byteindex handle NUL bytes

fixes #16290
This commit is contained in:
bfredl
2022-09-12 11:26:27 +02:00
parent 738c204523
commit 25e4af439f
3 changed files with 13 additions and 8 deletions

View File

@@ -1457,7 +1457,7 @@ void mb_utflen(const char_u *s, size_t len, size_t *codepoints, size_t *codeunit
{ {
size_t count = 0, extra = 0; size_t count = 0, extra = 0;
size_t clen; size_t clen;
for (size_t i = 0; i < len && s[i] != NUL; i += clen) { for (size_t i = 0; i < len; i += clen) {
clen = (size_t)utf_ptr2len_len(s + i, (int)(len - i)); clen = (size_t)utf_ptr2len_len(s + i, (int)(len - i));
// NB: gets the byte value of invalid sequence bytes. // NB: gets the byte value of invalid sequence bytes.
// we only care whether the char fits in the BMP or not // we only care whether the char fits in the BMP or not
@@ -1479,7 +1479,7 @@ ssize_t mb_utf_index_to_bytes(const char_u *s, size_t len, size_t index, bool us
if (index == 0) { if (index == 0) {
return 0; return 0;
} }
for (i = 0; i < len && s[i] != NUL; i += clen) { for (i = 0; i < len; i += clen) {
clen = (size_t)utf_ptr2len_len(s + i, (int)(len - i)); clen = (size_t)utf_ptr2len_len(s + i, (int)(len - i));
// NB: gets the byte value of invalid sequence bytes. // NB: gets the byte value of invalid sequence bytes.
// we only care whether the char fits in the BMP or not // we only care whether the char fits in the BMP or not

View File

@@ -2296,8 +2296,9 @@ void ml_add_deleted_len_buf(buf_T *buf, char_u *ptr, ssize_t len)
if (inhibit_delete_count) { if (inhibit_delete_count) {
return; return;
} }
if (len == -1) { ssize_t maxlen = (ssize_t)STRLEN(ptr);
len = (ssize_t)STRLEN(ptr); if (len == -1 || len > maxlen) {
len = maxlen;
} }
curbuf->deleted_bytes += (size_t)len + 1; curbuf->deleted_bytes += (size_t)len + 1;
curbuf->deleted_bytes2 += (size_t)len + 1; curbuf->deleted_bytes2 += (size_t)len + 1;

View File

@@ -158,17 +158,20 @@ describe('lua stdlib', function()
end) end)
it("vim.str_utfindex/str_byteindex", function() it("vim.str_utfindex/str_byteindex", function()
exec_lua([[_G.test_text = "xy åäö ɧ 汉语 ↥ 🤦x🦄 å بِيَّ"]]) exec_lua([[_G.test_text = "xy åäö ɧ 汉语 ↥ 🤦x🦄 å بِيَّ\000ъ"]])
local indicies32 = {[0]=0,1,2,3,5,7,9,10,12,13,16,19,20,23,24,28,29,33,34,35,37,38,40,42,44,46,48} local indicies32 = {[0]=0,1,2,3,5,7,9,10,12,13,16,19,20,23,24,28,29,33,34,35,37,38,40,42,44,46,48,49,51}
local indicies16 = {[0]=0,1,2,3,5,7,9,10,12,13,16,19,20,23,24,28,28,29,33,33,34,35,37,38,40,42,44,46,48} local indicies16 = {[0]=0,1,2,3,5,7,9,10,12,13,16,19,20,23,24,28,28,29,33,33,34,35,37,38,40,42,44,46,48,49,51}
for i,k in pairs(indicies32) do for i,k in pairs(indicies32) do
eq(k, exec_lua("return vim.str_byteindex(_G.test_text, ...)", i), i) eq(k, exec_lua("return vim.str_byteindex(_G.test_text, ...)", i), i)
end end
for i,k in pairs(indicies16) do for i,k in pairs(indicies16) do
eq(k, exec_lua("return vim.str_byteindex(_G.test_text, ..., true)", i), i) eq(k, exec_lua("return vim.str_byteindex(_G.test_text, ..., true)", i), i)
end end
matches(": index out of range$", pcall_err(exec_lua, "return vim.str_byteindex(_G.test_text, ...)", #indicies32 + 1))
matches(": index out of range$", pcall_err(exec_lua, "return vim.str_byteindex(_G.test_text, ..., true)", #indicies16 + 1))
local i32, i16 = 0, 0 local i32, i16 = 0, 0
for k = 0,48 do local len = 51
for k = 0,len do
if indicies32[i32] < k then if indicies32[i32] < k then
i32 = i32 + 1 i32 = i32 + 1
end end
@@ -180,6 +183,7 @@ describe('lua stdlib', function()
end end
eq({i32, i16}, exec_lua("return {vim.str_utfindex(_G.test_text, ...)}", k), k) eq({i32, i16}, exec_lua("return {vim.str_utfindex(_G.test_text, ...)}", k), k)
end end
matches(": index out of range$", pcall_err(exec_lua, "return vim.str_utfindex(_G.test_text, ...)", len + 1))
end) end)
it("vim.str_utf_start", function() it("vim.str_utf_start", function()