mirror of
https://github.com/neovim/neovim.git
synced 2025-10-26 12:27:24 +00:00
fix(lsp): improve incremental sync robustness (#16358)
closes https://github.com/neovim/neovim/issues/16352
* improve handling of multi-byte deletions
This commit is contained in:
committed by
GitHub
parent
d249e18bbf
commit
f71be1f87b
@@ -74,43 +74,45 @@ local function byte_to_utf(line, byte, offset_encoding)
|
|||||||
return utf_idx + 1
|
return utf_idx + 1
|
||||||
end
|
end
|
||||||
|
|
||||||
|
local function compute_line_length(line, offset_encoding)
|
||||||
|
local length
|
||||||
|
local _
|
||||||
|
if offset_encoding == 'utf-16' then
|
||||||
|
_, length = str_utfindex(line)
|
||||||
|
elseif offset_encoding == 'utf-32' then
|
||||||
|
length, _ = str_utfindex(line)
|
||||||
|
else
|
||||||
|
length = #line
|
||||||
|
end
|
||||||
|
return length
|
||||||
|
end
|
||||||
|
|
||||||
---@private
|
---@private
|
||||||
-- Given a line, byte idx, alignment, and offset_encoding convert to the aligned
|
-- Given a line, byte idx, alignment, and offset_encoding convert to the aligned
|
||||||
-- utf-8 index and either the utf-16, or utf-32 index.
|
-- utf-8 index and either the utf-16, or utf-32 index.
|
||||||
---@param line string the line to index into
|
---@param line string the line to index into
|
||||||
---@param byte integer the byte idx
|
---@param byte integer the byte idx
|
||||||
---@param align string when dealing with multibyte characters,
|
|
||||||
-- to choose the start of the current character or the beginning of the next.
|
|
||||||
-- Used for incremental sync for start/end range respectively
|
|
||||||
---@param offset_encoding string utf-8|utf-16|utf-32|nil (default: utf-8)
|
---@param offset_encoding string utf-8|utf-16|utf-32|nil (default: utf-8)
|
||||||
---@returns table<string, int> byte_idx and char_idx of first change position
|
---@returns table<string, int> byte_idx and char_idx of first change position
|
||||||
local function align_position(line, byte, align, offset_encoding)
|
local function align_end_position(line, byte, offset_encoding)
|
||||||
local char
|
local char
|
||||||
-- If on the first byte, or an empty string: the trivial case
|
-- If on the first byte, or an empty string: the trivial case
|
||||||
if byte == 1 or #line == 0 then
|
if byte == 1 or #line == 0 then
|
||||||
char = byte
|
char = byte
|
||||||
-- Called in the case of extending an empty line "" -> "a"
|
-- Called in the case of extending an empty line "" -> "a"
|
||||||
elseif byte == #line + 1 then
|
elseif byte == #line + 1 then
|
||||||
byte = byte + str_utf_end(line, #line)
|
char = compute_line_length(line, offset_encoding) + 1
|
||||||
char = byte_to_utf(line, byte, offset_encoding)
|
|
||||||
else
|
else
|
||||||
-- Modifying line, find the nearest utf codepoint
|
-- Modifying line, find the nearest utf codepoint
|
||||||
if align == 'start' then
|
local offset = str_utf_end(line, byte)
|
||||||
byte = byte + str_utf_start(line, byte)
|
-- If the byte does not fall on the start of the character, then
|
||||||
char = byte_to_utf(line, byte, offset_encoding)
|
-- align to the start of the next character.
|
||||||
elseif align == 'end' then
|
if offset > 0 then
|
||||||
local offset = str_utf_end(line, byte)
|
char = byte_to_utf(line, byte, offset_encoding) + 1
|
||||||
-- If the byte does not fall on the start of the character, then
|
byte = byte + offset
|
||||||
-- align to the start of the next character.
|
|
||||||
if offset > 0 then
|
|
||||||
char = byte_to_utf(line, byte, offset_encoding) + 1
|
|
||||||
byte = byte + offset
|
|
||||||
else
|
|
||||||
char = byte_to_utf(line, byte, offset_encoding)
|
|
||||||
byte = byte + offset
|
|
||||||
end
|
|
||||||
else
|
else
|
||||||
error('`align` must be start or end.')
|
char = byte_to_utf(line, byte, offset_encoding)
|
||||||
|
byte = byte + offset
|
||||||
end
|
end
|
||||||
-- Extending line, find the nearest utf codepoint for the last valid character
|
-- Extending line, find the nearest utf codepoint for the last valid character
|
||||||
end
|
end
|
||||||
@@ -154,7 +156,18 @@ local function compute_start_range(prev_lines, curr_lines, firstline, lastline,
|
|||||||
end
|
end
|
||||||
|
|
||||||
-- Convert byte to codepoint if applicable
|
-- Convert byte to codepoint if applicable
|
||||||
local byte_idx, char_idx = align_position(prev_line, start_byte_idx, 'start', offset_encoding)
|
local char_idx
|
||||||
|
local byte_idx
|
||||||
|
if start_byte_idx == 1 or (#prev_line == 0 and start_byte_idx == 1)then
|
||||||
|
byte_idx = start_byte_idx
|
||||||
|
char_idx = 1
|
||||||
|
elseif start_byte_idx == #prev_line + 1 then
|
||||||
|
byte_idx = start_byte_idx
|
||||||
|
char_idx = compute_line_length(prev_line, offset_encoding) + 1
|
||||||
|
else
|
||||||
|
byte_idx = start_byte_idx + str_utf_start(prev_line, start_byte_idx)
|
||||||
|
char_idx = byte_to_utf(prev_line, start_byte_idx, offset_encoding)
|
||||||
|
end
|
||||||
|
|
||||||
-- Return the start difference (shared for new and prev lines)
|
-- Return the start difference (shared for new and prev lines)
|
||||||
return { line_idx = firstline, byte_idx = byte_idx, char_idx = char_idx }
|
return { line_idx = firstline, byte_idx = byte_idx, char_idx = char_idx }
|
||||||
@@ -219,11 +232,12 @@ local function compute_end_range(prev_lines, curr_lines, start_range, firstline,
|
|||||||
|
|
||||||
-- Iterate from end to beginning of shortest line
|
-- Iterate from end to beginning of shortest line
|
||||||
local prev_end_byte_idx = prev_line_length - byte_offset + 1
|
local prev_end_byte_idx = prev_line_length - byte_offset + 1
|
||||||
|
|
||||||
-- Handle case where lines match
|
-- Handle case where lines match
|
||||||
if prev_end_byte_idx == 0 then
|
if prev_end_byte_idx == 0 then
|
||||||
prev_end_byte_idx = 1
|
prev_end_byte_idx = 1
|
||||||
end
|
end
|
||||||
local prev_byte_idx, prev_char_idx = align_position(prev_line, prev_end_byte_idx, 'start', offset_encoding)
|
local prev_byte_idx, prev_char_idx = align_end_position(prev_line, prev_end_byte_idx, offset_encoding)
|
||||||
local prev_end_range = { line_idx = prev_line_idx, byte_idx = prev_byte_idx, char_idx = prev_char_idx }
|
local prev_end_range = { line_idx = prev_line_idx, byte_idx = prev_byte_idx, char_idx = prev_char_idx }
|
||||||
|
|
||||||
local curr_end_range
|
local curr_end_range
|
||||||
@@ -236,7 +250,7 @@ local function compute_end_range(prev_lines, curr_lines, start_range, firstline,
|
|||||||
if curr_end_byte_idx == 0 then
|
if curr_end_byte_idx == 0 then
|
||||||
curr_end_byte_idx = 1
|
curr_end_byte_idx = 1
|
||||||
end
|
end
|
||||||
local curr_byte_idx, curr_char_idx = align_position(curr_line, curr_end_byte_idx, 'start', offset_encoding)
|
local curr_byte_idx, curr_char_idx = align_end_position(curr_line, curr_end_byte_idx, offset_encoding)
|
||||||
curr_end_range = { line_idx = curr_line_idx, byte_idx = curr_byte_idx, char_idx = curr_char_idx }
|
curr_end_range = { line_idx = curr_line_idx, byte_idx = curr_byte_idx, char_idx = curr_char_idx }
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -280,18 +294,6 @@ local function extract_text(lines, start_range, end_range, line_ending)
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
local function compute_line_length(line, offset_encoding)
|
|
||||||
local length
|
|
||||||
local _
|
|
||||||
if offset_encoding == 'utf-16' then
|
|
||||||
_, length = str_utfindex(line)
|
|
||||||
elseif offset_encoding == 'utf-32' then
|
|
||||||
length, _ = str_utfindex(line)
|
|
||||||
else
|
|
||||||
length = #line
|
|
||||||
end
|
|
||||||
return length
|
|
||||||
end
|
|
||||||
---@private
|
---@private
|
||||||
-- rangelength depends on the offset encoding
|
-- rangelength depends on the offset encoding
|
||||||
-- bytes for utf-8 (clangd with extension)
|
-- bytes for utf-8 (clangd with extension)
|
||||||
|
|||||||
@@ -297,7 +297,31 @@ describe('incremental synchronization', function()
|
|||||||
}
|
}
|
||||||
test_edit({"🔥"}, {"x"}, expected_text_changes, 'utf-16', '\n')
|
test_edit({"🔥"}, {"x"}, expected_text_changes, 'utf-16', '\n')
|
||||||
end)
|
end)
|
||||||
it('deleting a multiple lines containing multibyte characters', function()
|
it('deleting a multibyte character from a long line', function()
|
||||||
|
local expected_text_changes = {
|
||||||
|
{
|
||||||
|
range = {
|
||||||
|
['start'] = {
|
||||||
|
character = 85,
|
||||||
|
line = 1
|
||||||
|
},
|
||||||
|
['end'] = {
|
||||||
|
character = 86,
|
||||||
|
line = 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
rangeLength = 1,
|
||||||
|
text = ''
|
||||||
|
}
|
||||||
|
}
|
||||||
|
local original_lines = {
|
||||||
|
"\\begin{document}",
|
||||||
|
"→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→",
|
||||||
|
"\\end{document}",
|
||||||
|
}
|
||||||
|
test_edit(original_lines, {"jx"}, expected_text_changes, 'utf-16', '\n')
|
||||||
|
end)
|
||||||
|
it('deleting multiple lines containing multibyte characters', function()
|
||||||
local expected_text_changes = {
|
local expected_text_changes = {
|
||||||
{
|
{
|
||||||
range = {
|
range = {
|
||||||
|
|||||||
Reference in New Issue
Block a user