mirror of
				https://github.com/neovim/neovim.git
				synced 2025-11-04 01:34:25 +00:00 
			
		
		
		
	fix(lsp): improve incremental sync robustness (#16358)
closes https://github.com/neovim/neovim/issues/16352
* improve handling of multi-byte deletions
This commit is contained in:
		
				
					committed by
					
						
						GitHub
					
				
			
			
				
	
			
			
			
						parent
						
							d249e18bbf
						
					
				
				
					commit
					f71be1f87b
				
			@@ -74,31 +74,36 @@ local function byte_to_utf(line, byte, offset_encoding)
 | 
				
			|||||||
  return utf_idx + 1
 | 
					  return utf_idx + 1
 | 
				
			||||||
end
 | 
					end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					local function compute_line_length(line, offset_encoding)
 | 
				
			||||||
 | 
					  local length
 | 
				
			||||||
 | 
					  local _
 | 
				
			||||||
 | 
					  if offset_encoding == 'utf-16' then
 | 
				
			||||||
 | 
					     _, length = str_utfindex(line)
 | 
				
			||||||
 | 
					  elseif offset_encoding == 'utf-32' then
 | 
				
			||||||
 | 
					    length, _ = str_utfindex(line)
 | 
				
			||||||
 | 
					  else
 | 
				
			||||||
 | 
					    length = #line
 | 
				
			||||||
 | 
					  end
 | 
				
			||||||
 | 
					  return length
 | 
				
			||||||
 | 
					end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
---@private
 | 
					---@private
 | 
				
			||||||
-- Given a line, byte idx, alignment, and offset_encoding convert to the aligned
 | 
					-- Given a line, byte idx, alignment, and offset_encoding convert to the aligned
 | 
				
			||||||
-- utf-8 index and either the utf-16, or utf-32 index.
 | 
					-- utf-8 index and either the utf-16, or utf-32 index.
 | 
				
			||||||
---@param line string the line to index into
 | 
					---@param line string the line to index into
 | 
				
			||||||
---@param byte integer the byte idx
 | 
					---@param byte integer the byte idx
 | 
				
			||||||
---@param align string when dealing with multibyte characters,
 | 
					 | 
				
			||||||
--        to choose the start of the current character or the beginning of the next.
 | 
					 | 
				
			||||||
--        Used for incremental sync for start/end range respectively
 | 
					 | 
				
			||||||
---@param offset_encoding string utf-8|utf-16|utf-32|nil (default: utf-8)
 | 
					---@param offset_encoding string utf-8|utf-16|utf-32|nil (default: utf-8)
 | 
				
			||||||
---@returns table<string, int> byte_idx and char_idx of first change position
 | 
					---@returns table<string, int> byte_idx and char_idx of first change position
 | 
				
			||||||
local function align_position(line, byte, align, offset_encoding)
 | 
					local function align_end_position(line, byte, offset_encoding)
 | 
				
			||||||
  local char
 | 
					  local char
 | 
				
			||||||
  -- If on the first byte, or an empty string: the trivial case
 | 
					  -- If on the first byte, or an empty string: the trivial case
 | 
				
			||||||
  if byte == 1 or #line == 0 then
 | 
					  if byte == 1 or #line == 0 then
 | 
				
			||||||
    char = byte
 | 
					    char = byte
 | 
				
			||||||
  -- Called in the case of extending an empty line "" -> "a"
 | 
					  -- Called in the case of extending an empty line "" -> "a"
 | 
				
			||||||
  elseif byte == #line + 1 then
 | 
					  elseif byte == #line + 1 then
 | 
				
			||||||
    byte = byte + str_utf_end(line, #line)
 | 
					    char = compute_line_length(line, offset_encoding) + 1
 | 
				
			||||||
    char = byte_to_utf(line, byte, offset_encoding)
 | 
					 | 
				
			||||||
  else
 | 
					  else
 | 
				
			||||||
    -- Modifying line, find the nearest utf codepoint
 | 
					    -- Modifying line, find the nearest utf codepoint
 | 
				
			||||||
    if align == 'start' then
 | 
					 | 
				
			||||||
      byte = byte + str_utf_start(line, byte)
 | 
					 | 
				
			||||||
      char = byte_to_utf(line, byte, offset_encoding)
 | 
					 | 
				
			||||||
    elseif align == 'end' then
 | 
					 | 
				
			||||||
    local offset = str_utf_end(line, byte)
 | 
					    local offset = str_utf_end(line, byte)
 | 
				
			||||||
    -- If the byte does not fall on the start of the character, then
 | 
					    -- If the byte does not fall on the start of the character, then
 | 
				
			||||||
    -- align to the start of the next character.
 | 
					    -- align to the start of the next character.
 | 
				
			||||||
@@ -109,9 +114,6 @@ local function align_position(line, byte, align, offset_encoding)
 | 
				
			|||||||
      char = byte_to_utf(line, byte, offset_encoding)
 | 
					      char = byte_to_utf(line, byte, offset_encoding)
 | 
				
			||||||
      byte = byte + offset
 | 
					      byte = byte + offset
 | 
				
			||||||
    end
 | 
					    end
 | 
				
			||||||
    else
 | 
					 | 
				
			||||||
      error('`align` must be start or end.')
 | 
					 | 
				
			||||||
    end
 | 
					 | 
				
			||||||
    -- Extending line, find the nearest utf codepoint for the last valid character
 | 
					    -- Extending line, find the nearest utf codepoint for the last valid character
 | 
				
			||||||
  end
 | 
					  end
 | 
				
			||||||
  return byte, char
 | 
					  return byte, char
 | 
				
			||||||
@@ -154,7 +156,18 @@ local function compute_start_range(prev_lines, curr_lines, firstline, lastline,
 | 
				
			|||||||
  end
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  -- Convert byte to codepoint if applicable
 | 
					  -- Convert byte to codepoint if applicable
 | 
				
			||||||
  local byte_idx, char_idx = align_position(prev_line, start_byte_idx, 'start', offset_encoding)
 | 
					  local char_idx
 | 
				
			||||||
 | 
					  local byte_idx
 | 
				
			||||||
 | 
					  if start_byte_idx == 1 or (#prev_line == 0 and start_byte_idx == 1)then
 | 
				
			||||||
 | 
					    byte_idx = start_byte_idx
 | 
				
			||||||
 | 
					    char_idx = 1
 | 
				
			||||||
 | 
					  elseif start_byte_idx == #prev_line + 1 then
 | 
				
			||||||
 | 
					    byte_idx = start_byte_idx
 | 
				
			||||||
 | 
					    char_idx = compute_line_length(prev_line, offset_encoding)  + 1
 | 
				
			||||||
 | 
					  else
 | 
				
			||||||
 | 
					    byte_idx = start_byte_idx + str_utf_start(prev_line, start_byte_idx)
 | 
				
			||||||
 | 
					    char_idx = byte_to_utf(prev_line, start_byte_idx, offset_encoding)
 | 
				
			||||||
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  -- Return the start difference (shared for new and prev lines)
 | 
					  -- Return the start difference (shared for new and prev lines)
 | 
				
			||||||
  return { line_idx = firstline, byte_idx = byte_idx, char_idx = char_idx }
 | 
					  return { line_idx = firstline, byte_idx = byte_idx, char_idx = char_idx }
 | 
				
			||||||
@@ -219,11 +232,12 @@ local function compute_end_range(prev_lines, curr_lines, start_range, firstline,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  -- Iterate from end to beginning of shortest line
 | 
					  -- Iterate from end to beginning of shortest line
 | 
				
			||||||
  local prev_end_byte_idx = prev_line_length - byte_offset + 1
 | 
					  local prev_end_byte_idx = prev_line_length - byte_offset + 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  -- Handle case where lines match
 | 
					  -- Handle case where lines match
 | 
				
			||||||
  if prev_end_byte_idx == 0 then
 | 
					  if prev_end_byte_idx == 0 then
 | 
				
			||||||
    prev_end_byte_idx = 1
 | 
					    prev_end_byte_idx = 1
 | 
				
			||||||
  end
 | 
					  end
 | 
				
			||||||
  local prev_byte_idx, prev_char_idx = align_position(prev_line, prev_end_byte_idx, 'start', offset_encoding)
 | 
					  local prev_byte_idx, prev_char_idx = align_end_position(prev_line, prev_end_byte_idx, offset_encoding)
 | 
				
			||||||
  local prev_end_range = { line_idx = prev_line_idx, byte_idx = prev_byte_idx, char_idx = prev_char_idx }
 | 
					  local prev_end_range = { line_idx = prev_line_idx, byte_idx = prev_byte_idx, char_idx = prev_char_idx }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  local curr_end_range
 | 
					  local curr_end_range
 | 
				
			||||||
@@ -236,7 +250,7 @@ local function compute_end_range(prev_lines, curr_lines, start_range, firstline,
 | 
				
			|||||||
    if curr_end_byte_idx == 0 then
 | 
					    if curr_end_byte_idx == 0 then
 | 
				
			||||||
      curr_end_byte_idx = 1
 | 
					      curr_end_byte_idx = 1
 | 
				
			||||||
    end
 | 
					    end
 | 
				
			||||||
    local curr_byte_idx, curr_char_idx = align_position(curr_line, curr_end_byte_idx, 'start', offset_encoding)
 | 
					    local curr_byte_idx, curr_char_idx = align_end_position(curr_line, curr_end_byte_idx, offset_encoding)
 | 
				
			||||||
    curr_end_range = { line_idx = curr_line_idx, byte_idx = curr_byte_idx, char_idx = curr_char_idx }
 | 
					    curr_end_range = { line_idx = curr_line_idx, byte_idx = curr_byte_idx, char_idx = curr_char_idx }
 | 
				
			||||||
  end
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -280,18 +294,6 @@ local function extract_text(lines, start_range, end_range, line_ending)
 | 
				
			|||||||
  end
 | 
					  end
 | 
				
			||||||
end
 | 
					end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
local function compute_line_length(line, offset_encoding)
 | 
					 | 
				
			||||||
  local length
 | 
					 | 
				
			||||||
  local _
 | 
					 | 
				
			||||||
  if offset_encoding == 'utf-16' then
 | 
					 | 
				
			||||||
     _, length = str_utfindex(line)
 | 
					 | 
				
			||||||
  elseif offset_encoding == 'utf-32' then
 | 
					 | 
				
			||||||
    length, _ = str_utfindex(line)
 | 
					 | 
				
			||||||
  else
 | 
					 | 
				
			||||||
    length = #line
 | 
					 | 
				
			||||||
  end
 | 
					 | 
				
			||||||
  return length
 | 
					 | 
				
			||||||
end
 | 
					 | 
				
			||||||
---@private
 | 
					---@private
 | 
				
			||||||
-- rangelength depends on the offset encoding
 | 
					-- rangelength depends on the offset encoding
 | 
				
			||||||
-- bytes for utf-8 (clangd with extension)
 | 
					-- bytes for utf-8 (clangd with extension)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -297,7 +297,31 @@ describe('incremental synchronization', function()
 | 
				
			|||||||
      }
 | 
					      }
 | 
				
			||||||
      test_edit({"🔥"}, {"x"}, expected_text_changes, 'utf-16', '\n')
 | 
					      test_edit({"🔥"}, {"x"}, expected_text_changes, 'utf-16', '\n')
 | 
				
			||||||
    end)
 | 
					    end)
 | 
				
			||||||
    it('deleting a multiple lines containing multibyte characters', function()
 | 
					    it('deleting a multibyte character from a long line', function()
 | 
				
			||||||
 | 
					      local expected_text_changes = {
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					          range = {
 | 
				
			||||||
 | 
					            ['start'] = {
 | 
				
			||||||
 | 
					              character = 85,
 | 
				
			||||||
 | 
					              line = 1
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            ['end'] = {
 | 
				
			||||||
 | 
					              character = 86,
 | 
				
			||||||
 | 
					              line = 1
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					          },
 | 
				
			||||||
 | 
					          rangeLength = 1,
 | 
				
			||||||
 | 
					          text = ''
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      local original_lines = {
 | 
				
			||||||
 | 
					        "\\begin{document}",
 | 
				
			||||||
 | 
					        "→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→→",
 | 
				
			||||||
 | 
					        "\\end{document}",
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      test_edit(original_lines, {"jx"}, expected_text_changes, 'utf-16', '\n')
 | 
				
			||||||
 | 
					    end)
 | 
				
			||||||
 | 
					    it('deleting multiple lines containing multibyte characters', function()
 | 
				
			||||||
      local expected_text_changes = {
 | 
					      local expected_text_changes = {
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
          range = {
 | 
					          range = {
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user