fix(lsp): correctly align start and end range to codepoints during incremental sync (#16670)

Closes #16624

Fixes two issues with aligning the start position and end position to
codepoints when calculating the start and end range.

When aligning the start position:
* use the aligned byte index to calculate the character index rather than
  the unadjusted byte index

When aligning the end position:
* do not adjust the end byte if it already falls on the start of a
  UTF-8 codepoint
* align byte to the first byte of the next codepoint rather than the
  last byte of the current codepoint
* compute the character end range from the aligned byte index

This commit also adds test coverage, including multibyte operations
that failed before this commit.
This commit is contained in:
Rishikesh Vaishnav
2021-12-17 18:05:00 -08:00
committed by GitHub
parent 6a92a53c02
commit 36c401db24
2 changed files with 277 additions and 7 deletions

View File

@@ -105,15 +105,16 @@ local function align_end_position(line, byte, offset_encoding)
char = compute_line_length(line, offset_encoding) + 1
else
-- Modifying line, find the nearest utf codepoint
local offset = str_utf_end(line, byte)
local offset = str_utf_start(line, byte)
-- If the byte does not fall on the start of the character, then
-- align to the start of the next character.
if offset > 0 then
char = byte_to_utf(line, byte, offset_encoding) + 1
byte = byte + offset
else
if offset < 0 then
byte = byte + str_utf_end(line, byte) + 1
end
if byte <= #line then
char = byte_to_utf(line, byte, offset_encoding)
byte = byte + offset
else
char = compute_line_length(line, offset_encoding) + 1
end
-- Extending line, find the nearest utf codepoint for the last valid character
end
@@ -167,7 +168,7 @@ local function compute_start_range(prev_lines, curr_lines, firstline, lastline,
char_idx = compute_line_length(prev_line, offset_encoding) + 1
else
byte_idx = start_byte_idx + str_utf_start(prev_line, start_byte_idx)
char_idx = byte_to_utf(prev_line, start_byte_idx, offset_encoding)
char_idx = byte_to_utf(prev_line, byte_idx, offset_encoding)
end
-- Return the start difference (shared for new and prev lines)