mirror of
				https://github.com/neovim/neovim.git
				synced 2025-10-26 12:27:24 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			393 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Lua
		
	
	
	
	
	
			
		
		
	
	
			393 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Lua
		
	
	
	
	
	
| -- Notes on incremental sync:
 | ||
| --  Per the protocol, the text range should be:
 | ||
| --
 | ||
| --  A position inside a document (see Position definition below) is expressed as
 | ||
| --  a zero-based line and character offset. The offsets are based on a UTF-16
 | ||
| --  string representation. So a string of the form a𐐀b the character offset
 | ||
| --  of the character a is 0, the character offset of 𐐀 is 1 and the character
 | ||
| --  offset of b is 3 since 𐐀 is represented using two code units in UTF-16.
 | ||
| --
 | ||
| --  To ensure that both client and server split the string into the same line
 | ||
| --  representation the protocol specifies the following end-of-line sequences: ‘\n’, ‘\r\n’ and ‘\r’.
 | ||
| --
 | ||
| --  Positions are line end character agnostic. So you can not specify a position that
 | ||
| --  denotes \r|\n or \n| where | represents the character offset. This means *no* defining
 | ||
| --  a range that ends on the same line after a terminating character
 | ||
| --
 | ||
| -- Generic warnings about byte level changes in neovim. Many apparently "single"
 | ||
| -- operations in on_lines callbacks are actually multiple operations.
 | ||
| --
 | ||
| --  Join operation (2 operations):
 | ||
| --  * extends line 1 with the contents of line 2
 | ||
| --  * deletes line 2
 | ||
| --
 | ||
| --  test 1    test 1 test 2    test 1 test 2
 | ||
| --  test 2 -> test 2        -> test 3
 | ||
| --  test 3    test 3
 | ||
| --
 | ||
| --  Deleting (and undoing) two middle lines (1 operation):
 | ||
| --
 | ||
| --  test 1    test 1
 | ||
| --  test 2 -> test 4
 | ||
| --  test 3
 | ||
| --  test 4
 | ||
| --
 | ||
| --  Deleting partial lines (5 operations) deleting between asterisks below:
 | ||
| --
 | ||
| --  test *1   test *    test *     test *    test *4    test *4*
 | ||
| --  test 2 -> test 2 -> test *4 -> *4     -> *4      ->
 | ||
| --  test 3    test 3
 | ||
| --  test *4   test 4
 | ||
| 
 | ||
| local M = {}
 | ||
| 
 | ||
| -- local string.byte, unclear if this is necessary for JIT compilation
 | ||
| local str_byte = string.byte
 | ||
| local min = math.min
 | ||
| local str_utfindex = vim.str_utfindex
 | ||
| local str_utf_start = vim.str_utf_start
 | ||
| local str_utf_end = vim.str_utf_end
 | ||
| 
 | ||
---@private
--- Translate a 1-based byte index within a line into the 1-based index used by
--- the requested offset encoding.
---@param line string the line to index into
---@param byte integer 1-based byte index into `line`
---@param offset_encoding string utf-8|utf-16|utf-32|nil (anything other than utf-16/utf-32 is handled as utf-8)
---@return integer 1-based index in the requested encoding
local function byte_to_utf(line, byte, offset_encoding)
  -- str_utfindex is queried with a 0-based byte index
  local zero_based = byte - 1

  if offset_encoding == 'utf-16' then
    -- the second value returned by str_utfindex is the one used for utf-16
    local _, utf16_idx = str_utfindex(line, zero_based)
    return utf16_idx + 1
  end

  if offset_encoding == 'utf-32' then
    -- the first value returned by str_utfindex is the one used for utf-32
    local utf32_idx = str_utfindex(line, zero_based)
    return utf32_idx + 1
  end

  -- utf-8 (or unspecified): the byte index is already the answer; go back to 1-based
  return zero_based + 1
end
 | ||
| 
 | ||
---@private
--- Length of a whole line expressed in the units of the given offset encoding.
---@param line string the line to measure
---@param offset_encoding string utf-8|utf-16|utf-32|nil (anything other than utf-16/utf-32 is handled as utf-8)
---@return integer length of `line` in the requested encoding
local function compute_line_length(line, offset_encoding)
  if offset_encoding == 'utf-16' then
    -- second value returned by str_utfindex is the one used for utf-16
    local _, utf16_length = str_utfindex(line)
    return utf16_length
  elseif offset_encoding == 'utf-32' then
    -- first value returned by str_utfindex is the one used for utf-32
    local utf32_length = str_utfindex(line)
    return utf32_length
  end

  -- utf-8: plain byte length
  return #line
end
 | ||
| 
 | ||
---@private
--- Align an exclusive end byte index to a character boundary and compute the
--- matching character index in the requested offset encoding.
---
--- If `byte` points into the middle of a multi-byte character it is moved
--- forward to the first byte of the following character, so that the end of the
--- range never splits a character. A byte that already sits on the first byte
--- of a character is left untouched.
---@param line string the line to index into
---@param byte integer 1-based byte index (exclusive end of the changed region)
---@param offset_encoding string utf-8|utf-16|utf-32|nil (default: utf-8)
---@return integer byte_idx aligned 1-based byte index
---@return integer char_idx 1-based index of that position in `offset_encoding`
local function align_end_position(line, byte, offset_encoding)
  local char
  if byte == 1 or #line == 0 then
    -- First byte or empty line: the trivial case, byte and char index coincide
    char = byte
  elseif byte == #line + 1 then
    -- One past the last byte: the position just after the whole line
    char = compute_line_length(line, offset_encoding) + 1
  else
    -- str_utf_start is negative when `byte` is not the first byte of its
    -- character; str_utf_end is the distance to that character's last byte, so
    -- adding it plus one lands on the first byte of the next character.
    if str_utf_start(line, byte) < 0 then
      byte = byte + str_utf_end(line, byte) + 1
    end

    if byte <= #line then
      -- Derive the char index from the *aligned* byte so both values agree
      char = byte_to_utf(line, byte, offset_encoding)
    else
      -- Alignment pushed the position past the last character of the line
      char = compute_line_length(line, offset_encoding) + 1
    end
  end
  return byte, char
end
 | ||
| 
 | ||
---@private
--- Finds the first line, byte, and char index of the difference between the
--- previous and current lines buffer, normalized back to the start of the
--- character containing the first differing byte.
---@param prev_lines table list of lines from previous buffer
---@param curr_lines table list of lines from current buffer
---@param firstline integer firstline from on_lines, adjusted to 1-index
---@param lastline integer lastline from on_lines, adjusted to 1-index
---@param new_lastline integer new_lastline from on_lines, adjusted to 1-index
---@param offset_encoding string utf-8|utf-16|utf-32|nil (fallback to utf-8)
---@return table start position with fields line_idx, byte_idx and char_idx (all 1-based)
local function compute_start_range(prev_lines, curr_lines, firstline, lastline, new_lastline, offset_encoding)
  -- If firstline == lastline, no existing text is changed: the edit only
  -- inserts new lines. If firstline == new_lastline, the first change occurred
  -- on a line that was deleted. In both cases the change starts at the first
  -- byte of firstline.
  if firstline == new_lastline or firstline == lastline then
    return { line_idx = firstline, byte_idx = 1, char_idx = 1 }
  end

  local prev_line = prev_lines[firstline]
  local curr_line = curr_lines[firstline]

  -- Walk both versions of the first changed line to find the first differing
  -- byte. Running one past #prev_line lets "text appended at the end" be
  -- reported as a difference at #prev_line + 1 (str_byte returns nil there).
  -- Note: *about -> a*about will register the second a as the first
  -- difference, since on_lines does not report the column of the edit.
  local start_byte_idx = 1
  for idx = 1, #prev_line + 1 do
    start_byte_idx = idx
    if str_byte(prev_line, idx) ~= str_byte(curr_line, idx) then
      break
    end
  end

  -- Convert byte to char index in the requested encoding
  local byte_idx
  local char_idx
  if start_byte_idx == 1 then
    byte_idx = 1
    char_idx = 1
  elseif start_byte_idx == #prev_line + 1 then
    -- Difference starts right after the end of the previous line
    byte_idx = start_byte_idx
    char_idx = compute_line_length(prev_line, offset_encoding) + 1
  else
    -- str_utf_start is <= 0: step back to the first byte of the character that
    -- contains the first differing byte, then derive the char index from that
    -- aligned byte so byte_idx and char_idx describe the same position.
    byte_idx = start_byte_idx + str_utf_start(prev_line, start_byte_idx)
    char_idx = byte_to_utf(prev_line, byte_idx, offset_encoding)
  end

  -- The start position is shared between the previous and current buffer
  return { line_idx = firstline, byte_idx = byte_idx, char_idx = char_idx }
end
 | ||
| 
 | ||
---@private
--- Locates where the changed region ends, once in the previous buffer and once
--- in the current buffer. Positions are exclusive and are passed through
--- align_end_position before being returned.
--- The first result (previous buffer) is the end of the range reported to the
--- server; the second (current buffer) bounds the text that is collected and
--- sent as the replacement.
---@param prev_lines table list of lines from previous buffer
---@param curr_lines table list of lines from current buffer
---@param start_range table start position (line_idx, byte_idx, char_idx)
---@param firstline integer firstline from on_lines, adjusted to 1-index
---@param lastline integer lastline from on_lines, adjusted to 1-index
---@param new_lastline integer new_lastline from on_lines, adjusted to 1-index
---@param offset_encoding string utf-8|utf-16|utf-32|nil
---@return table prev_end_range end position (line_idx, byte_idx, char_idx) in the previous buffer
---@return table curr_end_range end position (line_idx, byte_idx, char_idx) in the current buffer
local function compute_end_range(prev_lines, curr_lines, start_range, firstline, lastline, new_lastline, offset_encoding)
  -- firstline == new_lastline: the first change occurred on a line that was
  -- deleted; both ends sit at the first byte of a line.
  if firstline == new_lastline then
    local old_end = { line_idx = (lastline - new_lastline + firstline), byte_idx = 1, char_idx = 1 }
    local new_end = { line_idx = firstline, byte_idx = 1, char_idx = 1 }
    return old_end, new_end
  end

  -- firstline == lastline: no existing text changed, lines were only inserted.
  if firstline == lastline then
    local old_end = { line_idx = firstline, byte_idx = 1, char_idx = 1 }
    local new_end = { line_idx = new_lastline - lastline + firstline, byte_idx = 1, char_idx = 1 }
    return old_end, new_end
  end

  local first_changed_idx = start_range.line_idx

  -- lastline / new_lastline are the first lines that were *not* replaced, so
  -- the last changed lines are the ones just before them.
  local old_idx = lastline - 1
  local new_idx = new_lastline - 1
  local old_line = prev_lines[old_idx]
  local new_line = curr_lines[new_idx]
  local old_len = #old_line
  local new_len = #new_line

  -- Number of steps walked back from the end of both lines while they matched.
  -- Only searched when the last changed line has the same index in both buffers.
  local suffix = 0
  if old_idx == new_idx then
    local limit
    if first_changed_idx ~= old_idx then
      limit = min(old_len, new_len) + 1
    else
      -- Same line as the start of the change: do not walk back past the start
      limit = min(old_len - start_range.byte_idx, new_len - start_range.byte_idx) + 1
    end
    for step = 0, limit do
      suffix = step
      if str_byte(old_line, old_len - step) ~= str_byte(new_line, new_len - step) then
        break
      end
    end
  end

  -- End in the previous buffer; an index of 0 is pulled up to the first byte
  local old_end_byte = old_len - suffix + 1
  if old_end_byte == 0 then
    old_end_byte = 1
  end
  local old_byte, old_char = align_end_position(old_line, old_end_byte, offset_encoding)
  local prev_end_range = { line_idx = old_idx, byte_idx = old_byte, char_idx = old_char }

  -- The end in the current buffer may not lie before the start of the change
  if new_idx < first_changed_idx then
    return prev_end_range, { line_idx = first_changed_idx, byte_idx = 1, char_idx = 1 }
  end

  local new_end_byte = new_len - suffix + 1
  if new_end_byte == 0 then
    new_end_byte = 1
  end
  local new_byte, new_char = align_end_position(new_line, new_end_byte, offset_encoding)
  return prev_end_range, { line_idx = new_idx, byte_idx = new_byte, char_idx = new_char }
end
 | ||
| 
 | ||
---@private
--- Collects the text between a start position and an (exclusive) end position.
---@param lines table list of lines
---@param start_range table start position (uses line_idx and byte_idx)
---@param end_range table end position (uses line_idx and byte_idx); byte_idx is exclusive
---@param line_ending string separator placed between the collected lines
---@return string text extracted from the defined region ("" if the start line does not exist)
local function extract_text(lines, start_range, end_range, line_ending)
  local first_idx = start_range.line_idx
  local first_line = lines[first_idx]
  if not first_line then
    return ""
  end

  local last_idx = end_range.line_idx

  -- Start and end on the same line: a single slice is enough.
  -- string.sub is inclusive while the end position is not, hence the - 1.
  if first_idx == last_idx then
    return string.sub(first_line, start_range.byte_idx, end_range.byte_idx - 1)
  end

  -- Tail of the first line
  local pieces = { string.sub(first_line, start_range.byte_idx) }

  -- Every line strictly between first and last is taken whole
  for idx = first_idx + 1, last_idx - 1 do
    table.insert(pieces, lines[idx])
  end

  -- Head of the last line, or an empty piece when that line does not exist
  local last_line = lines[last_idx]
  local head = ""
  if last_line then
    head = string.sub(last_line, 1, end_range.byte_idx - 1)
  end
  table.insert(pieces, head)

  -- Join the pieces with the line ending
  return table.concat(pieces, line_ending)
end
 | ||
| 
 | ||
---@private
--- Length of the region between two positions, measured in the units of the
--- offset encoding (line lengths come from compute_line_length):
---   bytes for utf-8, the utf-16 value of str_utfindex for utf-16 and the
---   utf-32 value of str_utfindex for utf-32.
--- Every line break inside the region adds the byte length of `line_ending`,
--- e.g. 2 for "\r\n" and 1 for "\n" or "\r".
---@param lines table list of lines the positions refer to
---@param start_range table start position (uses line_idx and char_idx)
---@param end_range table end position (uses line_idx and char_idx); exclusive
---@param offset_encoding string utf-8|utf-16|utf-32|nil
---@param line_ending string line ending used between lines
---@return integer length of the region
local function compute_range_length(lines, start_range, end_range, offset_encoding, line_ending)
  local eol_length = #line_ending
  local first_idx = start_range.line_idx
  local last_idx = end_range.line_idx

  -- Single line: plain difference of the char indices
  if first_idx == last_idx then
    return end_range.char_idx - start_range.char_idx
  end

  -- Remainder of the first line plus its line ending; a missing or empty
  -- first line contributes only the line ending.
  local total = eol_length
  local first_line = lines[first_idx]
  if first_line and #first_line > 0 then
    total = compute_line_length(first_line, offset_encoding) - start_range.char_idx + 1 + eol_length
  end

  -- Lines strictly between first and last count in full, each with a line ending
  for idx = first_idx + 1, last_idx - 1 do
    local line = lines[idx]
    local line_units = 0
    if #line > 0 then
      line_units = compute_line_length(line, offset_encoding)
    end
    total = total + line_units + eol_length
  end

  -- Part of the last line that lies before the (exclusive) end position
  local last_line = lines[last_idx]
  if last_line and #last_line > 0 then
    total = total + end_range.char_idx - 1
  end

  return total
end
 | ||
| 
 | ||
--- Builds the change event describing the difference between the previous and
--- the current lines of a buffer.
---@param prev_lines table list of lines from previous buffer
---@param curr_lines table list of lines from current buffer
---@param firstline number line to begin search for first difference (0-based, converted to 1-based internally)
---@param lastline number line to begin search in prev_lines for last difference (0-based, converted to 1-based internally)
---@param new_lastline number line to begin search in curr_lines for last difference (0-based, converted to 1-based internally)
---@param offset_encoding string encoding requested by language server
---@param line_ending string line ending placed between lines of the changed text and counted in rangeLength
---@return table TextDocumentContentChangeEvent see https://microsoft.github.io/language-server-protocol/specifications/specification-3-17/#textDocumentContentChangeEvent
function M.compute_diff(prev_lines, curr_lines, firstline, lastline, new_lastline, offset_encoding, line_ending)
  -- The helpers below all work with 1-based line numbers
  local first = firstline + 1
  local last = lastline + 1
  local new_last = new_lastline + 1

  -- Start of the change; identical for the previous and the current buffer.
  -- Reported to the server as range.start and used to slice the new text.
  local start_range = compute_start_range(prev_lines, curr_lines, first, last, new_last, offset_encoding)

  -- End of the change: in the previous buffer (reported as range.end) and in
  -- the current buffer (used to slice the new text).
  local prev_end_range, curr_end_range =
    compute_end_range(prev_lines, curr_lines, start_range, first, last, new_last, offset_encoding)

  -- Replacement text taken from the current buffer
  local text = extract_text(curr_lines, start_range, curr_end_range, line_ending)

  -- Length of the replaced text in the previous buffer. Deprecated but still
  -- required for certain language servers.
  local range_length = compute_range_length(prev_lines, start_range, prev_end_range, offset_encoding, line_ending)

  -- Positions go out 0-based
  return {
    range = {
      ['start'] = { line = start_range.line_idx - 1, character = start_range.char_idx - 1 },
      ['end'] = { line = prev_end_range.line_idx - 1, character = prev_end_range.char_idx - 1 },
    },
    text = text,
    rangeLength = range_length,
  }
end
 | ||
| 
 | ||
| return M
 | 
