From c3d11208bc6b12d22c4f59f098ef55eff29efd05 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Sat, 6 May 2023 12:10:49 +0200
Subject: [PATCH] [Backport release-0.9] perf(lsp): load buffer contents once
 when processing semantic tokens responses (#23505)

perf(lsp): load buffer contents once when processing semantic token responses

Using _get_line_byte_from_position() for each token's boundaries was a
major bottleneck, since that function loads individual buffer lines via
nvim_buf_get_lines() (plus a lot of extra overhead). As a result, each
token caused two calls to nvim_buf_get_lines(): one for the start
position and one for the end position.

For semantic tokens, we only attach to buffers that have already been
loaded, so we can safely just get all the lines for the entire buffer at
once, and lift the rest of the _get_line_byte_from_position()
implementation directly while bypassing the part that loads the buffer
line.

While I was looking at get_lines() (used by
_get_line_byte_from_position()), I noticed that we were checking for
non-file URIs before we even looked to see whether the buffer was
already loaded. Moving the buffer-loaded check to be the first thing
done in get_lines() more than halved the average time spent transforming
the token list into highlight ranges (measured while that loop was still
using _get_line_byte_from_position()). I ended up improving that loop
further by not using get_lines() at all, but figured the performance
improvement from the reordering was worth leaving in.

(cherry picked from commit dc38eafab5ca5877d2b40febb61809449fd84196)

Co-authored-by: John Drouhard <john@drouhard.dev>
---
 runtime/lua/vim/lsp/semantic_tokens.lua | 17 ++++++++++++-----
 runtime/lua/vim/lsp/util.lua            | 12 ++++++------
 2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/runtime/lua/vim/lsp/semantic_tokens.lua b/runtime/lua/vim/lsp/semantic_tokens.lua
index b6b09c58b1..376cac19a7 100644
--- a/runtime/lua/vim/lsp/semantic_tokens.lua
+++ b/runtime/lua/vim/lsp/semantic_tokens.lua
@@ -99,6 +99,7 @@ local function tokens_to_ranges(data, bufnr, client, request)
   local legend = client.server_capabilities.semanticTokensProvider.legend
   local token_types = legend.tokenTypes
   local token_modifiers = legend.tokenModifiers
+  local lines = api.nvim_buf_get_lines(bufnr, 0, -1, false)
   local ranges = {}
 
   local start = uv.hrtime()
@@ -137,11 +138,17 @@ local function tokens_to_ranges(data, bufnr, client, request)
     local modifiers = modifiers_from_number(data[i + 4], token_modifiers)
 
     ---@private
-    local function _get_byte_pos(char_pos)
-      return util._get_line_byte_from_position(bufnr, {
-        line = line,
-        character = char_pos,
-      }, client.offset_encoding)
+    local function _get_byte_pos(col)
+      if col > 0 then
+        local buf_line = lines[line + 1] or ''
+        local ok, result
+        ok, result = pcall(util._str_byteindex_enc, buf_line, col, client.offset_encoding)
+        if ok then
+          return result
+        end
+        return math.min(#buf_line, col)
+      end
+      return col
     end
 
     local start_col = _get_byte_pos(start_char)
diff --git a/runtime/lua/vim/lsp/util.lua b/runtime/lua/vim/lsp/util.lua
index ebde7af16c..c759d9e957 100644
--- a/runtime/lua/vim/lsp/util.lua
+++ b/runtime/lua/vim/lsp/util.lua
@@ -253,12 +253,17 @@ local function get_lines(bufnr, rows)
   ---@private
   local function buf_lines()
     local lines = {}
-    for _, row in pairs(rows) do
+    for _, row in ipairs(rows) do
       lines[row] = (api.nvim_buf_get_lines(bufnr, row, row + 1, false) or { '' })[1]
     end
     return lines
   end
 
+  -- use loaded buffers if available
+  if vim.fn.bufloaded(bufnr) == 1 then
+    return buf_lines()
+  end
+
   local uri = vim.uri_from_bufnr(bufnr)
 
   -- load the buffer if this is not a file uri
@@ -268,11 +273,6 @@ local function get_lines(bufnr, rows)
     return buf_lines()
   end
 
-  -- use loaded buffers if available
-  if vim.fn.bufloaded(bufnr) == 1 then
-    return buf_lines()
-  end
-
   local filename = api.nvim_buf_get_name(bufnr)
 
   -- get the data from the file