Files
neovim/runtime/lua/vim/_stringbuffer.lua
tao 654303079b feat(lsp): skip invalid header lines #36402
Problem:
Some servers write log to stdout and there's no way to avoid it.
See https://github.com/neovim/neovim/pull/35743#pullrequestreview-3379705828

Solution:
We can extract `content-length` field byte by byte and skip invalid
lines via a simple state machine (name/colon/value/invalid), with minimal
performance impact.

I chose byte parsing here instead of pattern. Although it's a bit more complex,
it provides more stable performance and allows for more accurate error info when
needed.

Here is a bench result and script:

    parse header1 by pattern: 59.52377ms 45
    parse header1 by byte: 7.531128ms 45

    parse header2 by pattern: 26.06936ms 45
    parse header2 by byte: 5.235724ms 45

    parse header3 by pattern: 9.348495ms 45
    parse header3 by byte: 3.452389ms 45

    parse header4 by pattern: 9.73156ms 45
    parse header4 by byte: 3.638386ms 45

Script:

```lua
local strbuffer = require('string.buffer')

--- @param header string
local function get_content_length(header)
  for line in header:gmatch('(.-)\r?\n') do
    if line == '' then
      break
    end
    local key, value = line:match('^%s*(%S+)%s*:%s*(%d+)%s*$')
    if key and key:lower() == 'content-length' then
      return assert(tonumber(value))
    end
  end
  error('Content-Length not found in header: ' .. header)
end

--- @param header string
local function get_content_length_by_byte(header)
  local state = 'name'
  local i, len = 1, #header
  local j, name = 1, 'content-length'
  local buf = strbuffer.new()
  local digit = true
  while i <= len do
    local c = header:byte(i)
    if state == 'name' then
      if c >= 65 and c <= 90 then -- lower case
        c = c + 32
      end
      if (c == 32 or c == 9) and j == 1 then
        -- skip OWS for compatibility only
      elseif c == name:byte(j) then
        j = j + 1
      elseif c == 58 and j == 15 then
        state = 'colon'
      else
        state = 'invalid'
      end
    elseif state == 'colon' then
      if c ~= 32 and c ~= 9 then -- skip OWS normally
        state = 'value'
        i = i - 1
      end
    elseif state == 'value' then
      if c == 13 and header:byte(i + 1) == 10 then -- must end with \r\n
        local value = buf:get()
        return assert(digit and tonumber(value), 'value of Content-Length is not number: ' .. value)
      else
        buf:put(string.char(c))
      end
      if c < 48 and c ~= 32 and c ~= 9 or c > 57 then
        digit = false
      end
    elseif state == 'invalid' then
      if c == 10 then -- reset for next line
        state, j = 'name', 1
      end
    end
    i = i + 1
  end
  error('Content-Length not found in header: ' .. header)
end

--- @param fn fun(header: string): number
local function bench(label, header, fn, count)
  local start = vim.uv.hrtime()
  local value --- @type number
  for _ = 1, count do
    value = fn(header)
  end
  local elapsed = (vim.uv.hrtime() - start) / 1e6
  print(label .. ':', elapsed .. 'ms', value)
end

-- header starting with log lines
local header1 =
  'WARN: no common words file defined for Khmer - this language might not be correctly auto-detected\nWARN: no common words file defined for Japanese - this language might not be correctly auto-detected\nContent-Length: 45  \r\n\r\n'
-- header starting with content-type
local header2 = 'Content-Type: application/json-rpc; charset=utf-8\r\nContent-Length: 45  \r\n'
-- regular header
local header3 = '  Content-Length: 45\r\n'
-- regular header ending with content-type
local header4 = '  Content-Length: 45 \r\nContent-Type: application/json-rpc; charset=utf-8\r\n'

local count = 10000

collectgarbage('collect')
bench('parse header1 by pattern', header1, get_content_length, count)
collectgarbage('collect')
bench('parse header1 by byte', header1, get_content_length_by_byte, count)

collectgarbage('collect')
bench('parse header2 by pattern', header2, get_content_length, count)
collectgarbage('collect')
bench('parse header2 by byte', header2, get_content_length_by_byte, count)

collectgarbage('collect')
bench('parse header3 by pattern', header3, get_content_length, count)
collectgarbage('collect')
bench('parse header3 by byte', header3, get_content_length_by_byte, count)

collectgarbage('collect')
bench('parse header4 by pattern', header4, get_content_length, count)
collectgarbage('collect')
bench('parse header4 by byte', header4, get_content_length_by_byte, count)
```

Also, I removed an outdated test
accd392f4d/test/functional/plugin/lsp_spec.lua (L1950)
and tweaked the boilerplate in two other tests for reusability while keeping the final assertions the same.
accd392f4d/test/functional/plugin/lsp_spec.lua (L5704)
accd392f4d/test/functional/plugin/lsp_spec.lua (L5721)
2025-11-16 17:23:52 -08:00

112 lines
2.4 KiB
Lua

-- Basic shim for LuaJIT's stringbuffer.
-- Note this does not implement the full API.
-- This is intentionally internal-only. If we want to expose it, we should
-- reimplement this a userdata and ship it as `string.buffer`
-- (minus the FFI stuff) for Lua 5.1.
local M = {}
local has_strbuffer, strbuffer = pcall(require, 'string.buffer')
if has_strbuffer then
M.new = strbuffer.new
-- Lua 5.1 does not have __len metamethod so we need to provide a len()
-- function to use instead.
--- @param buf vim._stringbuffer
--- @return integer
function M.len(buf)
return #buf
end
return M
end
--- @class vim._stringbuffer
--- @field private buf string[]
--- @field package len integer absolute length of the `buf`
--- @field package skip_ptr integer
local StrBuffer = {}
StrBuffer.__index = StrBuffer
--- @return string
function StrBuffer:tostring()
if #self.buf > 1 then
self.buf = { table.concat(self.buf) }
end
-- assert(self.len == #(self.buf[1] or ''), 'len mismatch')
if self.skip_ptr > 0 then
if self.buf[1] then
self.buf[1] = self.buf[1]:sub(self.skip_ptr + 1)
self.len = self.len - self.skip_ptr
end
self.skip_ptr = 0
end
-- assert(self.len == #(self.buf[1] or ''), 'len mismatch')
return self.buf[1] or ''
end
StrBuffer.__tostring = StrBuffer.tostring
--- @private
--- Efficiently peak at the first `n` characters of the buffer.
--- @param n integer
--- @return string
function StrBuffer:_peak(n)
local skip, buf1 = self.skip_ptr, self.buf[1]
if buf1 and (n + skip) < #buf1 then
return buf1:sub(skip + 1, skip + n)
end
return self:tostring():sub(1, n)
end
--- @param chunk string
function StrBuffer:put(chunk)
local s = tostring(chunk)
self.buf[#self.buf + 1] = s
self.len = self.len + #s
return self
end
--- @param str string
function StrBuffer:set(str)
return self:reset():put(str)
end
--- @param n? integer
--- @return string
function StrBuffer:get(n)
n = n or self.len
local r = self:_peak(n)
self:skip(n)
return r
end
--- @param n integer
function StrBuffer:skip(n)
self.skip_ptr = math.min(self.len, self.skip_ptr + n)
return self
end
function StrBuffer:reset()
self.buf = {}
self.skip_ptr = 0
self.len = 0
return self
end
function M.new()
return setmetatable({}, StrBuffer):reset()
end
--- @param buf vim._stringbuffer
function M.len(buf)
return buf.len - buf.skip_ptr
end
return M