mirror of
https://github.com/neovim/neovim.git
synced 2025-12-09 08:02:38 +00:00
feat(lsp): skip invalid header lines #36402
Problem: Some servers write log to stdout and there's no way to avoid it. See https://github.com/neovim/neovim/pull/35743#pullrequestreview-3379705828 Solution: We can extract `content-length` field byte by byte and skip invalid lines via a simple state machine (name/colon/value/invalid), with minimal performance impact. I chose byte parsing here instead of pattern. Although it's a bit more complex, it provides more stable performance and allows for more accurate error info when needed. Here is a bench result and script: parse header1 by pattern: 59.52377ms 45 parse header1 by byte: 7.531128ms 45 parse header2 by pattern: 26.06936ms 45 parse header2 by byte: 5.235724ms 45 parse header3 by pattern: 9.348495ms 45 parse header3 by byte: 3.452389ms 45 parse header4 by pattern: 9.73156ms 45 parse header4 by byte: 3.638386ms 45 Script: ```lua local strbuffer = require('string.buffer') --- @param header string local function get_content_length(header) for line in header:gmatch('(.-)\r?\n') do if line == '' then break end local key, value = line:match('^%s*(%S+)%s*:%s*(%d+)%s*$') if key and key:lower() == 'content-length' then return assert(tonumber(value)) end end error('Content-Length not found in header: ' .. 
header) end --- @param header string local function get_content_length_by_byte(header) local state = 'name' local i, len = 1, #header local j, name = 1, 'content-length' local buf = strbuffer.new() local digit = true while i <= len do local c = header:byte(i) if state == 'name' then if c >= 65 and c <= 90 then -- lower case c = c + 32 end if (c == 32 or c == 9) and j == 1 then -- skip OWS for compatibility only elseif c == name:byte(j) then j = j + 1 elseif c == 58 and j == 15 then state = 'colon' else state = 'invalid' end elseif state == 'colon' then if c ~= 32 and c ~= 9 then -- skip OWS normally state = 'value' i = i - 1 end elseif state == 'value' then if c == 13 and header:byte(i + 1) == 10 then -- must end with \r\n local value = buf:get() return assert(digit and tonumber(value), 'value of Content-Length is not number: ' .. value) else buf:put(string.char(c)) end if c < 48 and c ~= 32 and c ~= 9 or c > 57 then digit = false end elseif state == 'invalid' then if c == 10 then -- reset for next line state, j = 'name', 1 end end i = i + 1 end error('Content-Length not found in header: ' .. header) end --- @param fn fun(header: string): number local function bench(label, header, fn, count) local start = vim.uv.hrtime() local value --- @type number for _ = 1, count do value = fn(header) end local elapsed = (vim.uv.hrtime() - start) / 1e6 print(label .. ':', elapsed .. 
'ms', value) end -- header starting with log lines local header1 = 'WARN: no common words file defined for Khmer - this language might not be correctly auto-detected\nWARN: no common words file defined for Japanese - this language might not be correctly auto-detected\nContent-Length: 45 \r\n\r\n' -- header starting with content-type local header2 = 'Content-Type: application/json-rpc; charset=utf-8\r\nContent-Length: 45 \r\n' -- regular header local header3 = ' Content-Length: 45\r\n' -- regular header ending with content-type local header4 = ' Content-Length: 45 \r\nContent-Type: application/json-rpc; charset=utf-8\r\n' local count = 10000 collectgarbage('collect') bench('parse header1 by pattern', header1, get_content_length, count) collectgarbage('collect') bench('parse header1 by byte', header1, get_content_length_by_byte, count) collectgarbage('collect') bench('parse header2 by pattern', header2, get_content_length, count) collectgarbage('collect') bench('parse header2 by byte', header2, get_content_length_by_byte, count) collectgarbage('collect') bench('parse header3 by pattern', header3, get_content_length, count) collectgarbage('collect') bench('parse header3 by byte', header3, get_content_length_by_byte, count) collectgarbage('collect') bench('parse header4 by pattern', header4, get_content_length, count) collectgarbage('collect') bench('parse header4 by byte', header4, get_content_length_by_byte, count) ``` Also, I removed an outdated test (accd392f4d/test/functional/plugin/lsp_spec.lua, L1950) and tweaked the boilerplate in two other tests (accd392f4d/test/functional/plugin/lsp_spec.lua, L5704 and accd392f4d/test/functional/plugin/lsp_spec.lua, L5721) for reusability while keeping the final assertions the same.
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
local log = require('vim.lsp.log')
|
||||
local protocol = require('vim.lsp.protocol')
|
||||
local lsp_transport = require('vim.lsp._transport')
|
||||
local strbuffer = require('vim._stringbuffer')
|
||||
local validate, schedule_wrap = vim.validate, vim.schedule_wrap
|
||||
|
||||
--- Embeds the given string into a table and correctly computes `Content-Length`.
|
||||
@@ -16,19 +17,59 @@ local function format_message_with_content_length(message)
|
||||
})
|
||||
end
|
||||
|
||||
--- Extract content-length from the header
|
||||
--- Extract content-length from the header.
|
||||
---
|
||||
--- The structure of header fields conforms to the [HTTP semantics](https://tools.ietf.org/html/rfc7230#section-3.2).
|
||||
--- i.e., `header-field = field-name : OWS field-value OWS`,
|
||||
--- OWS means optional whitespace (Space/Horizontal Tab).
|
||||
---
|
||||
--- We ignore lines ending with `\n` that don't contain `content-length`, since some servers
|
||||
--- write logs to stdout and there's no way to avoid it.
|
||||
--- See https://github.com/neovim/neovim/pull/35743#pullrequestreview-3379705828
|
||||
--- @param header string The header to parse
|
||||
--- @return integer
|
||||
local function get_content_length(header)
|
||||
for line in header:gmatch('(.-)\r\n') do
|
||||
if line == '' then
|
||||
break
|
||||
end
|
||||
local key, value = line:match('^%s*(%S+)%s*:%s*(%d+)%s*$')
|
||||
if key and key:lower() == 'content-length' then
|
||||
return assert(tonumber(value))
|
||||
local state = 'name'
|
||||
local i, len = 1, #header
|
||||
local j, name = 1, 'content-length'
|
||||
local buf = strbuffer.new()
|
||||
local digit = true
|
||||
while i <= len do
|
||||
local c = header:byte(i)
|
||||
if state == 'name' then
|
||||
if c >= 65 and c <= 90 then -- lower case
|
||||
c = c + 32
|
||||
end
|
||||
if (c == 32 or c == 9) and j == 1 then -- luacheck: ignore 542
|
||||
-- skip OWS for compatibility only
|
||||
elseif c == name:byte(j) then
|
||||
j = j + 1
|
||||
elseif c == 58 and j == 15 then
|
||||
state = 'colon'
|
||||
else
|
||||
state = 'invalid'
|
||||
end
|
||||
elseif state == 'colon' then
|
||||
if c ~= 32 and c ~= 9 then -- skip OWS normally
|
||||
state = 'value'
|
||||
i = i - 1
|
||||
end
|
||||
elseif state == 'value' then
|
||||
if c == 13 and header:byte(i + 1) == 10 then -- must end with \r\n
|
||||
local value = buf:get()
|
||||
return assert(digit and tonumber(value), 'value of Content-Length is not number: ' .. value)
|
||||
else
|
||||
buf:put(string.char(c))
|
||||
end
|
||||
if c < 48 and c ~= 32 and c ~= 9 or c > 57 then
|
||||
digit = false
|
||||
end
|
||||
elseif state == 'invalid' then
|
||||
if c == 10 then -- reset for next line
|
||||
state, j = 'name', 1
|
||||
end
|
||||
end
|
||||
i = i + 1
|
||||
end
|
||||
error('Content-Length not found in header: ' .. header)
|
||||
end
|
||||
@@ -149,8 +190,6 @@ local default_dispatchers = {
|
||||
end,
|
||||
}
|
||||
|
||||
local strbuffer = require('vim._stringbuffer')
|
||||
|
||||
--- @async
|
||||
local function request_parser_loop()
|
||||
local buf = strbuffer.new()
|
||||
|
||||
Reference in New Issue
Block a user