Problem:
Some servers write log to stdout and there's no way to avoid it.
See https://github.com/neovim/neovim/pull/35743#pullrequestreview-3379705828
Solution:
We can extract `content-length` field byte by byte and skip invalid
lines via a simple state machine (name/colon/value/invalid), with minimal
performance impact.
I chose byte parsing here instead of pattern. Although it's a bit more complex,
it provides more stable performance and allows for more accurate error info when
needed.
Here is a bench result and script:
parse header1 by pattern: 59.52377ms 45
parse header1 by byte: 7.531128ms 45
parse header2 by pattern: 26.06936ms 45
parse header2 by byte: 5.235724ms 45
parse header3 by pattern: 9.348495ms 45
parse header3 by byte: 3.452389ms 45
parse header4 by pattern: 9.73156ms 45
parse header4 by byte: 3.638386ms 45
Script:
```lua
local strbuffer = require('string.buffer')
--- @param header string
local function get_content_length(header)
for line in header:gmatch('(.-)\r?\n') do
if line == '' then
break
end
local key, value = line:match('^%s*(%S+)%s*:%s*(%d+)%s*$')
if key and key:lower() == 'content-length' then
return assert(tonumber(value))
end
end
error('Content-Length not found in header: ' .. header)
end
--- @param header string
local function get_content_length_by_byte(header)
local state = 'name'
local i, len = 1, #header
local j, name = 1, 'content-length'
local buf = strbuffer.new()
local digit = true
while i <= len do
local c = header:byte(i)
if state == 'name' then
if c >= 65 and c <= 90 then -- lower case
c = c + 32
end
if (c == 32 or c == 9) and j == 1 then
-- skip OWS for compatibility only
elseif c == name:byte(j) then
j = j + 1
elseif c == 58 and j == 15 then
state = 'colon'
else
state = 'invalid'
end
elseif state == 'colon' then
if c ~= 32 and c ~= 9 then -- skip OWS normally
state = 'value'
i = i - 1
end
elseif state == 'value' then
if c == 13 and header:byte(i + 1) == 10 then -- must end with \r\n
local value = buf:get()
return assert(digit and tonumber(value), 'value of Content-Length is not number: ' .. value)
else
buf:put(string.char(c))
end
if c < 48 and c ~= 32 and c ~= 9 or c > 57 then
digit = false
end
elseif state == 'invalid' then
if c == 10 then -- reset for next line
state, j = 'name', 1
end
end
i = i + 1
end
error('Content-Length not found in header: ' .. header)
end
--- @param fn fun(header: string): number
local function bench(label, header, fn, count)
local start = vim.uv.hrtime()
local value --- @type number
for _ = 1, count do
value = fn(header)
end
local elapsed = (vim.uv.hrtime() - start) / 1e6
print(label .. ':', elapsed .. 'ms', value)
end
-- header starting with log lines
local header1 =
'WARN: no common words file defined for Khmer - this language might not be correctly auto-detected\nWARN: no common words file defined for Japanese - this language might not be correctly auto-detected\nContent-Length: 45 \r\n\r\n'
-- header starting with content-type
local header2 = 'Content-Type: application/json-rpc; charset=utf-8\r\nContent-Length: 45 \r\n'
-- regular header
local header3 = ' Content-Length: 45\r\n'
-- regular header ending with content-type
local header4 = ' Content-Length: 45 \r\nContent-Type: application/json-rpc; charset=utf-8\r\n'
local count = 10000
collectgarbage('collect')
bench('parse header1 by pattern', header1, get_content_length, count)
collectgarbage('collect')
bench('parse header1 by byte', header1, get_content_length_by_byte, count)
collectgarbage('collect')
bench('parse header2 by pattern', header2, get_content_length, count)
collectgarbage('collect')
bench('parse header2 by byte', header2, get_content_length_by_byte, count)
collectgarbage('collect')
bench('parse header3 by pattern', header3, get_content_length, count)
collectgarbage('collect')
bench('parse header3 by byte', header3, get_content_length_by_byte, count)
collectgarbage('collect')
bench('parse header4 by pattern', header4, get_content_length, count)
collectgarbage('collect')
bench('parse header4 by byte', header4, get_content_length_by_byte, count)
```
Also, I removed an outdated test
accd392f4d/test/functional/plugin/lsp_spec.lua (L1950)
and tweaked the boilerplate in two other tests for reusability while keeping the final assertions the same.
accd392f4d/test/functional/plugin/lsp_spec.lua (L5704)accd392f4d/test/functional/plugin/lsp_spec.lua (L5721)