mirror of
https://github.com/neovim/neovim.git
synced 2026-05-02 20:15:03 +00:00
fix(bufwrite.c): handle invalid byte sequences #37363
Problem: bw_rest was used as an extra buffer to save incomplete byte sequences between calls to buf_write_bytes. Besides being unnecessarily complicated, this introduced a number of issues: 1) The bytes stored in bw_rest could still be there at the end of writing the file, never having been written, thus losing some of the file content on write. 2) bw_rest was not cleared out after the "checking_conversion" phase, leaving its stale bytes to affect the written file content during the writing phase, corrupting the file. 3) bw_rest could contain extra bytes that need to be written to the output buffer during a buf_write_convert call, potentially before any bytes are consumed. But some conversions are in-place, without a separate output buffer. Writing bytes from bw_rest to the "output" buffer actually overwrote bytes from the input buffer before they were read, corrupting the data to be written. 4) The extra bytes in bw_rest that need to be written to the conversion output buffer were not originally accounted for in the size calculation for the output buffer, causing a buffer overflow (previously fixed in Vim patch 9.1.2028). Solution: Rather than maintaining a separate buffer, the unconverted bytes at the end of the buffer can just be shifted to the beginning of the buffer, and the buffer size updated. This requires a bit of refactoring, and buf_write_convert and buf_write_convert_with_iconv need to report the number of bytes they consumed so that buf_write_bytes can handle the remaining bytes. Following conversion, bw_buf can be checked for any remaining bytes. Leftover bytes in this case result in a conversion error, which is better than silently dropping them. A short section of dead code for converting a non-UTF-8 buffer to UTF-8 was removed from buf_write_convert. Neovim buffers are always UTF-8. A few additional tests for iconv conversions have been added. Vim's iconv tests are disabled in Neovim because they use unsupported values for 'encoding'.
This commit is contained in:
@@ -11,6 +11,7 @@ local api = n.api
|
||||
local skip = t.skip
|
||||
local is_os = t.is_os
|
||||
local is_ci = t.is_ci
|
||||
local read_file = t.read_file
|
||||
|
||||
local fname = 'Xtest-functional-ex_cmds-write'
|
||||
local fname_bak = fname .. '~'
|
||||
@@ -181,6 +182,99 @@ describe(':write', function()
|
||||
vim.uv.fs_symlink(fname_bak .. ('/xxxxx'):rep(20), fname)
|
||||
eq("Vim(write):E166: Can't open linked file for writing", pcall_err(command, 'write!'))
|
||||
end)
|
||||
|
||||
it('fails converting a trailing incomplete sequence', function()
  -- Regression test for https://github.com/neovim/neovim/issues/36990: an invalid UTF-8
  -- sequence at the end of the file, seen while the conversion is being tested, could
  -- overwrite the rest of the file during the real conversion.
  -- The last line ends with only the first two bytes of a three-byte sequence.
  local lines = { 'line 1', 'line 2', 'aaabbb\235\128' }
  api.nvim_buf_set_lines(0, 0, 1, true, lines)
  command('set noendofline nofixendofline')

  local err = pcall_err(command, 'write ++enc=latin1 ' .. fname)
  eq(
    "Vim(write):E513: Write error, conversion failed in line 3 (make 'fenc' empty to override)",
    err
  )
end)
|
||||
|
||||
it('converts to latin1 with an invalid sequence at buffer boundary', function()
  -- Regression test for https://github.com/neovim/neovim/issues/36990: an invalid UTF-8
  -- sequence landing right at the end of the 8 KiB buffer used for encoding conversions
  -- caused the data after it to be overwritten.
  local filler = ('a'):rep(8 * 1024 - 1)
  local trailer = ('b'):rep(20)
  api.nvim_buf_set_lines(0, 0, 1, true, { filler .. '\251' .. trailer })
  command('set noendofline nofixendofline fenc=latin1')
  command('write ' .. fname)

  -- Only the end of the file matters here: the bytes after the invalid one must survive.
  local written = read_file(fname) or ''
  eq('bbbbbbbbbb', written:sub(-10))
end)
|
||||
|
||||
it('converts to CP1251 with iconv', function()
  -- Two lines of Cyrillic text; the punctuation is plain ASCII and is written unchanged.
  local lines = { 'Привет, мир!', 'Это простой тест.' }
  api.nvim_buf_set_lines(0, 0, 1, true, lines)
  command('write ++enc=cp1251 ++ff=unix ' .. fname)

  -- Expected file content as raw CP1251 bytes, one "\n"-terminated line per buffer line.
  local expected = '\207\240\232\226\229\242, \236\232\240!\n'
    .. '\221\242\238 \239\240\238\241\242\238\233 \242\229\241\242.\n'
  eq(expected, read_file(fname))
end)
|
||||
|
||||
it('converts to GB18030 with iconv', function()
  -- The comma and exclamation mark in the first line must be the fullwidth forms
  -- (U+FF0C and U+FF01): the expected output below contains their two-byte GB18030
  -- encodings (\163\172 and \163\161). ASCII punctuation would be written as single
  -- bytes and the comparison would fail.
  api.nvim_buf_set_lines(0, 0, 1, true, { '你好,世界!', '这是一个测试。' })
  command('write ++enc=gb18030 ++ff=unix ' .. fname)

  -- Expected file content as raw GB18030 bytes, one "\n"-terminated line per buffer line.
  eq(
    '\196\227\186\195\163\172\202\192\189\231\163\161\n'
      .. '\213\226\202\199\210\187\184\246\178\226\202\212\161\163\n',
    read_file(fname)
  )
end)
|
||||
|
||||
it('converts to Shift_JIS with iconv', function()
  -- The exclamation mark ending the first line must be the fullwidth form (U+FF01):
  -- the expected output below contains its two-byte Shift_JIS encoding (\129I).
  -- An ASCII "!" would be written as a single byte and the comparison would fail.
  api.nvim_buf_set_lines(
    0,
    0,
    1,
    true,
    { 'こんにちは、世界!', 'これはテストです。' }
  )
  command('write ++enc=sjis ++ff=unix ' .. fname)

  -- Expected file content as raw Shift_JIS bytes, one "\n"-terminated line per buffer line.
  eq(
    '\130\177\130\241\130\201\130\191\130\205\129A\144\162\138E\129I\n'
      .. '\130\177\130\234\130\205\131e\131X\131g\130\197\130\183\129B\n',
    read_file(fname)
  )
end)
|
||||
|
||||
it('fails converting an illegal sequence with iconv', function()
  -- The second line contains a lone \128 byte in the middle of otherwise valid text.
  local lines = { 'line 1', 'aaa\128bbb' }
  api.nvim_buf_set_lines(0, 0, 1, true, lines)

  local err = pcall_err(command, 'write ++enc=cp1251 ' .. fname)
  eq("Vim(write):E513: Write error, conversion failed (make 'fenc' empty to override)", err)
end)
|
||||
|
||||
it('handles a multi-byte sequence crossing the buffer boundary converting with iconv', function()
  -- 8 KiB minus one byte of padding, followed by a multi-byte character.
  local padding = ('a'):rep(8 * 1024 - 1)
  api.nvim_buf_set_lines(0, 0, 1, true, { padding .. 'Дbbbbb' })
  -- Without a backup the write also goes through the "checking" phase, so that is tested too.
  command('set nowritebackup')
  command('write ++enc=cp1251 ++ff=unix ' .. fname)

  eq(padding .. '\196bbbbb\n', read_file(fname))
end)
|
||||
end)
|
||||
|
||||
describe(':update', function()
|
||||
|
||||
Reference in New Issue
Block a user