mirror of
https://github.com/neovim/neovim.git
synced 2026-04-04 22:59:36 +00:00
fix(bufwrite.c): handle invalid byte sequences #37363
Problem: bw_rest was used as an extra buffer to save incomplete byte sequences between calls to buf_write_bytes. Besides being unnecessarily complicated, this introduced a number of issues:
1) The bytes stored in bw_rest could still be there at the end of writing the file, never having been written, thus losing some of the file content on write.
2) bw_rest was not cleared out after the "checking_conversion" phase, leaving its stale bytes to affect the written file content during the writing phase and corrupt the file.
3) bw_rest could contain extra bytes that need to be written to the output buffer during a buf_write_convert call, potentially before any input bytes are consumed. But some conversions are done in-place, without a separate output buffer. In that case, writing bytes from bw_rest to the "output" buffer actually overwrote bytes of the input buffer before they were read, corrupting the data to be written.
4) The extra bytes in bw_rest that need to be written to the conversion output buffer were not originally accounted for in the size calculation for the output buffer, causing a buffer overflow (previously fixed in Vim patch 9.1.2028).
Solution: Rather than maintaining a separate buffer, the unconverted bytes at the end of the buffer can simply be shifted to the beginning of the buffer, and the buffer length updated. This requires a bit of refactoring: buf_write_convert and buf_write_convert_with_iconv need to report the number of bytes they consumed so that buf_write_bytes can handle the remaining bytes. After the last conversion, bw_buf can be checked for any remaining bytes. Leftover bytes in this case result in a conversion error, which is better than silently dropping them.
A short section of dead code in buf_write_convert, which converted a non-UTF-8 buffer to UTF-8, was removed; Neovim buffers are always UTF-8.
A few additional tests for iconv conversions have been added. Vim's iconv tests are disabled in Neovim because they use unsupported values for 'encoding'.
This commit is contained in:
@@ -80,8 +80,6 @@ struct bw_info {
|
|||||||
char *bw_buf; // buffer with data to be written
|
char *bw_buf; // buffer with data to be written
|
||||||
int bw_len; // length of data
|
int bw_len; // length of data
|
||||||
int bw_flags; // FIO_ flags
|
int bw_flags; // FIO_ flags
|
||||||
uint8_t bw_rest[CONV_RESTLEN]; // not converted bytes
|
|
||||||
int bw_restlen; // nr of bytes in bw_rest[]
|
|
||||||
int bw_first; // first write call
|
int bw_first; // first write call
|
||||||
char *bw_conv_buf; // buffer for writing converted chars
|
char *bw_conv_buf; // buffer for writing converted chars
|
||||||
size_t bw_conv_buflen; // size of bw_conv_buf
|
size_t bw_conv_buflen; // size of bw_conv_buf
|
||||||
@@ -160,30 +158,17 @@ static bool ucs2bytes(unsigned c, char **pp, int flags)
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Converts a buffer encoding based on values in ip using iconv.
|
||||||
|
///
|
||||||
|
/// See buf_write_convert for parameters and return value details.
|
||||||
static int buf_write_convert_with_iconv(struct bw_info *ip, char **bufp, int *lenp)
|
static int buf_write_convert_with_iconv(struct bw_info *ip, char **bufp, int *lenp)
|
||||||
{
|
{
|
||||||
const char *from;
|
|
||||||
size_t fromlen;
|
|
||||||
size_t tolen;
|
|
||||||
|
|
||||||
int len = *lenp;
|
int len = *lenp;
|
||||||
|
|
||||||
// Convert with iconv().
|
// Convert with iconv().
|
||||||
if (ip->bw_restlen > 0) {
|
const char *from = *bufp;
|
||||||
// Need to concatenate the remainder of the previous call and
|
size_t fromlen = (size_t)len;
|
||||||
// the bytes of the current call. Use the end of the
|
size_t tolen = ip->bw_conv_buflen;
|
||||||
// conversion buffer for this.
|
|
||||||
fromlen = (size_t)len + (size_t)ip->bw_restlen;
|
|
||||||
char *fp = ip->bw_conv_buf + ip->bw_conv_buflen - fromlen;
|
|
||||||
memmove(fp, ip->bw_rest, (size_t)ip->bw_restlen);
|
|
||||||
memmove(fp + ip->bw_restlen, *bufp, (size_t)len);
|
|
||||||
from = fp;
|
|
||||||
tolen = ip->bw_conv_buflen - fromlen;
|
|
||||||
} else {
|
|
||||||
from = *bufp;
|
|
||||||
fromlen = (size_t)len;
|
|
||||||
tolen = ip->bw_conv_buflen;
|
|
||||||
}
|
|
||||||
char *to = ip->bw_conv_buf;
|
char *to = ip->bw_conv_buf;
|
||||||
|
|
||||||
if (ip->bw_first) {
|
if (ip->bw_first) {
|
||||||
@@ -201,91 +186,50 @@ static int buf_write_convert_with_iconv(struct bw_info *ip, char **bufp, int *le
|
|||||||
ip->bw_first = false;
|
ip->bw_first = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If iconv() has an error or there is not enough room, fail.
|
// If iconv() has an error, fail.
|
||||||
if ((iconv(ip->bw_iconv_fd, (void *)&from, &fromlen, &to, &tolen)
|
if (iconv(ip->bw_iconv_fd, (void *)&from, &fromlen, &to, &tolen)
|
||||||
== (size_t)-1 && ICONV_ERRNO != ICONV_EINVAL)
|
== (size_t)-1 && ICONV_ERRNO != ICONV_EINVAL) {
|
||||||
|| fromlen > CONV_RESTLEN) {
|
|
||||||
ip->bw_conv_error = true;
|
ip->bw_conv_error = true;
|
||||||
return FAIL;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// copy remainder to ip->bw_rest[] to be used for the next call.
|
|
||||||
if (fromlen > 0) {
|
|
||||||
memmove(ip->bw_rest, (void *)from, fromlen);
|
|
||||||
}
|
|
||||||
ip->bw_restlen = (int)fromlen;
|
|
||||||
|
|
||||||
*bufp = ip->bw_conv_buf;
|
*bufp = ip->bw_conv_buf;
|
||||||
*lenp = (int)(to - ip->bw_conv_buf);
|
*lenp = (int)(to - ip->bw_conv_buf);
|
||||||
|
|
||||||
return OK;
|
return len - (int)fromlen;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Converts a buffer encoding based on values in ip.
|
||||||
|
///
|
||||||
|
/// @param ip buf_write_bytes context
|
||||||
|
/// @param[in,out] bufp Pointer to the input buffer. On return, pointer to the output buffer.
|
||||||
|
/// @param[in,out] lenp Pointer to the input buffer length. On return the referenced value is set to
|
||||||
|
/// the length of the output buffer.
|
||||||
|
///
|
||||||
|
/// @return The number of input buffer bytes consumed for the conversion, which may be less than the
|
||||||
|
/// initial input buffer size when the buffer ends with an incomplete character sequence.
|
||||||
static int buf_write_convert(struct bw_info *ip, char **bufp, int *lenp)
|
static int buf_write_convert(struct bw_info *ip, char **bufp, int *lenp)
|
||||||
{
|
{
|
||||||
int flags = ip->bw_flags; // extra flags
|
int flags = ip->bw_flags; // extra flags
|
||||||
|
|
||||||
if (flags & FIO_UTF8) {
|
int wlen = *lenp;
|
||||||
// Convert latin1 in the buffer to UTF-8 in the file.
|
if (flags & (FIO_UCS4 | FIO_UTF16 | FIO_UCS2 | FIO_LATIN1)) {
|
||||||
char *p = ip->bw_conv_buf; // translate to buffer
|
|
||||||
for (int wlen = 0; wlen < *lenp; wlen++) {
|
|
||||||
p += utf_char2bytes((uint8_t)(*bufp)[wlen], p);
|
|
||||||
}
|
|
||||||
*bufp = ip->bw_conv_buf;
|
|
||||||
*lenp = (int)(p - ip->bw_conv_buf);
|
|
||||||
} else if (flags & (FIO_UCS4 | FIO_UTF16 | FIO_UCS2 | FIO_LATIN1)) {
|
|
||||||
unsigned c;
|
unsigned c;
|
||||||
int n = 0;
|
int n = 0;
|
||||||
// Convert UTF-8 bytes in the buffer to UCS-2, UCS-4, UTF-16 or
|
// Convert UTF-8 bytes in the buffer to UCS-2, UCS-4, UTF-16 or
|
||||||
// Latin1 chars in the file.
|
// Latin1 chars in the file.
|
||||||
// translate in-place (can only get shorter) or to buffer
|
// translate in-place (can only get shorter) or to buffer
|
||||||
char *p = flags & FIO_LATIN1 ? *bufp : ip->bw_conv_buf;
|
char *p = flags & FIO_LATIN1 ? *bufp : ip->bw_conv_buf;
|
||||||
for (int wlen = 0; wlen < *lenp; wlen += n) {
|
for (wlen = 0; wlen < *lenp; wlen += n) {
|
||||||
if (wlen == 0 && ip->bw_restlen != 0) {
|
n = utf_ptr2len_len(*bufp + wlen, *lenp - wlen);
|
||||||
// Use remainder of previous call. Append the start of
|
if (n > *lenp - wlen) {
|
||||||
// buf[] to get a full sequence. Might still be too
|
// We have an incomplete byte sequence at the end to
|
||||||
// short!
|
// be written. We can't convert it without the
|
||||||
int l = MIN(*lenp, CONV_RESTLEN - ip->bw_restlen);
|
// remaining bytes. Keep them for the next call.
|
||||||
memmove(ip->bw_rest + ip->bw_restlen, *bufp, (size_t)l);
|
break;
|
||||||
n = utf_ptr2len_len((char *)ip->bw_rest, ip->bw_restlen + l);
|
|
||||||
if (n > ip->bw_restlen + *lenp) {
|
|
||||||
// We have an incomplete byte sequence at the end to
|
|
||||||
// be written. We can't convert it without the
|
|
||||||
// remaining bytes. Keep them for the next call.
|
|
||||||
if (ip->bw_restlen + *lenp > CONV_RESTLEN) {
|
|
||||||
return FAIL;
|
|
||||||
}
|
|
||||||
ip->bw_restlen += *lenp;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
c = (n > 1) ? (unsigned)utf_ptr2char((char *)ip->bw_rest)
|
|
||||||
: ip->bw_rest[0];
|
|
||||||
if (n >= ip->bw_restlen) {
|
|
||||||
n -= ip->bw_restlen;
|
|
||||||
ip->bw_restlen = 0;
|
|
||||||
} else {
|
|
||||||
ip->bw_restlen -= n;
|
|
||||||
memmove(ip->bw_rest, ip->bw_rest + n,
|
|
||||||
(size_t)ip->bw_restlen);
|
|
||||||
n = 0;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
n = utf_ptr2len_len(*bufp + wlen, *lenp - wlen);
|
|
||||||
if (n > *lenp - wlen) {
|
|
||||||
// We have an incomplete byte sequence at the end to
|
|
||||||
// be written. We can't convert it without the
|
|
||||||
// remaining bytes. Keep them for the next call.
|
|
||||||
if (*lenp - wlen > CONV_RESTLEN) {
|
|
||||||
return FAIL;
|
|
||||||
}
|
|
||||||
ip->bw_restlen = *lenp - wlen;
|
|
||||||
memmove(ip->bw_rest, *bufp + wlen,
|
|
||||||
(size_t)ip->bw_restlen);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
c = n > 1 ? (unsigned)utf_ptr2char(*bufp + wlen)
|
|
||||||
: (uint8_t)(*bufp)[wlen];
|
|
||||||
}
|
}
|
||||||
|
c = n > 1 ? (unsigned)utf_ptr2char(*bufp + wlen)
|
||||||
|
: (uint8_t)(*bufp)[wlen];
|
||||||
// Check that there is enough space
|
// Check that there is enough space
|
||||||
if (!(flags & FIO_LATIN1)) {
|
if (!(flags & FIO_LATIN1)) {
|
||||||
size_t need = (flags & FIO_UCS4) ? 4 : 2;
|
size_t need = (flags & FIO_UCS4) ? 4 : 2;
|
||||||
@@ -315,12 +259,10 @@ static int buf_write_convert(struct bw_info *ip, char **bufp, int *lenp)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (ip->bw_iconv_fd != (iconv_t)-1) {
|
if (ip->bw_iconv_fd != (iconv_t)-1) {
|
||||||
if (buf_write_convert_with_iconv(ip, bufp, lenp) == FAIL) {
|
return buf_write_convert_with_iconv(ip, bufp, lenp);
|
||||||
return FAIL;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return OK;
|
return wlen;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Call write() to write a number of bytes to the file.
|
/// Call write() to write a number of bytes to the file.
|
||||||
@@ -333,19 +275,33 @@ static int buf_write_bytes(struct bw_info *ip)
|
|||||||
int len = ip->bw_len; // length of data
|
int len = ip->bw_len; // length of data
|
||||||
int flags = ip->bw_flags; // extra flags
|
int flags = ip->bw_flags; // extra flags
|
||||||
|
|
||||||
|
int converted = len;
|
||||||
|
int remaining = 0;
|
||||||
|
|
||||||
// Skip conversion when writing the BOM.
|
// Skip conversion when writing the BOM.
|
||||||
if (!(flags & FIO_NOCONVERT)) {
|
if (!(flags & FIO_NOCONVERT)) {
|
||||||
if (buf_write_convert(ip, &buf, &len) == FAIL) {
|
if ((converted = buf_write_convert(ip, &buf, &len)) < 0) {
|
||||||
|
return FAIL;
|
||||||
|
}
|
||||||
|
|
||||||
|
remaining = ip->bw_len - converted;
|
||||||
|
}
|
||||||
|
|
||||||
|
ip->bw_len = remaining;
|
||||||
|
|
||||||
|
// Skip writing while checking conversion
|
||||||
|
if (ip->bw_fd >= 0) {
|
||||||
|
int wlen = (int)write_eintr(ip->bw_fd, buf, (size_t)len);
|
||||||
|
if (wlen < len) {
|
||||||
return FAIL;
|
return FAIL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ip->bw_fd < 0) {
|
if (remaining > 0) {
|
||||||
// Only checking conversion, which is OK if we get here.
|
memmove(ip->bw_buf, ip->bw_buf + converted, (size_t)remaining);
|
||||||
return OK;
|
|
||||||
}
|
}
|
||||||
int wlen = (int)write_eintr(ip->bw_fd, buf, (size_t)len);
|
|
||||||
return (wlen < len) ? FAIL : OK;
|
return OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check modification time of file, before writing to it.
|
/// Check modification time of file, before writing to it.
|
||||||
@@ -1054,7 +1010,6 @@ int buf_write(buf_T *buf, char *fname, char *sfname, linenr_T start, linenr_T en
|
|||||||
write_info.bw_conv_buf = NULL;
|
write_info.bw_conv_buf = NULL;
|
||||||
write_info.bw_conv_error = false;
|
write_info.bw_conv_error = false;
|
||||||
write_info.bw_conv_error_lnum = 0;
|
write_info.bw_conv_error_lnum = 0;
|
||||||
write_info.bw_restlen = 0;
|
|
||||||
write_info.bw_iconv_fd = (iconv_t)-1;
|
write_info.bw_iconv_fd = (iconv_t)-1;
|
||||||
|
|
||||||
// After writing a file changedtick changes but we don't want to display
|
// After writing a file changedtick changes but we don't want to display
|
||||||
@@ -1269,13 +1224,11 @@ int buf_write(buf_T *buf, char *fname, char *sfname, linenr_T start, linenr_T en
|
|||||||
if (converted) {
|
if (converted) {
|
||||||
wb_flags = get_fio_flags(fenc);
|
wb_flags = get_fio_flags(fenc);
|
||||||
if (wb_flags & (FIO_UCS2 | FIO_UCS4 | FIO_UTF16 | FIO_UTF8)) {
|
if (wb_flags & (FIO_UCS2 | FIO_UCS4 | FIO_UTF16 | FIO_UTF8)) {
|
||||||
// overallocate a bit, in case we read incomplete multi-byte chars
|
|
||||||
int size = bufsize + CONV_RESTLEN;
|
|
||||||
// Need to allocate a buffer to translate into.
|
// Need to allocate a buffer to translate into.
|
||||||
if (wb_flags & (FIO_UCS2 | FIO_UTF16 | FIO_UTF8)) {
|
if (wb_flags & (FIO_UCS2 | FIO_UTF16 | FIO_UTF8)) {
|
||||||
write_info.bw_conv_buflen = (size_t)size * 2;
|
write_info.bw_conv_buflen = (size_t)bufsize * 2;
|
||||||
} else { // FIO_UCS4
|
} else { // FIO_UCS4
|
||||||
write_info.bw_conv_buflen = (size_t)size * 4;
|
write_info.bw_conv_buflen = (size_t)bufsize * 4;
|
||||||
}
|
}
|
||||||
write_info.bw_conv_buf = verbose_try_malloc(write_info.bw_conv_buflen);
|
write_info.bw_conv_buf = verbose_try_malloc(write_info.bw_conv_buflen);
|
||||||
if (!write_info.bw_conv_buf) {
|
if (!write_info.bw_conv_buf) {
|
||||||
@@ -1477,11 +1430,10 @@ restore_backup:
|
|||||||
sha256_start(&sha_ctx);
|
sha256_start(&sha_ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
write_info.bw_len = bufsize;
|
write_info.bw_len = 0;
|
||||||
write_info.bw_flags = wb_flags;
|
write_info.bw_flags = wb_flags;
|
||||||
fileformat = get_fileformat_force(buf, eap);
|
fileformat = get_fileformat_force(buf, eap);
|
||||||
char *s = buffer;
|
char *s = buffer;
|
||||||
int len = 0;
|
|
||||||
for (lnum = start; lnum <= end; lnum++) {
|
for (lnum = start; lnum <= end; lnum++) {
|
||||||
// The next while loop is done once for each character written.
|
// The next while loop is done once for each character written.
|
||||||
// Keep it fast!
|
// Keep it fast!
|
||||||
@@ -1499,16 +1451,15 @@ restore_backup:
|
|||||||
*s = c;
|
*s = c;
|
||||||
}
|
}
|
||||||
s++;
|
s++;
|
||||||
if (++len != bufsize) {
|
if (++write_info.bw_len != bufsize) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (buf_write_bytes(&write_info) == FAIL) {
|
if (buf_write_bytes(&write_info) == FAIL) {
|
||||||
end = 0; // write error: break loop
|
end = 0; // write error: break loop
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
nchars += bufsize;
|
nchars += bufsize - write_info.bw_len;
|
||||||
s = buffer;
|
s = buffer + write_info.bw_len;
|
||||||
len = 0;
|
|
||||||
write_info.bw_start_lnum = lnum;
|
write_info.bw_start_lnum = lnum;
|
||||||
}
|
}
|
||||||
// write failed or last line has no EOL: stop here
|
// write failed or last line has no EOL: stop here
|
||||||
@@ -1526,26 +1477,24 @@ restore_backup:
|
|||||||
} else {
|
} else {
|
||||||
*s++ = CAR; // EOL_MAC or EOL_DOS: write CR
|
*s++ = CAR; // EOL_MAC or EOL_DOS: write CR
|
||||||
if (fileformat == EOL_DOS) { // write CR-NL
|
if (fileformat == EOL_DOS) { // write CR-NL
|
||||||
if (++len == bufsize) {
|
if (++write_info.bw_len == bufsize) {
|
||||||
if (buf_write_bytes(&write_info) == FAIL) {
|
if (buf_write_bytes(&write_info) == FAIL) {
|
||||||
end = 0; // write error: break loop
|
end = 0; // write error: break loop
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
nchars += bufsize;
|
nchars += bufsize - write_info.bw_len;
|
||||||
s = buffer;
|
s = buffer + write_info.bw_len;
|
||||||
len = 0;
|
|
||||||
}
|
}
|
||||||
*s++ = NL;
|
*s++ = NL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (++len == bufsize) {
|
if (++write_info.bw_len == bufsize) {
|
||||||
if (buf_write_bytes(&write_info) == FAIL) {
|
if (buf_write_bytes(&write_info) == FAIL) {
|
||||||
end = 0; // Write error: break loop.
|
end = 0; // Write error: break loop.
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
nchars += bufsize;
|
nchars += bufsize - write_info.bw_len;
|
||||||
s = buffer;
|
s = buffer + write_info.bw_len;
|
||||||
len = 0;
|
|
||||||
|
|
||||||
os_breakcheck();
|
os_breakcheck();
|
||||||
if (got_int) {
|
if (got_int) {
|
||||||
@@ -1554,12 +1503,19 @@ restore_backup:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (len > 0 && end > 0) {
|
if (write_info.bw_len > 0 && end > 0) {
|
||||||
write_info.bw_len = len;
|
int remaining = write_info.bw_len;
|
||||||
if (buf_write_bytes(&write_info) == FAIL) {
|
if (buf_write_bytes(&write_info) == FAIL) {
|
||||||
end = 0; // write error
|
end = 0; // write error
|
||||||
}
|
}
|
||||||
nchars += len;
|
nchars += remaining - write_info.bw_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Did we convert & write everything?
|
||||||
|
if (end != 0 && write_info.bw_len > 0) {
|
||||||
|
write_info.bw_conv_error = true;
|
||||||
|
write_info.bw_conv_error_lnum = end;
|
||||||
|
end = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!buf->b_p_fixeol && buf->b_p_eof) {
|
if (!buf->b_p_fixeol && buf->b_p_eof) {
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ local api = n.api
|
|||||||
local skip = t.skip
|
local skip = t.skip
|
||||||
local is_os = t.is_os
|
local is_os = t.is_os
|
||||||
local is_ci = t.is_ci
|
local is_ci = t.is_ci
|
||||||
|
local read_file = t.read_file
|
||||||
|
|
||||||
local fname = 'Xtest-functional-ex_cmds-write'
|
local fname = 'Xtest-functional-ex_cmds-write'
|
||||||
local fname_bak = fname .. '~'
|
local fname_bak = fname .. '~'
|
||||||
@@ -181,6 +182,99 @@ describe(':write', function()
|
|||||||
vim.uv.fs_symlink(fname_bak .. ('/xxxxx'):rep(20), fname)
|
vim.uv.fs_symlink(fname_bak .. ('/xxxxx'):rep(20), fname)
|
||||||
eq("Vim(write):E166: Can't open linked file for writing", pcall_err(command, 'write!'))
|
eq("Vim(write):E166: Can't open linked file for writing", pcall_err(command, 'write!'))
|
||||||
end)
|
end)
|
||||||
|
|
||||||
|
it('fails converting a trailing incomplete sequence', function()
|
||||||
|
-- From https://github.com/neovim/neovim/issues/36990, an invalid UTF-8 sequence at the end of
|
||||||
|
-- the file during conversion testing can overwrite the rest of the file during the real
|
||||||
|
-- conversion.
|
||||||
|
|
||||||
|
api.nvim_buf_set_lines(0, 0, 1, true, { 'line 1', 'line 2', 'aaabbb\235\128' })
|
||||||
|
command('set noendofline nofixendofline')
|
||||||
|
|
||||||
|
eq(
|
||||||
|
"Vim(write):E513: Write error, conversion failed in line 3 (make 'fenc' empty to override)",
|
||||||
|
pcall_err(command, 'write ++enc=latin1 ' .. fname)
|
||||||
|
)
|
||||||
|
end)
|
||||||
|
|
||||||
|
it('converts to latin1 with an invalid sequence at buffer boundary', function()
|
||||||
|
-- From https://github.com/neovim/neovim/issues/36990, an invalid UTF-8 sequence that falls
|
||||||
|
-- right at the end of the 8 KiB buffer used for encoding conversions causes subsequent data to
|
||||||
|
-- be overwritten.
|
||||||
|
|
||||||
|
local content = string.rep('a', 1024 * 8 - 1) .. '\251' .. string.rep('b', 20)
|
||||||
|
api.nvim_buf_set_lines(0, 0, 1, true, { content })
|
||||||
|
command('set noendofline nofixendofline fenc=latin1')
|
||||||
|
command('write ' .. fname)
|
||||||
|
|
||||||
|
local tail = string.sub(read_file(fname) or '', -10)
|
||||||
|
eq('bbbbbbbbbb', tail)
|
||||||
|
end)
|
||||||
|
|
||||||
|
it('converts to CP1251 with iconv', function()
|
||||||
|
api.nvim_buf_set_lines(
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
true,
|
||||||
|
{ 'Привет, мир!', 'Это простой тест.' }
|
||||||
|
)
|
||||||
|
command('write ++enc=cp1251 ++ff=unix ' .. fname)
|
||||||
|
|
||||||
|
eq(
|
||||||
|
'\207\240\232\226\229\242, \236\232\240!\n'
|
||||||
|
.. '\221\242\238 \239\240\238\241\242\238\233 \242\229\241\242.\n',
|
||||||
|
read_file(fname)
|
||||||
|
)
|
||||||
|
end)
|
||||||
|
|
||||||
|
it('converts to GB18030 with iconv', function()
|
||||||
|
api.nvim_buf_set_lines(0, 0, 1, true, { '你好,世界!', '这是一个测试。' })
|
||||||
|
command('write ++enc=gb18030 ++ff=unix ' .. fname)
|
||||||
|
|
||||||
|
eq(
|
||||||
|
'\196\227\186\195\163\172\202\192\189\231\163\161\n'
|
||||||
|
.. '\213\226\202\199\210\187\184\246\178\226\202\212\161\163\n',
|
||||||
|
read_file(fname)
|
||||||
|
)
|
||||||
|
end)
|
||||||
|
|
||||||
|
it('converts to Shift_JIS with iconv', function()
|
||||||
|
api.nvim_buf_set_lines(
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
true,
|
||||||
|
{ 'こんにちは、世界!', 'これはテストです。' }
|
||||||
|
)
|
||||||
|
command('write ++enc=sjis ++ff=unix ' .. fname)
|
||||||
|
|
||||||
|
eq(
|
||||||
|
'\130\177\130\241\130\201\130\191\130\205\129A\144\162\138E\129I\n'
|
||||||
|
.. '\130\177\130\234\130\205\131e\131X\131g\130\197\130\183\129B\n',
|
||||||
|
read_file(fname)
|
||||||
|
)
|
||||||
|
end)
|
||||||
|
|
||||||
|
it('fails converting an illegal sequence with iconv', function()
|
||||||
|
api.nvim_buf_set_lines(0, 0, 1, true, { 'line 1', 'aaa\128bbb' })
|
||||||
|
|
||||||
|
eq(
|
||||||
|
"Vim(write):E513: Write error, conversion failed (make 'fenc' empty to override)",
|
||||||
|
pcall_err(command, 'write ++enc=cp1251 ' .. fname)
|
||||||
|
)
|
||||||
|
end)
|
||||||
|
|
||||||
|
it('handles a multi-byte sequence crossing the buffer boundary converting with iconv', function()
|
||||||
|
local content = string.rep('a', 1024 * 8 - 1) .. 'Дbbbbb'
|
||||||
|
api.nvim_buf_set_lines(0, 0, 1, true, { content })
|
||||||
|
-- Skip the backup so we're testing the "checking" phase also.
|
||||||
|
command('set nowritebackup')
|
||||||
|
command('write ++enc=cp1251 ++ff=unix ' .. fname)
|
||||||
|
|
||||||
|
local expected = string.rep('a', 1024 * 8 - 1) .. '\196bbbbb\n'
|
||||||
|
eq(expected, read_file(fname))
|
||||||
|
end)
|
||||||
end)
|
end)
|
||||||
|
|
||||||
describe(':update', function()
|
describe(':update', function()
|
||||||
|
|||||||
Reference in New Issue
Block a user