mirror of
https://github.com/neovim/neovim.git
synced 2025-09-26 21:18:34 +00:00
eval/encode: Fix handling of invalid UTF-8 strings:
1. Do not read past the end of the buffer when creating error messages. 2. Fix the surrogate pair range and avoid magic constants.
This commit is contained in:
@@ -895,7 +895,8 @@ static inline int convert_to_json_string(garray_T *const gap,
|
|||||||
if (p_enc_conv.vc_type != CONV_NONE) {
|
if (p_enc_conv.vc_type != CONV_NONE) {
|
||||||
tofree = string_convert(&p_enc_conv, buf, &utf_len);
|
tofree = string_convert(&p_enc_conv, buf, &utf_len);
|
||||||
if (tofree == NULL) {
|
if (tofree == NULL) {
|
||||||
EMSG2(_("E474: Failed to convert string \"%s\" to UTF-8"), utf_buf);
|
emsgf(_("E474: Failed to convert string \"%.*s\" to UTF-8"),
|
||||||
|
utf_len, utf_buf);
|
||||||
return FAIL;
|
return FAIL;
|
||||||
}
|
}
|
||||||
utf_buf = tofree;
|
utf_buf = tofree;
|
||||||
@@ -930,18 +931,21 @@ static inline int convert_to_json_string(garray_T *const gap,
|
|||||||
}
|
}
|
||||||
default: {
|
default: {
|
||||||
if (ch > 0x7F && shift == 1) {
|
if (ch > 0x7F && shift == 1) {
|
||||||
EMSG2(_("E474: String \"%s\" contains byte that does not start any "
|
emsgf(_("E474: String \"%.*s\" contains byte that does not start "
|
||||||
"UTF-8 character"), utf_buf);
|
"any UTF-8 character"),
|
||||||
|
utf_len - (i - shift), utf_buf + i - shift);
|
||||||
return FAIL;
|
return FAIL;
|
||||||
} else if ((0xD800 <= ch && ch <= 0xDB7F)
|
} else if ((SURROGATE_HI_START <= ch && ch <= SURROGATE_HI_END)
|
||||||
|| (0xDC00 <= ch && ch <= 0xDFFF)) {
|
|| (SURROGATE_LO_START <= ch && ch <= SURROGATE_LO_END)) {
|
||||||
EMSG2(_("E474: UTF-8 string contains code point which belongs "
|
emsgf(_("E474: UTF-8 string contains code point which belongs "
|
||||||
"to surrogate pairs: %s"), utf_buf + i);
|
"to a surrogate pair: %.*s"),
|
||||||
|
utf_len - (i - shift), utf_buf + i - shift);
|
||||||
return FAIL;
|
return FAIL;
|
||||||
} else if (ENCODE_RAW(p_enc_conv, ch)) {
|
} else if (ENCODE_RAW(p_enc_conv, ch)) {
|
||||||
str_len += shift;
|
str_len += shift;
|
||||||
} else {
|
} else {
|
||||||
str_len += ((sizeof("\\u1234") - 1) * (size_t) (1 + (ch > 0xFFFF)));
|
str_len += ((sizeof("\\u1234") - 1)
|
||||||
|
* (size_t) (1 + (ch >= SURROGATE_FIRST_CHAR)));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@@ -609,6 +609,21 @@ int emsgu(char_u *s, uint64_t n)
|
|||||||
return emsg(IObuff);
|
return emsg(IObuff);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Print an error message with unknown number of arguments
///
/// The message is formatted with vim_vsnprintf() into the shared IObuff
/// buffer (at most IOSIZE bytes) and the resulting string is passed to emsg().
///
/// NOTE(review): because IObuff is overwritten while formatting, arguments
/// presumably must not point into IObuff themselves — confirm with callers.
///
/// @param[in]  fmt  Format string understood by vim_vsnprintf().
/// @param[in]  ...  Arguments consumed by the format string.
///
/// @return true, without formatting anything, when emsg_not_now() is true;
///         otherwise whatever emsg() returns for the formatted message.
|
||||||
|
bool emsgf(const char *const fmt, ...)
|
||||||
|
{
|
||||||
|
if (emsg_not_now()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
va_list ap;
|
||||||
|
va_start(ap, fmt);
|
||||||
|
vim_vsnprintf((char *) IObuff, IOSIZE, fmt, ap, NULL);
|
||||||
|
va_end(ap);
|
||||||
|
|
||||||
|
return emsg(IObuff);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Like msg(), but truncate to a single line if p_shm contains 't', or when
|
* Like msg(), but truncate to a single line if p_shm contains 't', or when
|
||||||
* "force" is TRUE. This truncates in another way as for normal messages.
|
* "force" is TRUE. This truncates in another way as for normal messages.
|
||||||
@@ -3097,11 +3112,12 @@ int vim_snprintf(char *str, size_t str_m, char *fmt, ...)
|
|||||||
return str_l;
|
return str_l;
|
||||||
}
|
}
|
||||||
|
|
||||||
int vim_vsnprintf(char *str, size_t str_m, char *fmt, va_list ap, typval_T *tvs)
|
int vim_vsnprintf(char *str, size_t str_m, const char *fmt, va_list ap,
|
||||||
|
typval_T *tvs)
|
||||||
{
|
{
|
||||||
size_t str_l = 0;
|
size_t str_l = 0;
|
||||||
bool str_avail = str_l < str_m;
|
bool str_avail = str_l < str_m;
|
||||||
char *p = fmt;
|
const char *p = fmt;
|
||||||
int arg_idx = 1;
|
int arg_idx = 1;
|
||||||
|
|
||||||
if (!p) {
|
if (!p) {
|
||||||
@@ -3135,7 +3151,7 @@ int vim_vsnprintf(char *str, size_t str_m, char *fmt, va_list ap, typval_T *tvs)
|
|||||||
char tmp[TMP_LEN];
|
char tmp[TMP_LEN];
|
||||||
|
|
||||||
// string address in case of string argument
|
// string address in case of string argument
|
||||||
char *str_arg;
|
const char *str_arg;
|
||||||
|
|
||||||
// natural field width of arg without padding and sign
|
// natural field width of arg without padding and sign
|
||||||
size_t str_arg_l;
|
size_t str_arg_l;
|
||||||
|
@@ -663,4 +663,11 @@ describe('jsonencode() function', function()
|
|||||||
eq(1, eval('"\x01" =~# "\\\\p"'))
|
eq(1, eval('"\x01" =~# "\\\\p"'))
|
||||||
eq('"\\u0001"', funcs.jsonencode('\x01'))
|
eq('"\\u0001"', funcs.jsonencode('\x01'))
|
||||||
end)
|
end)
|
||||||
|
|
||||||
|
-- jsonencode() must refuse UTF-8 input that decodes to a surrogate code point
-- and echo the offending bytes back in the E474 message.
-- "\xED\xA0\x80" is the three-byte encoding of U+D800 and "\xED\xAF\xBF" is
-- the three-byte encoding of U+DBFF.
it('fails when using surrogate character in a UTF-8 string', function()
|
||||||
|
eq('Vim(call):E474: UTF-8 string contains code point which belongs to a surrogate pair: \xED\xA0\x80',
|
||||||
|
exc_exec('call jsonencode("\xED\xA0\x80")'))
|
||||||
|
eq('Vim(call):E474: UTF-8 string contains code point which belongs to a surrogate pair: \xED\xAF\xBF',
|
||||||
|
exc_exec('call jsonencode("\xED\xAF\xBF")'))
|
||||||
|
end)
|
||||||
end)
|
end)
|
||||||
|
Reference in New Issue
Block a user