eval/encode: Fix handling of invalid UTF-8 strings:

1. Do not allow reading past buffer end when creating error messages.
2. Fix the surrogate pair range and avoid magic constants.
This commit is contained in:
ZyX
2016-02-11 01:29:09 +03:00
parent 77776b09c6
commit f0bd4a1494
3 changed files with 38 additions and 11 deletions

View File

@@ -895,7 +895,8 @@ static inline int convert_to_json_string(garray_T *const gap,
if (p_enc_conv.vc_type != CONV_NONE) { if (p_enc_conv.vc_type != CONV_NONE) {
tofree = string_convert(&p_enc_conv, buf, &utf_len); tofree = string_convert(&p_enc_conv, buf, &utf_len);
if (tofree == NULL) { if (tofree == NULL) {
EMSG2(_("E474: Failed to convert string \"%s\" to UTF-8"), utf_buf); emsgf(_("E474: Failed to convert string \"%.*s\" to UTF-8"),
utf_len, utf_buf);
return FAIL; return FAIL;
} }
utf_buf = tofree; utf_buf = tofree;
@@ -930,18 +931,21 @@ static inline int convert_to_json_string(garray_T *const gap,
} }
default: { default: {
if (ch > 0x7F && shift == 1) { if (ch > 0x7F && shift == 1) {
EMSG2(_("E474: String \"%s\" contains byte that does not start any " emsgf(_("E474: String \"%.*s\" contains byte that does not start "
"UTF-8 character"), utf_buf); "any UTF-8 character"),
utf_len - (i - shift), utf_buf + i - shift);
return FAIL; return FAIL;
} else if ((0xD800 <= ch && ch <= 0xDB7F) } else if ((SURROGATE_HI_START <= ch && ch <= SURROGATE_HI_END)
|| (0xDC00 <= ch && ch <= 0xDFFF)) { || (SURROGATE_LO_START <= ch && ch <= SURROGATE_LO_END)) {
EMSG2(_("E474: UTF-8 string contains code point which belongs " emsgf(_("E474: UTF-8 string contains code point which belongs "
"to surrogate pairs: %s"), utf_buf + i); "to a surrogate pair: %.*s"),
utf_len - (i - shift), utf_buf + i - shift);
return FAIL; return FAIL;
} else if (ENCODE_RAW(p_enc_conv, ch)) { } else if (ENCODE_RAW(p_enc_conv, ch)) {
str_len += shift; str_len += shift;
} else { } else {
str_len += ((sizeof("\\u1234") - 1) * (size_t) (1 + (ch > 0xFFFF))); str_len += ((sizeof("\\u1234") - 1)
* (size_t) (1 + (ch >= SURROGATE_FIRST_CHAR)));
} }
break; break;
} }

View File

@@ -609,6 +609,21 @@ int emsgu(char_u *s, uint64_t n)
return emsg(IObuff); return emsg(IObuff);
} }
/// Format an error message printf-style and hand it to emsg()
///
/// The text is rendered into the shared IObuff buffer (at most IOSIZE bytes,
/// as passed to vim_vsnprintf()), so whatever IObuff held before is
/// overwritten.
///
/// @param[in]  fmt  printf-like format string. The variadic arguments that
///                  follow supply the values for its conversions.
///
/// @return true without formatting anything when emsg_not_now() is non-zero,
///         otherwise the value returned by emsg().
bool emsgf(const char *const fmt, ...)
{
  if (!emsg_not_now()) {
    va_list args;

    va_start(args, fmt);
    // No typval_T argument list here: values come only from the va_list.
    vim_vsnprintf((char *) IObuff, IOSIZE, fmt, args, NULL);
    va_end(args);

    return emsg(IObuff);
  }

  return true;
}
/* /*
* Like msg(), but truncate to a single line if p_shm contains 't', or when * Like msg(), but truncate to a single line if p_shm contains 't', or when
* "force" is TRUE. This truncates in another way as for normal messages. * "force" is TRUE. This truncates in another way as for normal messages.
@@ -3097,11 +3112,12 @@ int vim_snprintf(char *str, size_t str_m, char *fmt, ...)
return str_l; return str_l;
} }
int vim_vsnprintf(char *str, size_t str_m, char *fmt, va_list ap, typval_T *tvs) int vim_vsnprintf(char *str, size_t str_m, const char *fmt, va_list ap,
typval_T *tvs)
{ {
size_t str_l = 0; size_t str_l = 0;
bool str_avail = str_l < str_m; bool str_avail = str_l < str_m;
char *p = fmt; const char *p = fmt;
int arg_idx = 1; int arg_idx = 1;
if (!p) { if (!p) {
@@ -3135,7 +3151,7 @@ int vim_vsnprintf(char *str, size_t str_m, char *fmt, va_list ap, typval_T *tvs)
char tmp[TMP_LEN]; char tmp[TMP_LEN];
// string address in case of string argument // string address in case of string argument
char *str_arg; const char *str_arg;
// natural field width of arg without padding and sign // natural field width of arg without padding and sign
size_t str_arg_l; size_t str_arg_l;

View File

@@ -663,4 +663,11 @@ describe('jsonencode() function', function()
eq(1, eval('"\x01" =~# "\\\\p"')) eq(1, eval('"\x01" =~# "\\\\p"'))
eq('"\\u0001"', funcs.jsonencode('\x01')) eq('"\\u0001"', funcs.jsonencode('\x01'))
end) end)
-- jsonencode() must refuse strings whose UTF-8 bytes decode to a UTF-16
-- surrogate code point, and the E474 message is expected to echo the
-- offending bytes.
it('fails when using surrogate character in a UTF-8 string', function()
-- \xED\xA0\x80 is the three-byte UTF-8 form of U+D800.
eq('Vim(call):E474: UTF-8 string contains code point which belongs to a surrogate pair: \xED\xA0\x80',
exc_exec('call jsonencode("\xED\xA0\x80")'))
-- \xED\xAF\xBF is the three-byte UTF-8 form of U+DBFF, which lies above
-- 0xDB7F and so exercises the upper end of the high-surrogate range.
eq('Vim(call):E474: UTF-8 string contains code point which belongs to a surrogate pair: \xED\xAF\xBF',
exc_exec('call jsonencode("\xED\xAF\xBF")'))
end)
end) end)