mirror of
https://github.com/neovim/neovim.git
synced 2025-09-27 13:38:34 +00:00
eval/encode: Fix non-utf-8 &encoding handling, add tests
This commit is contained in:
@@ -882,11 +882,11 @@ static inline int convert_to_json_string(garray_T *const gap,
|
|||||||
const size_t len)
|
const size_t len)
|
||||||
FUNC_ATTR_NONNULL_ARG(1) FUNC_ATTR_ALWAYS_INLINE
|
FUNC_ATTR_NONNULL_ARG(1) FUNC_ATTR_ALWAYS_INLINE
|
||||||
{
|
{
|
||||||
const char *buf_ = buf;
|
const char *utf_buf = buf;
|
||||||
if (buf_ == NULL) {
|
if (utf_buf == NULL) {
|
||||||
ga_concat(gap, "\"\"");
|
ga_concat(gap, "\"\"");
|
||||||
} else {
|
} else {
|
||||||
size_t len_ = len;
|
size_t utf_len = len;
|
||||||
char *tofree = NULL;
|
char *tofree = NULL;
|
||||||
if (last_p_enc != (const void *) p_enc) {
|
if (last_p_enc != (const void *) p_enc) {
|
||||||
p_enc_conv.vc_type = CONV_NONE;
|
p_enc_conv.vc_type = CONV_NONE;
|
||||||
@@ -895,17 +895,28 @@ static inline int convert_to_json_string(garray_T *const gap,
|
|||||||
last_p_enc = p_enc;
|
last_p_enc = p_enc;
|
||||||
}
|
}
|
||||||
if (p_enc_conv.vc_type != CONV_NONE) {
|
if (p_enc_conv.vc_type != CONV_NONE) {
|
||||||
tofree = string_convert(&p_enc_conv, buf_, &len_);
|
tofree = string_convert(&p_enc_conv, buf, &utf_len);
|
||||||
if (tofree == NULL) {
|
if (tofree == NULL) {
|
||||||
EMSG2(_("E474: Failed to convert string \"%s\" to UTF-8"), buf_);
|
EMSG2(_("E474: Failed to convert string \"%s\" to UTF-8"), utf_buf);
|
||||||
return FAIL;
|
return FAIL;
|
||||||
}
|
}
|
||||||
buf_ = tofree;
|
utf_buf = tofree;
|
||||||
}
|
}
|
||||||
size_t str_len = 0;
|
size_t str_len = 0;
|
||||||
for (size_t i = 0; i < len_;) {
|
// Encode character as \u0000 if
|
||||||
const int ch = utf_ptr2char(buf + i);
|
// 1. It is an ASCII control character (0x0 .. 0x1F, 0x7F).
|
||||||
const size_t shift = (ch == 0? 1: utf_ptr2len(buf + i));
|
// 2. &encoding is not UTF-8 and code point is above 0x7F.
|
||||||
|
// 3. &encoding is UTF-8 and code point is not printable according to
|
||||||
|
// utf_printable().
|
||||||
|
// This is done to make it possible to :echo values when &encoding is not
|
||||||
|
// UTF-8.
|
||||||
|
#define ENCODE_RAW(p_enc_conv, ch) \
|
||||||
|
(ch >= 0x20 && (p_enc_conv.vc_type == CONV_NONE \
|
||||||
|
? utf_printable(ch) \
|
||||||
|
: ch < 0x7F))
|
||||||
|
for (size_t i = 0; i < utf_len;) {
|
||||||
|
const int ch = utf_ptr2char(utf_buf + i);
|
||||||
|
const size_t shift = (ch == 0? 1: utf_ptr2len(utf_buf + i));
|
||||||
assert(shift > 0);
|
assert(shift > 0);
|
||||||
i += shift;
|
i += shift;
|
||||||
switch (ch) {
|
switch (ch) {
|
||||||
@@ -922,14 +933,14 @@ static inline int convert_to_json_string(garray_T *const gap,
|
|||||||
default: {
|
default: {
|
||||||
if (ch > 0x7F && shift == 1) {
|
if (ch > 0x7F && shift == 1) {
|
||||||
EMSG2(_("E474: String \"%s\" contains byte that does not start any "
|
EMSG2(_("E474: String \"%s\" contains byte that does not start any "
|
||||||
"UTF-8 character"), buf_);
|
"UTF-8 character"), utf_buf);
|
||||||
return FAIL;
|
return FAIL;
|
||||||
} else if ((0xD800 <= ch && ch <= 0xDB7F)
|
} else if ((0xD800 <= ch && ch <= 0xDB7F)
|
||||||
|| (0xDC00 <= ch && ch <= 0xDFFF)) {
|
|| (0xDC00 <= ch && ch <= 0xDFFF)) {
|
||||||
EMSG2(_("E474: UTF-8 string contains code point which belongs "
|
EMSG2(_("E474: UTF-8 string contains code point which belongs "
|
||||||
"to surrogate pairs"), buf_);
|
"to surrogate pairs: %s"), utf_buf + i);
|
||||||
return FAIL;
|
return FAIL;
|
||||||
} else if (vim_isprintc(ch)) {
|
} else if (ENCODE_RAW(p_enc_conv, ch)) {
|
||||||
str_len += shift;
|
str_len += shift;
|
||||||
} else {
|
} else {
|
||||||
str_len += ((sizeof("\\u1234") - 1) * (size_t) (1 + (ch > 0xFFFF)));
|
str_len += ((sizeof("\\u1234") - 1) * (size_t) (1 + (ch > 0xFFFF)));
|
||||||
@@ -940,12 +951,12 @@ static inline int convert_to_json_string(garray_T *const gap,
|
|||||||
}
|
}
|
||||||
ga_append(gap, '"');
|
ga_append(gap, '"');
|
||||||
ga_grow(gap, (int) str_len);
|
ga_grow(gap, (int) str_len);
|
||||||
for (size_t i = 0; i < len_;) {
|
for (size_t i = 0; i < utf_len;) {
|
||||||
const int ch = utf_ptr2char(buf + i);
|
const int ch = utf_ptr2char(utf_buf + i);
|
||||||
const size_t shift = (ch == 0? 1: utf_char2len(ch));
|
const size_t shift = (ch == 0? 1: utf_char2len(ch));
|
||||||
assert(shift > 0);
|
assert(shift > 0);
|
||||||
// Is false on invalid unicode, but this should already be handled.
|
// Is false on invalid unicode, but this should already be handled.
|
||||||
assert(ch == 0 || shift == utf_ptr2len(buf + i));
|
assert(ch == 0 || shift == utf_ptr2len(utf_buf + i));
|
||||||
switch (ch) {
|
switch (ch) {
|
||||||
case BS:
|
case BS:
|
||||||
case TAB:
|
case TAB:
|
||||||
@@ -958,8 +969,8 @@ static inline int convert_to_json_string(garray_T *const gap,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default: {
|
default: {
|
||||||
if (vim_isprintc(ch)) {
|
if (ENCODE_RAW(p_enc_conv, ch)) {
|
||||||
ga_concat_len(gap, buf + i, shift);
|
ga_concat_len(gap, utf_buf + i, shift);
|
||||||
} else if (ch < SURROGATE_FIRST_CHAR) {
|
} else if (ch < SURROGATE_FIRST_CHAR) {
|
||||||
ga_concat_len(gap, ((const char[]) {
|
ga_concat_len(gap, ((const char[]) {
|
||||||
'\\', 'u',
|
'\\', 'u',
|
||||||
|
@@ -8,8 +8,8 @@ local execute = helpers.execute
|
|||||||
local exc_exec = helpers.exc_exec
|
local exc_exec = helpers.exc_exec
|
||||||
|
|
||||||
describe('jsondecode() function', function()
|
describe('jsondecode() function', function()
|
||||||
before_each(function()
|
local restart = function(cmd)
|
||||||
clear()
|
clear(cmd)
|
||||||
execute([[
|
execute([[
|
||||||
function Eq(exp, act)
|
function Eq(exp, act)
|
||||||
let act = a:act
|
let act = a:act
|
||||||
@@ -53,7 +53,8 @@ describe('jsondecode() function', function()
|
|||||||
endif
|
endif
|
||||||
endfunction
|
endfunction
|
||||||
]])
|
]])
|
||||||
end)
|
end
|
||||||
|
before_each(restart)
|
||||||
|
|
||||||
local speq = function(expected, actual_expr)
|
local speq = function(expected, actual_expr)
|
||||||
eq(1, funcs.EvalEq(expected, actual_expr))
|
eq(1, funcs.EvalEq(expected, actual_expr))
|
||||||
@@ -396,6 +397,7 @@ describe('jsondecode() function', function()
|
|||||||
it('parses strings with NUL properly', function()
|
it('parses strings with NUL properly', function()
|
||||||
sp_decode_eq({_TYPE='string', _VAL={'\n'}}, '"\\u0000"')
|
sp_decode_eq({_TYPE='string', _VAL={'\n'}}, '"\\u0000"')
|
||||||
sp_decode_eq({_TYPE='string', _VAL={'\n', '\n'}}, '"\\u0000\\n\\u0000"')
|
sp_decode_eq({_TYPE='string', _VAL={'\n', '\n'}}, '"\\u0000\\n\\u0000"')
|
||||||
|
sp_decode_eq({_TYPE='string', _VAL={'\n«\n'}}, '"\\u0000\\u00AB\\u0000"')
|
||||||
end)
|
end)
|
||||||
|
|
||||||
it('parses dictionaries with duplicate keys to special maps', function()
|
it('parses dictionaries with duplicate keys to special maps', function()
|
||||||
@@ -436,6 +438,12 @@ describe('jsondecode() function', function()
|
|||||||
sp_decode_eq({_TYPE='map', _VAL={{'b', 3}, {'a', 1}, {'c', 4}, {'d', 2}, {{_TYPE='string', _VAL={'\n'}}, 4}}},
|
sp_decode_eq({_TYPE='map', _VAL={{'b', 3}, {'a', 1}, {'c', 4}, {'d', 2}, {{_TYPE='string', _VAL={'\n'}}, 4}}},
|
||||||
'{"b": 3, "a": 1, "c": 4, "d": 2, "\\u0000": 4}')
|
'{"b": 3, "a": 1, "c": 4, "d": 2, "\\u0000": 4}')
|
||||||
end)
|
end)
|
||||||
|
|
||||||
|
it('converts strings to latin1 when &encoding is latin1', function()
|
||||||
|
restart('set encoding=latin1')
|
||||||
|
eq('\xAB', funcs.jsondecode('"\\u00AB"'))
|
||||||
|
sp_decode_eq({_TYPE='string', _VAL={'\n\xAB\n'}}, '"\\u0000\\u00AB\\u0000"')
|
||||||
|
end)
|
||||||
end)
|
end)
|
||||||
|
|
||||||
describe('jsonencode() function', function()
|
describe('jsonencode() function', function()
|
||||||
@@ -447,6 +455,7 @@ describe('jsonencode() function', function()
|
|||||||
eq('"\\t"', funcs.jsonencode('\t'))
|
eq('"\\t"', funcs.jsonencode('\t'))
|
||||||
eq('"\\n"', funcs.jsonencode('\n'))
|
eq('"\\n"', funcs.jsonencode('\n'))
|
||||||
eq('"\\u001B"', funcs.jsonencode('\27'))
|
eq('"\\u001B"', funcs.jsonencode('\27'))
|
||||||
|
eq('"þÿþ"', funcs.jsonencode('þÿþ'))
|
||||||
end)
|
end)
|
||||||
|
|
||||||
it('dumps numbers', function()
|
it('dumps numbers', function()
|
||||||
@@ -642,4 +651,16 @@ describe('jsonencode() function', function()
|
|||||||
eq('Vim(call):E118: Too many arguments for function: jsonencode',
|
eq('Vim(call):E118: Too many arguments for function: jsonencode',
|
||||||
exc_exec('call jsonencode(["", ""], 1)'))
|
exc_exec('call jsonencode(["", ""], 1)'))
|
||||||
end)
|
end)
|
||||||
|
|
||||||
|
it('converts strings from latin1 when &encoding is latin1', function()
|
||||||
|
clear('set encoding=latin1')
|
||||||
|
eq('"\\u00AB"', funcs.jsonencode('\xAB'))
|
||||||
|
eq('"\\u0000\\u00AB\\u0000"', eval('jsonencode({"_TYPE": v:msgpack_types.string, "_VAL": ["\\n\xAB\\n"]})'))
|
||||||
|
end)
|
||||||
|
|
||||||
|
it('ignores improper values in &isprint', function()
|
||||||
|
meths.set_option('isprint', '1')
|
||||||
|
eq(1, eval('"\x01" =~# "\\\\p"'))
|
||||||
|
eq('"\\u0001"', funcs.jsonencode('\x01'))
|
||||||
|
end)
|
||||||
end)
|
end)
|
||||||
|
Reference in New Issue
Block a user