mirror of
https://github.com/neovim/neovim.git
synced 2025-09-11 13:58:18 +00:00
eval/decode: Fail on control and invalid unicode characters
This commit is contained in:
@@ -264,8 +264,8 @@ int json_decode_string(const char *const buf, const size_t len,
|
|||||||
}
|
}
|
||||||
case '"': {
|
case '"': {
|
||||||
size_t len = 0;
|
size_t len = 0;
|
||||||
const char *s;
|
const char *const s = ++p;
|
||||||
for (s = ++p; p < e && *p != '"'; p++) {
|
while (p < e && *p != '"') {
|
||||||
if (*p == '\\') {
|
if (*p == '\\') {
|
||||||
p++;
|
p++;
|
||||||
if (p == e) {
|
if (p == e) {
|
||||||
@@ -285,9 +285,10 @@ int json_decode_string(const char *const buf, const size_t len,
|
|||||||
p - 1);
|
p - 1);
|
||||||
goto json_decode_string_fail;
|
goto json_decode_string_fail;
|
||||||
}
|
}
|
||||||
// One UTF-8 character below U+10000 can take up to 3 bytes
|
// One UTF-8 character below U+10000 can take up to 3 bytes,
|
||||||
|
// above up to 6, but they are encoded using two \u escapes.
|
||||||
len += 3;
|
len += 3;
|
||||||
p += 4;
|
p += 5;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case '\\':
|
case '\\':
|
||||||
@@ -299,6 +300,7 @@ int json_decode_string(const char *const buf, const size_t len,
|
|||||||
case 'r':
|
case 'r':
|
||||||
case 'f': {
|
case 'f': {
|
||||||
len++;
|
len++;
|
||||||
|
p++;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default: {
|
default: {
|
||||||
@@ -307,7 +309,30 @@ int json_decode_string(const char *const buf, const size_t len,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
len++;
|
uint8_t p_byte = (uint8_t) *p;
|
||||||
|
// unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
|
||||||
|
if (p_byte < 0x20) {
|
||||||
|
EMSG2(_("E474: ASCII control characters cannot be present "
|
||||||
|
"inside string: %s"), p);
|
||||||
|
goto json_decode_string_fail;
|
||||||
|
}
|
||||||
|
const int ch = utf_ptr2char((char_u *) p);
|
||||||
|
// All characters above U+007F are encoded using two or more bytes
|
||||||
|
// and thus cannot possibly be equal to *p. But utf_ptr2char({0xFF,
|
||||||
|
// 0}) will return 0xFF, even though 0xFF cannot start any UTF-8
|
||||||
|
// code point at all.
|
||||||
|
if (ch >= 0x80 && p_byte == ch) {
|
||||||
|
EMSG2(_("E474: Only UTF-8 strings allowed: %s"), p);
|
||||||
|
goto json_decode_string_fail;
|
||||||
|
} else if (ch > 0x10FFFF) {
|
||||||
|
EMSG2(_("E474: Only UTF-8 code points up to U+10FFFF "
|
||||||
|
"are allowed to appear unescaped: %s"), p);
|
||||||
|
goto json_decode_string_fail;
|
||||||
|
}
|
||||||
|
const size_t ch_len = (size_t) utf_char2len(ch);
|
||||||
|
assert(ch_len == (size_t) (ch ? utf_ptr2len((char_u *) p) : 1));
|
||||||
|
len += ch_len;
|
||||||
|
p += ch_len;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (*p != '"') {
|
if (*p != '"') {
|
||||||
|
@@ -235,6 +235,67 @@ describe('jsondecode() function', function()
|
|||||||
eq('', funcs.jsondecode('""'))
|
eq('', funcs.jsondecode('""'))
|
||||||
eq('\\/"\t\b\n\r\f', funcs.jsondecode([["\\\/\"\t\b\n\r\f"]]))
|
eq('\\/"\t\b\n\r\f', funcs.jsondecode([["\\\/\"\t\b\n\r\f"]]))
|
||||||
eq('/a', funcs.jsondecode([["\/a"]]))
|
eq('/a', funcs.jsondecode([["\/a"]]))
|
||||||
|
-- Unicode characters: 2-byte, 3-byte, 4-byte
|
||||||
|
eq({
|
||||||
|
'«',
|
||||||
|
'ફ',
|
||||||
|
'\xF0\x90\x80\x80',
|
||||||
|
}, funcs.jsondecode({
|
||||||
|
'[',
|
||||||
|
'"«",',
|
||||||
|
'"ફ",',
|
||||||
|
'"\xF0\x90\x80\x80"',
|
||||||
|
']',
|
||||||
|
}))
|
||||||
|
end)
|
||||||
|
|
||||||
|
it('fails on strings with invalid bytes', function()
|
||||||
|
eq('Vim(call):E474: Only UTF-8 strings allowed: \255"',
|
||||||
|
exc_exec('call jsondecode("\\t\\"\\xFF\\"")'))
|
||||||
|
eq('Vim(call):E474: ASCII control characters cannot be present inside string: ',
|
||||||
|
exc_exec('call jsondecode(["\\"\\n\\""])'))
|
||||||
|
-- 0xC2 starts 2-byte unicode character
|
||||||
|
eq('Vim(call):E474: Only UTF-8 strings allowed: \194"',
|
||||||
|
exc_exec('call jsondecode("\\t\\"\\xC2\\"")'))
|
||||||
|
-- 0xE0 0xAA starts 3-byte unicode character
|
||||||
|
eq('Vim(call):E474: Only UTF-8 strings allowed: \224"',
|
||||||
|
exc_exec('call jsondecode("\\t\\"\\xE0\\"")'))
|
||||||
|
eq('Vim(call):E474: Only UTF-8 strings allowed: \224\170"',
|
||||||
|
exc_exec('call jsondecode("\\t\\"\\xE0\\xAA\\"")'))
|
||||||
|
-- 0xF0 0x90 0x80 starts 4-byte unicode character
|
||||||
|
eq('Vim(call):E474: Only UTF-8 strings allowed: \240"',
|
||||||
|
exc_exec('call jsondecode("\\t\\"\\xF0\\"")'))
|
||||||
|
eq('Vim(call):E474: Only UTF-8 strings allowed: \240\144"',
|
||||||
|
exc_exec('call jsondecode("\\t\\"\\xF0\\x90\\"")'))
|
||||||
|
eq('Vim(call):E474: Only UTF-8 strings allowed: \240\144\128"',
|
||||||
|
exc_exec('call jsondecode("\\t\\"\\xF0\\x90\\x80\\"")'))
|
||||||
|
-- 0xF9 0x80 0x80 0x80 starts 5-byte unicode character
|
||||||
|
eq('Vim(call):E474: Only UTF-8 strings allowed: \xF9"',
|
||||||
|
exc_exec('call jsondecode("\\t\\"\\xF9\\"")'))
|
||||||
|
eq('Vim(call):E474: Only UTF-8 strings allowed: \xF9\x80"',
|
||||||
|
exc_exec('call jsondecode("\\t\\"\\xF9\\x80\\"")'))
|
||||||
|
eq('Vim(call):E474: Only UTF-8 strings allowed: \xF9\x80\x80"',
|
||||||
|
exc_exec('call jsondecode("\\t\\"\\xF9\\x80\\x80\\"")'))
|
||||||
|
eq('Vim(call):E474: Only UTF-8 strings allowed: \xF9\x80\x80\x80"',
|
||||||
|
exc_exec('call jsondecode("\\t\\"\\xF9\\x80\\x80\\x80\\"")'))
|
||||||
|
-- 0xFC 0x90 0x80 0x80 0x80 starts 6-byte unicode character
|
||||||
|
eq('Vim(call):E474: Only UTF-8 strings allowed: \xFC"',
|
||||||
|
exc_exec('call jsondecode("\\t\\"\\xFC\\"")'))
|
||||||
|
eq('Vim(call):E474: Only UTF-8 strings allowed: \xFC\x90"',
|
||||||
|
exc_exec('call jsondecode("\\t\\"\\xFC\\x90\\"")'))
|
||||||
|
eq('Vim(call):E474: Only UTF-8 strings allowed: \xFC\x90\x80"',
|
||||||
|
exc_exec('call jsondecode("\\t\\"\\xFC\\x90\\x80\\"")'))
|
||||||
|
eq('Vim(call):E474: Only UTF-8 strings allowed: \xFC\x90\x80\x80"',
|
||||||
|
exc_exec('call jsondecode("\\t\\"\\xFC\\x90\\x80\\x80\\"")'))
|
||||||
|
eq('Vim(call):E474: Only UTF-8 strings allowed: \xFC\x90\x80\x80\x80"',
|
||||||
|
exc_exec('call jsondecode("\\t\\"\\xFC\\x90\\x80\\x80\\x80\\"")'))
|
||||||
|
-- Specification does not allow unquoted characters above 0x10FFFF
|
||||||
|
eq('Vim(call):E474: Only UTF-8 code points up to U+10FFFF are allowed to appear unescaped: \xF9\x80\x80\x80\x80"',
|
||||||
|
exc_exec('call jsondecode("\\t\\"\\xF9\\x80\\x80\\x80\\x80\\"")'))
|
||||||
|
eq('Vim(call):E474: Only UTF-8 code points up to U+10FFFF are allowed to appear unescaped: \xFC\x90\x80\x80\x80\x80"',
|
||||||
|
exc_exec('call jsondecode("\\t\\"\\xFC\\x90\\x80\\x80\\x80\\x80\\"")'))
|
||||||
|
-- '"\xF9\x80\x80\x80\x80"',
|
||||||
|
-- '"\xFC\x90\x80\x80\x80\x80"',
|
||||||
end)
|
end)
|
||||||
end)
|
end)
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user