Files
neovim/src/nvim/eval/decode.c
2024-08-05 12:22:12 +02:00

1135 lines
37 KiB
C

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "klib/kvec.h"
#include "mpack/object.h"
#include "nvim/ascii_defs.h"
#include "nvim/charset.h"
#include "nvim/eval.h"
#include "nvim/eval/decode.h"
#include "nvim/eval/encode.h"
#include "nvim/eval/typval.h"
#include "nvim/eval/typval_defs.h"
#include "nvim/eval_defs.h"
#include "nvim/garray.h"
#include "nvim/gettext_defs.h"
#include "nvim/macros_defs.h"
#include "nvim/mbyte.h"
#include "nvim/memory.h"
#include "nvim/message.h"
#include "nvim/types_defs.h"
#include "nvim/vim_defs.h"
/// One entry of the JSON decoder's container stack: a list or dictionary that
/// has been opened with `[` or `{` and is not closed yet.
typedef struct {
size_t stack_index; ///< Index of this container's own entry in the values
///< stack (the stack size at the time it was opened).
list_T *special_val; ///< _VAL key contents for special maps.
///< When container is not a special dictionary it is
///< NULL.
const char *s; ///< Location of the opening bracket. Used for error
///< messages and as the position to restart parsing from.
typval_T container; ///< The container value itself: VAR_LIST for `[`,
///< VAR_DICT for `{` (a regular dictionary or, for
///< special maps, the special dictionary).
} ContainerStackItem;
/// One entry of the JSON decoder's values stack: a parsed value together with
/// the separator state that was recorded when the value was produced.
typedef struct {
bool is_special_string; ///< True when the value comes from a JSON string
///< that decode_string() did not return as VAR_STRING.
bool didcomma; ///< True if previous token was comma.
bool didcolon; ///< True if previous token was colon.
typval_T val; ///< Actual value.
} ValuesStackItem;
/// Vector containing values not yet saved in any container (open containers
/// and pending dictionary keys live here as well)
typedef kvec_t(ValuesStackItem) ValuesStack;
/// Vector containing the currently open containers; each next container is
/// located inside the previous one
typedef kvec_t(ContainerStackItem) ContainerStack;
#ifdef INCLUDE_GENERATED_DECLARATIONS
# include "eval/decode.c.generated.h"
#endif
/// Build a special dictionary of the form `{'_TYPE': <type list>, '_VAL': val}`
///
/// @param[out]  rettv  Receives the new dictionary as an unlocked VAR_DICT; the
///                     dictionary's reference count is bumped once for it.
/// @param[in]  type  Index into eval_msgpack_type_lists[] selecting the list
///                   stored under the _TYPE key.
/// @param[in]  val  Value stored under the _VAL key. It is copied into the
///                  dictionary item as-is (no extra reference is taken).
static inline void create_special_dict(typval_T *const rettv, const MessagePackType type,
                                       typval_T val)
  FUNC_ATTR_NONNULL_ALL
{
  dict_T *const special = tv_dict_alloc();

  // _TYPE entry: the shared marker list, which gains one reference.
  list_T *const type_list = (list_T *)eval_msgpack_type_lists[type];
  dictitem_T *const type_item = tv_dict_item_alloc_len(S_LEN("_TYPE"));
  type_item->di_tv.v_type = VAR_LIST;
  type_item->di_tv.v_lock = VAR_UNLOCKED;
  type_item->di_tv.vval.v_list = type_list;
  tv_list_ref(type_list);
  tv_dict_add(special, type_item);

  // _VAL entry: takes over the caller's value.
  dictitem_T *const val_item = tv_dict_item_alloc_len(S_LEN("_VAL"));
  val_item->di_tv = val;
  tv_dict_add(special, val_item);

  // The reference handed out through rettv.
  special->dv_refcount++;
  rettv->v_type = VAR_DICT;
  rettv->v_lock = VAR_UNLOCKED;
  rettv->vval.v_dict = special;
}
/// Read the `ht_used` counter of the hash table embedded in dictionary `dict`
/// (`dict` is a pointer to the dictionary).
#define DICT_LEN(dict) ((dict)->dv_hashtab.ht_used)
/// Helper function used for working with stack vectors used by JSON decoder
///
/// Takes a freshly parsed value (or a container that is being closed) and puts
/// it where it belongs: on the values stack when no container is open,
/// otherwise into the innermost open container as a list item, a pending
/// dictionary key or a dictionary value.
///
/// @param[in,out] obj New object. Will either be put into the stack (and,
/// probably, also inside container) or freed.
/// @param[in,out] stack Object stack.
/// @param[in,out] container_stack Container objects stack.
/// @param[in,out] pp Position in string which is currently being parsed. Used
/// for error reporting and is also set when decoding is
/// restarted due to the necessity of converting regular
/// dictionary to a special map.
/// @param[out] next_map_special Is set to true when dictionary needs to be
/// converted to a special map, otherwise not
/// touched. Indicates that decoding has been
/// restarted.
/// @param[out] didcomma True if previous token was comma. Is set to recorded
/// value when decoder is restarted, otherwise unused.
/// @param[out] didcolon True if previous token was colon. Is set to recorded
/// value when decoder is restarted, otherwise unused.
///
/// @return OK in case of success, FAIL in case of error.
static inline int json_decoder_pop(ValuesStackItem obj, ValuesStack *const stack,
ContainerStack *const container_stack, const char **const pp,
bool *const next_map_special, bool *const didcomma,
bool *const didcolon)
FUNC_ATTR_NONNULL_ALL
{
// No open container: obj is kept on the values stack as-is.
if (kv_size(*container_stack) == 0) {
kv_push(*stack, obj);
return OK;
}
ContainerStackItem last_container = kv_last(*container_stack);
const char *val_location = *pp;
// obj is the innermost open container itself, i.e. it is being closed: drop
// it from the container stack and store it into its parent instead. Errors
// are then reported at the place where the closed container started.
if (obj.val.v_type == last_container.container.v_type
// vval.v_list and vval.v_dict should have the same size and offset
&& ((void *)obj.val.vval.v_list
== (void *)last_container.container.vval.v_list)) {
(void)kv_pop(*container_stack);
val_location = last_container.s;
last_container = kv_last(*container_stack);
}
if (last_container.container.v_type == VAR_LIST) {
// Target is a list: every item but the first needs a preceding comma.
if (tv_list_len(last_container.container.vval.v_list) != 0
&& !obj.didcomma) {
semsg(_("E474: Expected comma before list item: %s"), val_location);
tv_clear(&obj.val);
return FAIL;
}
assert(last_container.special_val == NULL);
tv_list_append_owned_tv(last_container.container.vval.v_list, obj.val);
} else if (last_container.stack_index == kv_size(*stack) - 2) {
// Target is a dictionary and exactly one entry (the key) sits above it on
// the values stack: obj is the value for that key.
if (!obj.didcolon) {
semsg(_("E474: Expected colon before dictionary value: %s"),
val_location);
tv_clear(&obj.val);
return FAIL;
}
ValuesStackItem key = kv_pop(*stack);
if (last_container.special_val == NULL) {
// Regular dictionary: add a new item named after the key string.
// These cases should have already been handled.
assert(!(key.is_special_string || key.val.vval.v_string == NULL));
dictitem_T *const obj_di = tv_dict_item_alloc(key.val.vval.v_string);
tv_clear(&key.val);
if (tv_dict_add(last_container.container.vval.v_dict, obj_di)
== FAIL) {
abort();
}
obj_di->di_tv = obj.val;
} else {
// Special map: append a [key, value] pair to the _VAL list.
list_T *const kv_pair = tv_list_alloc(2);
tv_list_append_list(last_container.special_val, kv_pair);
tv_list_append_owned_tv(kv_pair, key.val);
tv_list_append_owned_tv(kv_pair, obj.val);
}
} else {
// Object with key only
if (!obj.is_special_string && obj.val.v_type != VAR_STRING) {
semsg(_("E474: Expected string key: %s"), *pp);
tv_clear(&obj.val);
return FAIL;
} else if (!obj.didcomma
&& (last_container.special_val == NULL
&& (DICT_LEN(last_container.container.vval.v_dict) != 0))) {
semsg(_("E474: Expected comma before dictionary key: %s"), val_location);
tv_clear(&obj.val);
return FAIL;
}
// Handle special dictionaries
// A regular dictionary cannot hold this key when the key is not a plain
// string, is a NULL string, or is already present. In that case everything
// decoded for this dictionary is thrown away and the caller is told to
// parse it again from its opening brace as a special map.
if (last_container.special_val == NULL
&& (obj.is_special_string
|| obj.val.vval.v_string == NULL
|| tv_dict_find(last_container.container.vval.v_dict, obj.val.vval.v_string, -1))) {
tv_clear(&obj.val);
// Restart
(void)kv_pop(*container_stack);
// Copy the dictionary's stack entry first: its recorded separator flags
// are restored below, after the entry itself has been popped and cleared.
ValuesStackItem last_container_val =
kv_A(*stack, last_container.stack_index);
while (kv_size(*stack) > last_container.stack_index) {
tv_clear(&(kv_pop(*stack).val));
}
*pp = last_container.s;
*didcomma = last_container_val.didcomma;
*didcolon = last_container_val.didcolon;
*next_map_special = true;
return OK;
}
// Valid key: keep it on the stack until its value arrives.
kv_push(*stack, obj);
}
return OK;
}
/// Expand to the two arguments consumed by a `%.*s` format: the distance from
/// `p` to `e` cast to int, followed by `p` itself. Note that `p` is evaluated
/// twice.
#define LENP(p, e) \
((int)((e) - (p))), (p)
/// Build a ValuesStackItem from a value, its "special string" marker and the
/// comma/colon flags to record with it.
#define OBJ(obj_tv, is_sp_string, didcomma_, didcolon_) \
((ValuesStackItem) { \
.is_special_string = (is_sp_string), \
.val = (obj_tv), \
.didcomma = (didcomma_), \
.didcolon = (didcolon_), \
})
/// Hand a parsed value to json_decoder_pop() from within parse_json_string().
///
/// Relies on the enclosing function's `stack`, `container_stack`, `p`,
/// `next_map_special`, `didcomma` and `didcolon` (the last three being
/// pointers) and on its `parse_json_string_fail` / `parse_json_string_ret`
/// labels: jumps to the former on failure and to the latter when decoding has
/// been restarted.
#define POP(obj_tv, is_sp_string) \
do { \
if (json_decoder_pop(OBJ(obj_tv, is_sp_string, *didcomma, *didcolon), \
stack, container_stack, \
&p, next_map_special, didcomma, didcolon) \
== FAIL) { \
goto parse_json_string_fail; \
} \
if (*next_map_special) { \
goto parse_json_string_ret; \
} \
} while (0)
/// Create the special dictionary used to represent a MAP
///
/// The dictionary's _VAL entry is a fresh list that callers are expected to
/// fill with key-value pairs.
///
/// @param[out]  ret_tv  Address where the new special dictionary is saved.
/// @param[in]  len  Number of items expected to be added before the list
///                  becomes accessible from Vimscript. Only a preallocation
///                  hint: adding fewer items is valid.
///                  @see ListLenSpecials.
///
/// @return [allocated] the _VAL list which should receive the key-value pairs.
///         The return value may be safely ignored.
list_T *decode_create_map_special_dict(typval_T *const ret_tv, const ptrdiff_t len)
  FUNC_ATTR_NONNULL_ALL
{
  list_T *const pairs = tv_list_alloc(len);
  tv_list_ref(pairs);

  const typval_T pairs_tv = {
    .v_type = VAR_LIST,
    .v_lock = VAR_UNLOCKED,
    .vval = { .v_list = pairs },
  };
  create_special_dict(ret_tv, kMPMap, pairs_tv);

  return pairs;
}
/// Convert a char* buffer to a typval_T
///
/// The result is a VAR_BLOB when `force_blob` is set or the buffer contains
/// a NUL byte, and a VAR_STRING otherwise.
///
/// @param[in]  s  Buffer to decode. May be NULL only when `len` is zero.
/// @param[in]  len  Buffer length in bytes.
/// @param[in]  force_blob  Whether the buffer should always be decoded as
///                         a blob, or only when embedded NUL bytes are present.
/// @param[in]  s_allocated  If true, `s` was allocated and is stored directly
///                          in the returned value. If false, the returned
///                          value gets its own copy of the data.
///
/// @return Decoded value (unlocked).
typval_T decode_string(const char *const s, const size_t len, bool force_blob,
                       const bool s_allocated)
  FUNC_ATTR_WARN_UNUSED_RESULT
{
  assert(s != NULL || len == 0);

  // Only scan for NUL bytes when the caller did not already ask for a blob.
  bool as_blob = force_blob;
  if (!as_blob && s != NULL) {
    as_blob = memchr(s, NUL, len) != NULL;
  }

  if (!as_blob) {
    char *str;
    if (s == NULL || s_allocated) {
      // NULL stays NULL; an allocated buffer is adopted.
      str = (char *)s;
    } else {
      str = xmemdupz(s, len);
    }
    return (typval_T) {
      .v_type = VAR_STRING,
      .v_lock = VAR_UNLOCKED,
      .vval = { .v_string = str },
    };
  }

  typval_T blob_tv;
  blob_tv.v_lock = VAR_UNLOCKED;
  blob_T *const blob = tv_blob_alloc_ret(&blob_tv);
  if (s_allocated) {
    // Adopt the buffer as the blob's backing storage.
    blob->bv_ga.ga_data = (void *)s;
    blob->bv_ga.ga_len = (int)len;
    blob->bv_ga.ga_maxlen = (int)len;
  } else {
    ga_concat_len(&blob->bv_ga, s, len);
  }
  return blob_tv;
}
/// Parse JSON double-quoted string
///
/// Works in two passes: the first one validates the string body and computes
/// an upper bound for the decoded length, the second one decodes escapes into
/// a newly allocated buffer. The decoded value is then handed to
/// json_decoder_pop() via the POP() macro.
///
/// @param[in] buf Buffer being converted.
/// @param[in] buf_len Length of the buffer.
/// @param[in,out] pp Pointer to the start of the string. Must point to '"'.
/// Is advanced to the closing '"'. Also see
/// json_decoder_pop(), it may set pp to another location
/// and alter next_map_special, didcomma and didcolon.
/// @param[in,out] stack Object stack.
/// @param[in,out] container_stack Container objects stack.
/// @param[out] next_map_special Is set to true when dictionary is converted
/// to a special map, otherwise not touched.
/// @param[out] didcomma True if previous token was comma. Is set to recorded
/// value when decoder is restarted, otherwise unused.
/// @param[out] didcolon True if previous token was colon. Is set to recorded
/// value when decoder is restarted, otherwise unused.
///
/// @return OK in case of success, FAIL in case of error.
static inline int parse_json_string(const char *const buf, const size_t buf_len,
const char **const pp, ValuesStack *const stack,
ContainerStack *const container_stack,
bool *const next_map_special, bool *const didcomma,
bool *const didcolon)
FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_ALWAYS_INLINE
{
const char *const e = buf + buf_len;
const char *p = *pp;
// Upper bound for the number of bytes in the decoded string.
size_t len = 0;
// First byte after the opening quote.
const char *const s = ++p;
int ret = OK;
// Pass 1: validate everything up to the closing quote and accumulate len.
while (p < e && *p != '"') {
if (*p == '\\') {
p++;
if (p == e) {
semsg(_("E474: Unfinished escape sequence: %.*s"),
(int)buf_len, buf);
goto parse_json_string_fail;
}
switch (*p) {
case 'u':
// p points at 'u': the four hex digits p[1]..p[4] must be inside buf.
if (p + 4 >= e) {
semsg(_("E474: Unfinished unicode escape sequence: %.*s"),
(int)buf_len, buf);
goto parse_json_string_fail;
} else if (!ascii_isxdigit(p[1])
|| !ascii_isxdigit(p[2])
|| !ascii_isxdigit(p[3])
|| !ascii_isxdigit(p[4])) {
semsg(_("E474: Expected four hex digits after \\u: %.*s"),
LENP(p - 1, e));
goto parse_json_string_fail;
}
// One UTF-8 character below U+10000 can take up to 3 bytes,
// above up to 6, but they are encoded using two \u escapes.
len += 3;
p += 5;
break;
case '\\':
case '/':
case '"':
case 't':
case 'b':
case 'n':
case 'r':
case 'f':
// Single-character escapes decode to exactly one byte.
len++;
p++;
break;
default:
semsg(_("E474: Unknown escape sequence: %.*s"), LENP(p - 1, e));
goto parse_json_string_fail;
}
} else {
uint8_t p_byte = (uint8_t)(*p);
// unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
if (p_byte < 0x20) {
semsg(_("E474: ASCII control characters cannot be present "
"inside string: %.*s"), LENP(p, e));
goto parse_json_string_fail;
}
const int ch = utf_ptr2char(p);
// All characters above U+007F are encoded using two or more bytes
// and thus cannot possibly be equal to *p. But utf_ptr2char({0xFF,
// 0}) will return 0xFF, even though 0xFF cannot start any UTF-8
// code point at all.
//
// The only exception is U+00C3 which is represented as 0xC3 0x83.
if (ch >= 0x80 && p_byte == ch
&& !(ch == 0xC3 && p + 1 < e && (uint8_t)p[1] == 0x83)) {
semsg(_("E474: Only UTF-8 strings allowed: %.*s"), LENP(p, e));
goto parse_json_string_fail;
} else if (ch > 0x10FFFF) {
semsg(_("E474: Only UTF-8 code points up to U+10FFFF "
"are allowed to appear unescaped: %.*s"), LENP(p, e));
goto parse_json_string_fail;
}
const size_t ch_len = (size_t)utf_char2len(ch);
assert(ch_len == (size_t)(ch ? utf_ptr2len(p) : 1));
len += ch_len;
p += ch_len;
}
}
// The loop above stops either at the closing quote or at the buffer end.
if (p == e || *p != '"') {
semsg(_("E474: Expected string end: %.*s"), (int)buf_len, buf);
goto parse_json_string_fail;
}
// Pass 2: decode [s, p) into a new buffer (len bytes plus trailing NUL).
char *str = xmalloc(len + 1);
// Pending \u escape from the high-surrogate range, waiting for a matching
// low surrogate; zero when there is none.
int fst_in_pair = 0;
char *str_end = str;
// Write out a pending high surrogate on its own (no low surrogate followed).
#define PUT_FST_IN_PAIR(fst_in_pair, str_end) \
do { \
if ((fst_in_pair) != 0) { \
(str_end) += utf_char2bytes(fst_in_pair, (str_end)); \
(fst_in_pair) = 0; \
} \
} while (0)
for (const char *t = s; t < p; t++) {
// Anything other than another \u escape ends a pending surrogate.
if (t[0] != '\\' || t[1] != 'u') {
PUT_FST_IN_PAIR(fst_in_pair, str_end);
}
if (*t == '\\') {
t++;
switch (*t) {
case 'u': {
const char ubuf[] = { t[1], t[2], t[3], t[4] };
t += 4;
uvarnumber_T ch;
vim_str2nr(ubuf, NULL, NULL,
STR2NR_HEX | STR2NR_FORCE, NULL, &ch, 4, true, NULL);
if (SURROGATE_HI_START <= ch && ch <= SURROGATE_HI_END) {
// High surrogate: remember it, flushing any earlier unpaired one.
PUT_FST_IN_PAIR(fst_in_pair, str_end);
fst_in_pair = (int)ch;
} else if (SURROGATE_LO_START <= ch && ch <= SURROGATE_LO_END
&& fst_in_pair != 0) {
// Low surrogate completing a pair: combine both halves.
const int full_char = ((int)(ch - SURROGATE_LO_START)
+ ((fst_in_pair - SURROGATE_HI_START) << 10)
+ SURROGATE_FIRST_CHAR);
str_end += utf_char2bytes(full_char, str_end);
fst_in_pair = 0;
} else {
PUT_FST_IN_PAIR(fst_in_pair, str_end);
str_end += utf_char2bytes((int)ch, str_end);
}
break;
}
case '\\':
case '/':
case '"':
case 't':
case 'b':
case 'n':
case 'r':
case 'f': {
// Table mapping the escape letter to the byte it stands for.
static const char escapes[] = {
['\\'] = '\\',
['/'] = '/',
['"'] = '"',
['t'] = TAB,
['b'] = BS,
['n'] = NL,
['r'] = CAR,
['f'] = FF,
};
*str_end++ = escapes[(int)(*t)];
break;
}
default:
// Unknown escapes were rejected by the first pass.
abort();
}
} else {
*str_end++ = *t;
}
}
PUT_FST_IN_PAIR(fst_in_pair, str_end);
#undef PUT_FST_IN_PAIR
*str_end = NUL;
// str is passed as an allocated buffer, so the resulting value stores it.
typval_T obj = decode_string(str, (size_t)(str_end - str), false, true);
POP(obj, obj.v_type != VAR_STRING);
goto parse_json_string_ret;
parse_json_string_fail:
ret = FAIL;
parse_json_string_ret:
*pp = p;
return ret;
}
#undef POP
/// Parse JSON number: both floating-point and integer
///
/// Number format: `-?\d+(?:.\d+)?(?:[eE][+-]?\d+)?`.
///
/// The number is first scanned to find its end and to detect malformed input,
/// then converted with string2float() (when a fraction or an exponent is
/// present) or vim_str2nr() and handed to json_decoder_pop().
///
/// @param[in] buf Buffer being converted.
/// @param[in] buf_len Length of the buffer.
/// @param[in,out] pp Pointer to the start of the number. Must point to
/// a digit or a minus sign. Is advanced to the last
/// character of the number. Also see json_decoder_pop(), it
/// may set pp to another location and alter
/// next_map_special, didcomma and didcolon.
/// @param[in,out] stack Object stack.
/// @param[in,out] container_stack Container objects stack.
/// @param[out] next_map_special Is set to true when dictionary is converted
/// to a special map, otherwise not touched.
/// @param[out] didcomma True if previous token was comma. Is set to recorded
/// value when decoder is restarted, otherwise unused.
/// @param[out] didcolon True if previous token was colon. Is set to recorded
/// value when decoder is restarted, otherwise unused.
///
/// @return OK in case of success, FAIL in case of error.
static inline int parse_json_number(const char *const buf, const size_t buf_len,
const char **const pp, ValuesStack *const stack,
ContainerStack *const container_stack,
bool *const next_map_special, bool *const didcomma,
bool *const didcolon)
FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_ALWAYS_INLINE
{
const char *const e = buf + buf_len;
const char *p = *pp;
int ret = OK;
// Start of the whole number, including the optional minus sign.
const char *const s = p;
// Start of the integer digits.
const char *ints = NULL;
// Start of the fraction digits (right after the dot), NULL without a dot.
const char *fracs = NULL;
// Start of the exponent digits (after the optional sign), NULL without one.
const char *exps = NULL;
// Position right after the `e`/`E` character.
const char *exps_s = NULL;
if (*p == '-') {
p++;
}
ints = p;
if (p >= e) {
goto parse_json_number_check;
}
while (p < e && ascii_isdigit(*p)) {
p++;
}
// More than one integer digit with the first one being zero.
if (p != ints + 1 && *ints == '0') {
semsg(_("E474: Leading zeroes are not allowed: %.*s"), LENP(s, e));
goto parse_json_number_fail;
}
if (p >= e || p == ints) {
goto parse_json_number_check;
}
if (*p == '.') {
p++;
fracs = p;
while (p < e && ascii_isdigit(*p)) {
p++;
}
if (p >= e || p == fracs) {
goto parse_json_number_check;
}
}
if (*p == 'e' || *p == 'E') {
p++;
exps_s = p;
if (p < e && (*p == '-' || *p == '+')) {
p++;
}
exps = p;
while (p < e && ascii_isdigit(*p)) {
p++;
}
}
parse_json_number_check:
// p is now one past the last consumed character. If it still equals the
// start of a digit run, that run was empty.
if (p == ints) {
semsg(_("E474: Missing number after minus sign: %.*s"), LENP(s, e));
goto parse_json_number_fail;
} else if (p == fracs || (fracs != NULL && exps_s == fracs + 1)) {
semsg(_("E474: Missing number after decimal dot: %.*s"), LENP(s, e));
goto parse_json_number_fail;
} else if (p == exps) {
semsg(_("E474: Missing exponent: %.*s"), LENP(s, e));
goto parse_json_number_fail;
}
typval_T tv = {
.v_type = VAR_NUMBER,
.v_lock = VAR_UNLOCKED,
};
// Number of bytes the conversion functions are expected to consume.
const size_t exp_num_len = (size_t)(p - s);
if (fracs || exps) {
// Convert floating-point number
const size_t num_len = string2float(s, &tv.vval.v_float);
// A length mismatch is reported, but does not abort the decoding.
if (exp_num_len != num_len) {
semsg(_("E685: internal error: while converting number \"%.*s\" "
"to float string2float consumed %zu bytes in place of %zu"),
(int)exp_num_len, s, num_len, exp_num_len);
}
tv.v_type = VAR_FLOAT;
} else {
// Convert integer
varnumber_T nr;
int num_len;
vim_str2nr(s, NULL, &num_len, 0, &nr, NULL, (int)(p - s), true, NULL);
// A length mismatch is reported, but does not abort the decoding.
if ((int)exp_num_len != num_len) {
semsg(_("E685: internal error: while converting number \"%.*s\" "
"to integer vim_str2nr consumed %i bytes in place of %zu"),
(int)exp_num_len, s, num_len, exp_num_len);
}
tv.vval.v_number = nr;
}
if (json_decoder_pop(OBJ(tv, false, *didcomma, *didcolon),
stack, container_stack,
&p, next_map_special, didcomma, didcolon) == FAIL) {
goto parse_json_number_fail;
}
// On restart json_decoder_pop() has already moved p; leave it untouched.
if (*next_map_special) {
goto parse_json_number_ret;
}
// Step back so that *pp points at the last character of the number.
p--;
goto parse_json_number_ret;
parse_json_number_fail:
ret = FAIL;
parse_json_number_ret:
*pp = p;
return ret;
}
/// Hand a parsed value to json_decoder_pop() from within json_decode_string().
///
/// Relies on the enclosing function's `stack`, `container_stack`, `p`,
/// `next_map_special`, `didcomma` and `didcolon` locals and on its
/// `json_decode_string_fail` / `json_decode_string_cycle_start` labels: jumps
/// to the former on failure and to the latter when decoding has been
/// restarted.
#define POP(obj_tv, is_sp_string) \
do { \
if (json_decoder_pop(OBJ(obj_tv, is_sp_string, didcomma, didcolon), \
&stack, &container_stack, \
&p, &next_map_special, &didcomma, &didcolon) \
== FAIL) { \
goto json_decode_string_fail; \
} \
if (next_map_special) { \
goto json_decode_string_cycle_start; \
} \
} while (0)
/// Convert JSON string into Vimscript object
///
/// Processes the buffer token by token. Scalars and closed containers are
/// passed to json_decoder_pop(); open containers are tracked on
/// `container_stack`, values that are not stored anywhere yet on `stack`.
/// Decoding finishes once a value is complete while no container is open;
/// only whitespace may follow.
///
/// @param[in] buf String to convert. UTF-8 encoding is assumed.
/// @param[in] buf_len Length of the string.
/// @param[out] rettv Location where to save results. Is set to VAR_UNKNOWN
/// before decoding and only receives a value on success.
///
/// @return OK in case of success, FAIL otherwise.
int json_decode_string(const char *const buf, const size_t buf_len, typval_T *const rettv)
FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT
{
const char *p = buf;
const char *const e = buf + buf_len;
// Skip leading whitespace so that blank input can be reported separately.
while (p < e && (*p == ' ' || *p == TAB || *p == NL || *p == CAR)) {
p++;
}
if (p == e) {
emsg(_("E474: Attempt to decode a blank string"));
return FAIL;
}
int ret = OK;
ValuesStack stack = KV_INITIAL_VALUE;
ContainerStack container_stack = KV_INITIAL_VALUE;
rettv->v_type = VAR_UNKNOWN;
// Separator state: set by `,` and `:`, reset after every other token.
bool didcomma = false;
bool didcolon = false;
// Set when the dictionary starting at p must be decoded as a special map.
bool next_map_special = false;
for (; p < e; p++) {
// Re-entry point used after a restart: p has been moved back to the opening
// `{` of the dictionary that is to be decoded again, without the loop's p++.
json_decode_string_cycle_start:
assert(*p == '{' || next_map_special == false);
switch (*p) {
case '}':
case ']': {
if (kv_size(container_stack) == 0) {
semsg(_("E474: No container to close: %.*s"), LENP(p, e));
goto json_decode_string_fail;
}
ContainerStackItem last_container = kv_last(container_stack);
if (*p == '}' && last_container.container.v_type != VAR_DICT) {
semsg(_("E474: Closing list with curly bracket: %.*s"), LENP(p, e));
goto json_decode_string_fail;
} else if (*p == ']' && last_container.container.v_type != VAR_LIST) {
semsg(_("E474: Closing dictionary with square bracket: %.*s"),
LENP(p, e));
goto json_decode_string_fail;
} else if (didcomma) {
semsg(_("E474: Trailing comma: %.*s"), LENP(p, e));
goto json_decode_string_fail;
} else if (didcolon) {
semsg(_("E474: Expected value after colon: %.*s"), LENP(p, e));
goto json_decode_string_fail;
} else if (last_container.stack_index != kv_size(stack) - 1) {
// Something (a dictionary key) is still on the stack above the container.
assert(last_container.stack_index < kv_size(stack) - 1);
semsg(_("E474: Expected value: %.*s"), LENP(p, e));
goto json_decode_string_fail;
}
if (kv_size(stack) == 1) {
// The outermost container has been closed: decoding is complete.
p++;
(void)kv_pop(container_stack);
goto json_decode_string_after_cycle;
} else {
// Nested container: store it into its parent.
if (json_decoder_pop(kv_pop(stack), &stack, &container_stack, &p,
&next_map_special, &didcomma, &didcolon)
== FAIL) {
goto json_decode_string_fail;
}
assert(!next_map_special);
break;
}
}
case ',': {
if (kv_size(container_stack) == 0) {
semsg(_("E474: Comma not inside container: %.*s"), LENP(p, e));
goto json_decode_string_fail;
}
ContainerStackItem last_container = kv_last(container_stack);
if (didcomma) {
semsg(_("E474: Duplicate comma: %.*s"), LENP(p, e));
goto json_decode_string_fail;
} else if (didcolon) {
semsg(_("E474: Comma after colon: %.*s"), LENP(p, e));
goto json_decode_string_fail;
} else if (last_container.container.v_type == VAR_DICT
&& last_container.stack_index != kv_size(stack) - 1) {
// A key is pending on the stack, so a colon was expected here.
semsg(_("E474: Using comma in place of colon: %.*s"), LENP(p, e));
goto json_decode_string_fail;
} else if (last_container.special_val == NULL
? (last_container.container.v_type == VAR_DICT
? (DICT_LEN(last_container.container.vval.v_dict) == 0)
: (tv_list_len(last_container.container.vval.v_list)
== 0))
: (tv_list_len(last_container.special_val) == 0)) {
// The innermost container is still empty.
semsg(_("E474: Leading comma: %.*s"), LENP(p, e));
goto json_decode_string_fail;
}
didcomma = true;
// `continue` skips the flag reset at the end of the loop body.
continue;
}
case ':': {
if (kv_size(container_stack) == 0) {
semsg(_("E474: Colon not inside container: %.*s"), LENP(p, e));
goto json_decode_string_fail;
}
ContainerStackItem last_container = kv_last(container_stack);
if (last_container.container.v_type != VAR_DICT) {
semsg(_("E474: Using colon not in dictionary: %.*s"), LENP(p, e));
goto json_decode_string_fail;
} else if (last_container.stack_index != kv_size(stack) - 2) {
// A colon is only valid when exactly one entry (the key) is pending.
semsg(_("E474: Unexpected colon: %.*s"), LENP(p, e));
goto json_decode_string_fail;
} else if (didcomma) {
semsg(_("E474: Colon after comma: %.*s"), LENP(p, e));
goto json_decode_string_fail;
} else if (didcolon) {
semsg(_("E474: Duplicate colon: %.*s"), LENP(p, e));
goto json_decode_string_fail;
}
didcolon = true;
// `continue` skips the flag reset at the end of the loop body.
continue;
}
case ' ':
case TAB:
case NL:
case CAR:
// Whitespace between tokens leaves the separator state untouched.
continue;
case 'n':
// The three bytes after `n` must be inside the buffer and spell "ull".
if ((p + 3) >= e || strncmp(p + 1, "ull", 3) != 0) {
semsg(_("E474: Expected null: %.*s"), LENP(p, e));
goto json_decode_string_fail;
}
p += 3;
POP(((typval_T) {
.v_type = VAR_SPECIAL,
.v_lock = VAR_UNLOCKED,
.vval = { .v_special = kSpecialVarNull },
}), false);
break;
case 't':
if ((p + 3) >= e || strncmp(p + 1, "rue", 3) != 0) {
semsg(_("E474: Expected true: %.*s"), LENP(p, e));
goto json_decode_string_fail;
}
p += 3;
POP(((typval_T) {
.v_type = VAR_BOOL,
.v_lock = VAR_UNLOCKED,
.vval = { .v_bool = kBoolVarTrue },
}), false);
break;
case 'f':
if ((p + 4) >= e || strncmp(p + 1, "alse", 4) != 0) {
semsg(_("E474: Expected false: %.*s"), LENP(p, e));
goto json_decode_string_fail;
}
p += 4;
POP(((typval_T) {
.v_type = VAR_BOOL,
.v_lock = VAR_UNLOCKED,
.vval = { .v_bool = kBoolVarFalse },
}), false);
break;
case '"':
if (parse_json_string(buf, buf_len, &p, &stack, &container_stack,
&next_map_special, &didcomma, &didcolon)
== FAIL) {
// Error message was already given
goto json_decode_string_fail;
}
if (next_map_special) {
goto json_decode_string_cycle_start;
}
break;
case '-':
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
if (parse_json_number(buf, buf_len, &p, &stack, &container_stack,
&next_map_special, &didcomma, &didcolon)
== FAIL) {
// Error message was already given
goto json_decode_string_fail;
}
if (next_map_special) {
goto json_decode_string_cycle_start;
}
break;
case '[': {
// Open a new list: it goes onto both stacks and is stored into its parent
// only once it is closed.
list_T *list = tv_list_alloc(kListLenMayKnow);
tv_list_ref(list);
typval_T tv = {
.v_type = VAR_LIST,
.v_lock = VAR_UNLOCKED,
.vval = { .v_list = list },
};
kv_push(container_stack, ((ContainerStackItem) { .stack_index = kv_size(stack),
.s = p,
.container = tv,
.special_val = NULL }));
kv_push(stack, OBJ(tv, false, didcomma, didcolon));
break;
}
case '{': {
// Open a new dictionary: a regular one, or a special map when decoding of
// this dictionary was restarted.
typval_T tv;
list_T *val_list = NULL;
if (next_map_special) {
next_map_special = false;
val_list = decode_create_map_special_dict(&tv, kListLenMayKnow);
} else {
dict_T *dict = tv_dict_alloc();
dict->dv_refcount++;
tv = (typval_T) {
.v_type = VAR_DICT,
.v_lock = VAR_UNLOCKED,
.vval = { .v_dict = dict },
};
}
kv_push(container_stack, ((ContainerStackItem) { .stack_index = kv_size(stack),
.s = p,
.container = tv,
.special_val = val_list }));
kv_push(stack, OBJ(tv, false, didcomma, didcolon));
break;
}
default:
semsg(_("E474: Unidentified byte: %.*s"), LENP(p, e));
goto json_decode_string_fail;
}
// Reached via `break` only: a value or bracket was consumed.
didcomma = false;
didcolon = false;
// A value was completed while no container is open: it is the result.
if (kv_size(container_stack) == 0) {
p++;
break;
}
}
json_decode_string_after_cycle:
// Only whitespace may follow the decoded value.
for (; p < e; p++) {
switch (*p) {
case NL:
case ' ':
case TAB:
case CAR:
break;
default:
semsg(_("E474: Trailing characters: %.*s"), LENP(p, e));
goto json_decode_string_fail;
}
}
if (kv_size(stack) == 1 && kv_size(container_stack) == 0) {
*rettv = kv_pop(stack).val;
goto json_decode_string_ret;
}
// The buffer ended while a container was still open.
semsg(_("E474: Unexpected end of input: %.*s"), (int)buf_len, buf);
json_decode_string_fail:
ret = FAIL;
// Release every value that is still on the stack.
while (kv_size(stack)) {
tv_clear(&(kv_pop(stack).val));
}
json_decode_string_ret:
kv_destroy(stack);
kv_destroy(container_stack);
return ret;
}
#undef LENP
#undef POP
#undef OBJ
#undef DICT_LEN
/// Store an unsigned 64-bit integer into a typval_T
///
/// Values that fit into varnumber_T become a plain VAR_NUMBER. Larger values
/// become a kMPInteger special dictionary whose _VAL list holds four numbers:
/// the constant 1, bits 62-63, bits 31-61 and bits 0-30 of `val`.
/// NOTE(review): the leading 1 is presumably the sign marker of the special
/// integer format; confirm against the msgpack special-dictionary docs.
///
/// @param[out]  rettv  Location where the result is saved.
/// @param[in]  val  Value to convert.
static void positive_integer_to_special_typval(typval_T *rettv, uint64_t val)
{
  if (val > VARNUMBER_MAX) {
    list_T *const parts = tv_list_alloc(4);
    tv_list_ref(parts);
    const typval_T parts_tv = {
      .v_type = VAR_LIST,
      .v_lock = VAR_UNLOCKED,
      .vval = { .v_list = parts },
    };
    create_special_dict(rettv, kMPInteger, parts_tv);

    const varnumber_T fields[] = {
      1,
      (varnumber_T)((val >> 62) & 0x3),
      (varnumber_T)((val >> 31) & 0x7FFFFFFF),
      (varnumber_T)(val & 0x7FFFFFFF),
    };
    for (size_t i = 0; i < sizeof(fields) / sizeof(fields[0]); i++) {
      tv_list_append_number(parts, fields[i]);
    }
    return;
  }

  *rettv = (typval_T) {
    .v_type = VAR_NUMBER,
    .v_lock = VAR_UNLOCKED,
    .vval = { .v_number = (varnumber_T)val },
  };
}
/// mpack "enter" callback: start converting the node the parser has just opened
///
/// Determines the typval_T slot the node's value belongs to (the location
/// stored in `parser->data.p` for a node without parent, a new list item for
/// a child of an array, a key or value cell for a child of a map). It then
/// either stores the finished scalar there or allocates the scratch storage
/// which typval_parse_exit() turns into the final value.
///
/// @param[in]  parser  Parser the node belongs to.
/// @param[in,out]  node  Node being entered. `data[0].p` is set to the result
///                       slot (NULL for data chunks), `data[1].p` to the
///                       scratch allocation or NULL.
static void typval_parse_enter(mpack_parser_t *parser, mpack_node_t *node)
{
  mpack_node_t *const parent = MPACK_PARENT_NODE(node);
  typval_T *slot = NULL;

  if (parent == NULL) {
    slot = parser->data.p;
  } else {
    switch (parent->tok.type) {
    case MPACK_TOKEN_ARRAY: {
      // Append a placeholder item to the parent list and fill it in below.
      list_T *const parent_list = parent->data[1].p;
      slot = tv_list_append_owned_tv(parent_list, (typval_T) { .v_type = VAR_UNKNOWN });
      break;
    }
    case MPACK_TOKEN_MAP: {
      // Cell of the pair at the parent's current position: index 0 or 1
      // depending on `key_visited`.
      typval_T(*pairs)[2] = parent->data[1].p;
      slot = &pairs[parent->pos][parent->key_visited];
      break;
    }
    case MPACK_TOKEN_STR:
    case MPACK_TOKEN_BIN:
    case MPACK_TOKEN_EXT:
      // Children of these nodes are raw data chunks and have no slot.
      assert(node->tok.type == MPACK_TOKEN_CHUNK);
      break;
    default:
      abort();
    }
  }

  // for types that are completed in typval_parse_exit
  node->data[0].p = slot;
  node->data[1].p = NULL;  // free on error if non-NULL

  switch (node->tok.type) {
  case MPACK_TOKEN_NIL: {
    const typval_T nil_tv = {
      .v_type = VAR_SPECIAL,
      .v_lock = VAR_UNLOCKED,
      .vval = { .v_special = kSpecialVarNull },
    };
    *slot = nil_tv;
    break;
  }
  case MPACK_TOKEN_BOOLEAN: {
    const typval_T bool_tv = {
      .v_type = VAR_BOOL,
      .v_lock = VAR_UNLOCKED,
      .vval = {
        .v_bool = mpack_unpack_boolean(node->tok) ? kBoolVarTrue : kBoolVarFalse
      },
    };
    *slot = bool_tv;
    break;
  }
  case MPACK_TOKEN_SINT: {
    const typval_T nr_tv = {
      .v_type = VAR_NUMBER,
      .v_lock = VAR_UNLOCKED,
      .vval = { .v_number = mpack_unpack_sint(node->tok) },
    };
    *slot = nr_tv;
    break;
  }
  case MPACK_TOKEN_UINT:
    positive_integer_to_special_typval(slot, mpack_unpack_uint(node->tok));
    break;
  case MPACK_TOKEN_FLOAT: {
    const typval_T float_tv = {
      .v_type = VAR_FLOAT,
      .v_lock = VAR_UNLOCKED,
      .vval = { .v_float = mpack_unpack_float(node->tok) },
    };
    *slot = float_tv;
    break;
  }
  case MPACK_TOKEN_BIN:
  case MPACK_TOKEN_STR:
  case MPACK_TOKEN_EXT:
    // Only the buffer is allocated here. The chunks fill it and the actual
    // conversion happens in typval_parse_exit.
    node->data[1].p = xmallocz(node->tok.length);
    break;
  case MPACK_TOKEN_CHUNK: {
    // Copy this chunk into the parent's buffer at the parent's position.
    char *const dest = (char *)parent->data[1].p + parent->pos;
    memcpy(dest, node->tok.data.chunk_ptr, node->tok.length);
    break;
  }
  case MPACK_TOKEN_ARRAY: {
    list_T *const items = tv_list_alloc((ptrdiff_t)node->tok.length);
    tv_list_ref(items);
    const typval_T list_tv = {
      .v_type = VAR_LIST,
      .v_lock = VAR_UNLOCKED,
      .vval = { .v_list = items },
    };
    *slot = list_tv;
    node->data[1].p = items;
    break;
  }
  case MPACK_TOKEN_MAP:
    // we don't know if this will be safe to convert to a typval dict yet, so
    // keys and values are collected in a scratch array of pairs first
    node->data[1].p = xmallocz(node->tok.length * 2 * sizeof(typval_T));
    break;
  }
}
/// Release scratch buffers of nodes which were entered but never exited because
/// of a nested error
///
/// Only the `data[1].p` allocations of BIN, STR, EXT and MAP nodes are freed
/// (and reset to NULL).
///
/// Don't bother with typvals as these will be GC:d eventually
///
/// NOTE(review): the loop visits `parser->items[0 .. size - 1]`; confirm against
/// the mpack parser's node indexing that this covers every live node.
///
/// @param[in,out]  parser  Parser whose pending nodes are cleaned up.
void typval_parser_error_free(mpack_parser_t *parser)
{
  for (uint32_t idx = 0; idx < parser->size; idx++) {
    mpack_node_t *const node = &parser->items[idx];
    const bool has_scratch = (node->tok.type == MPACK_TOKEN_BIN
                              || node->tok.type == MPACK_TOKEN_STR
                              || node->tok.type == MPACK_TOKEN_EXT
                              || node->tok.type == MPACK_TOKEN_MAP);
    if (has_scratch) {
      XFREE_CLEAR(node->data[1].p);
    }
  }
}
/// mpack "exit" callback: finish a node after all of its children were parsed
///
/// Completes the node kinds that typval_parse_enter() only prepared:
/// - BIN/STR: the collected buffer is turned into a value by decode_string(),
///   which stores the buffer in that value.
/// - EXT: the result is a kMPExt special dictionary whose _VAL is the list
///   `[ext type, list written by encode_list_write()]`.
/// - MAP: when every key is a non-empty VAR_STRING and no key repeats, the
///   result is a regular dictionary. Otherwise it is a special map holding
///   `[key, value]` pairs.
///
/// @param[in]  parser  Parser the node belongs to (unused here).
/// @param[in,out]  node  Node being exited. `data[0].p` is the result slot and
///                       `data[1].p` the scratch allocation made on enter; the
///                       latter is consumed and reset to NULL.
static void typval_parse_exit(mpack_parser_t *parser, mpack_node_t *node)
{
  typval_T *result = node->data[0].p;
  switch (node->tok.type) {
  case MPACK_TOKEN_BIN:
  case MPACK_TOKEN_STR:
    *result = decode_string(node->data[1].p, node->tok.length, false, true);
    // The buffer is stored in *result now, so it must not be freed on error.
    node->data[1].p = NULL;
    break;
  case MPACK_TOKEN_EXT: {
    list_T *const list = tv_list_alloc(2);
    tv_list_ref(list);
    tv_list_append_number(list, node->tok.data.ext_type);
    list_T *const ext_val_list = tv_list_alloc(kListLenMayKnow);
    tv_list_append_list(list, ext_val_list);
    create_special_dict(result, kMPExt, ((typval_T) { .v_type = VAR_LIST,
                                                      .v_lock = VAR_UNLOCKED,
                                                      .vval = { .v_list = list } }));
    // TODO(bfredl): why not use BLOB?
    encode_list_write((void *)ext_val_list, node->data[1].p, node->tok.length);
    XFREE_CLEAR(node->data[1].p);
  }
  break;
  case MPACK_TOKEN_MAP: {
    typval_T(*items)[2] = node->data[1].p;
    // A regular dictionary needs every key to be a non-empty string.
    for (size_t i = 0; i < node->tok.length; i++) {
      typval_T *key = &items[i][0];
      if (key->v_type != VAR_STRING
          || key->vval.v_string == NULL
          || key->vval.v_string[0] == NUL) {
        goto msgpack_to_vim_generic_map;
      }
    }
    dict_T *const dict = tv_dict_alloc();
    dict->dv_refcount++;
    *result = (typval_T) {
      .v_type = VAR_DICT,
      .v_lock = VAR_UNLOCKED,
      .vval = { .v_dict = dict },
    };
    for (size_t i = 0; i < node->tok.length; i++) {
      char *key = items[i][0].vval.v_string;
      // Allocate through the typval API (as create_special_dict() does) so that
      // every field of the item is initialised, not only its key and value
      // type. The item starts out with an unlocked VAR_UNKNOWN value.
      dictitem_T *const di = tv_dict_item_alloc_len(key, strlen(key));
      if (tv_dict_add(dict, di) == FAIL) {
        // Duplicate key: fallback to generic map
        TV_DICT_ITER(dict, d, {
          d->di_tv.v_type = VAR_SPECIAL;  // don't free values in tv_clear(), they will be reused
          d->di_tv.vval.v_special = kSpecialVarNull;
        });
        tv_clear(result);
        // The rejected item never entered the dictionary and holds no value.
        xfree(di);
        goto msgpack_to_vim_generic_map;
      }
      di->di_tv = items[i][1];
    }
    // The dictionary items hold copies of the keys: release the originals.
    for (size_t i = 0; i < node->tok.length; i++) {
      xfree(items[i][0].vval.v_string);
    }
    XFREE_CLEAR(node->data[1].p);
    break;
    // Empty statement after the label: a label cannot directly precede
    // a declaration.
msgpack_to_vim_generic_map: {}
    list_T *const list = decode_create_map_special_dict(result, node->tok.length);
    for (size_t i = 0; i < node->tok.length; i++) {
      // Keys and values are moved into the pair lists as they are.
      list_T *const kv_pair = tv_list_alloc(2);
      tv_list_append_list(list, kv_pair);
      tv_list_append_owned_tv(kv_pair, items[i][0]);
      tv_list_append_owned_tv(kv_pair, items[i][1]);
    }
    XFREE_CLEAR(node->data[1].p);
    break;
  }
  default:
    // other kinds are handled completely in typval_parse_enter
    break;
  }
}
/// Run the mpack parser with the typval conversion callbacks
///
/// @param[in,out]  parser  Parser to run.
/// @param[in,out]  data  Input position, passed through to mpack_parse().
/// @param[in,out]  size  Remaining input size, passed through to mpack_parse().
///
/// @return The status returned by mpack_parse().
int mpack_parse_typval(mpack_parser_t *parser, const char **data, size_t *size)
{
  const int status = mpack_parse(parser, data, size,
                                 typval_parse_enter, typval_parse_exit);
  return status;
}
/// Decode one MessagePack object into a typval_T
///
/// @param[in,out]  data  Input position, passed through to the parser.
/// @param[in,out]  size  Remaining input size, passed through to the parser.
/// @param[out]  ret  Location where the decoded value is saved. It is set to
///                   VAR_UNKNOWN first and cleared again when parsing does not
///                   finish with MPACK_OK.
///
/// @return The parser status; MPACK_OK on success.
int unpack_typval(const char **data, size_t *size, typval_T *ret)
{
  mpack_parser_t parser;

  ret->v_type = VAR_UNKNOWN;
  mpack_parser_init(&parser, 0);
  parser.data.p = ret;

  const int status = mpack_parse_typval(&parser, data, size);
  if (status == MPACK_OK) {
    return status;
  }

  // Any other status: drop pending scratch buffers and the partial result.
  typval_parser_error_free(&parser);
  tv_clear(ret);
  return status;
}