neovim/src/nvim/eval/decode.c

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "klib/kvec.h"
#include "mpack/object.h"
#include "nvim/ascii_defs.h"
#include "nvim/charset.h"
#include "nvim/eval.h"
#include "nvim/eval/decode.h"
#include "nvim/eval/encode.h"
#include "nvim/eval/typval.h"
#include "nvim/eval/typval_defs.h"
#include "nvim/eval_defs.h"
#include "nvim/garray.h"
#include "nvim/gettext_defs.h"
#include "nvim/macros_defs.h"
#include "nvim/mbyte.h"
#include "nvim/memory.h"
#include "nvim/message.h"
#include "nvim/types_defs.h"
#include "nvim/vim_defs.h"

/// Helper structure for container_struct
typedef struct {
  size_t stack_index;   ///< Index of current container in stack.
  list_T *special_val;  ///< _VAL key contents for special maps.
                        ///< When container is not a special dictionary it is
                        ///< NULL.
  const char *s;        ///< Location where container starts.
  typval_T container;   ///< Container. Either VAR_LIST, VAR_DICT or VAR_LIST
                        ///< which is _VAL from special dictionary.
} ContainerStackItem;

/// Helper structure for values struct
typedef struct {
  bool is_special_string;  ///< Indicates that current value is a special
                           ///< dictionary with string.
  bool didcomma;           ///< True if previous token was comma.
  bool didcolon;           ///< True if previous token was colon.
  typval_T val;            ///< Actual value.
} ValuesStackItem;

/// Vector containing values not yet saved in any container
typedef kvec_t(ValuesStackItem) ValuesStack;

/// Vector containing containers, each next container is located inside previous
typedef kvec_t(ContainerStackItem) ContainerStack;

#ifdef INCLUDE_GENERATED_DECLARATIONS
# include "eval/decode.c.generated.h"
#endif

/// Create special dictionary
///
/// @param[out]  rettv  Location where created dictionary will be saved.
/// @param[in]  type  Type of the dictionary.
/// @param[in]  val  Value associated with the _VAL key.
static inline void create_special_dict(typval_T *const rettv, const MessagePackType type,
                                       typval_T val)
  FUNC_ATTR_NONNULL_ALL
{
  dict_T *const dict = tv_dict_alloc();
  dictitem_T *const type_di = tv_dict_item_alloc_len(S_LEN("_TYPE"));
  type_di->di_tv.v_type = VAR_LIST;
  type_di->di_tv.v_lock = VAR_UNLOCKED;
  type_di->di_tv.vval.v_list = (list_T *)eval_msgpack_type_lists[type];
  tv_list_ref(type_di->di_tv.vval.v_list);
  tv_dict_add(dict, type_di);
  dictitem_T *const val_di = tv_dict_item_alloc_len(S_LEN("_VAL"));
  val_di->di_tv = val;
  tv_dict_add(dict, val_di);
  dict->dv_refcount++;
  *rettv = (typval_T) {
    .v_type = VAR_DICT,
    .v_lock = VAR_UNLOCKED,
    .vval = { .v_dict = dict },
  };
}

#define DICT_LEN(dict) (dict)->dv_hashtab.ht_used

/// Helper function used for working with stack vectors used by JSON decoder
///
/// @param[in,out]  obj  New object. Will either be put into the stack (and,
///                      probably, also inside container) or freed.
/// @param[out]  stack  Object stack.
/// @param[out]  container_stack  Container objects stack.
/// @param[in,out]  pp  Position in string which is currently being parsed. Used
///                     for error reporting and is also set when decoding is
///                     restarted due to the necessity of converting regular
///                     dictionary to a special map.
/// @param[out]  next_map_special  Is set to true when dictionary needs to be
///                                converted to a special map, otherwise not
///                                touched. Indicates that decoding has been
///                                restarted.
/// @param[out]  didcomma  True if previous token was comma. Is set to recorded
///                        value when decoder is restarted, otherwise unused.
/// @param[out]  didcolon  True if previous token was colon. Is set to recorded
///                        value when decoder is restarted, otherwise unused.
///
/// @return OK in case of success, FAIL in case of error.
static inline int json_decoder_pop(ValuesStackItem obj, ValuesStack *const stack,
                                   ContainerStack *const container_stack, const char **const pp,
                                   bool *const next_map_special, bool *const didcomma,
                                   bool *const didcolon)
  FUNC_ATTR_NONNULL_ALL
{
  if (kv_size(*container_stack) == 0) {
    kv_push(*stack, obj);
    return OK;
  }
  ContainerStackItem last_container = kv_last(*container_stack);
  const char *val_location = *pp;
  if (obj.val.v_type == last_container.container.v_type
      // vval.v_list and vval.v_dict should have the same size and offset
      && ((void *)obj.val.vval.v_list
          == (void *)last_container.container.vval.v_list)) {
    (void)kv_pop(*container_stack);
    val_location = last_container.s;
    last_container = kv_last(*container_stack);
  }
  if (last_container.container.v_type == VAR_LIST) {
    if (tv_list_len(last_container.container.vval.v_list) != 0
        && !obj.didcomma) {
      semsg(_("E474: Expected comma before list item: %s"), val_location);
      tv_clear(&obj.val);
      return FAIL;
    }
    assert(last_container.special_val == NULL);
    tv_list_append_owned_tv(last_container.container.vval.v_list, obj.val);
  } else if (last_container.stack_index == kv_size(*stack) - 2) {
    if (!obj.didcolon) {
      semsg(_("E474: Expected colon before dictionary value: %s"),
            val_location);
      tv_clear(&obj.val);
      return FAIL;
    }
    ValuesStackItem key = kv_pop(*stack);
    if (last_container.special_val == NULL) {
      // These cases should have already been handled.
      assert(!(key.is_special_string || key.val.vval.v_string == NULL));
      dictitem_T *const obj_di = tv_dict_item_alloc(key.val.vval.v_string);
      tv_clear(&key.val);
      if (tv_dict_add(last_container.container.vval.v_dict, obj_di)
          == FAIL) {
        abort();
      }
      obj_di->di_tv = obj.val;
    } else {
      list_T *const kv_pair = tv_list_alloc(2);
      tv_list_append_list(last_container.special_val, kv_pair);
      tv_list_append_owned_tv(kv_pair, key.val);
      tv_list_append_owned_tv(kv_pair, obj.val);
    }
  } else {
    // Object with key only
    if (!obj.is_special_string && obj.val.v_type != VAR_STRING) {
      semsg(_("E474: Expected string key: %s"), *pp);
      tv_clear(&obj.val);
      return FAIL;
    } else if (!obj.didcomma
               && (last_container.special_val == NULL
                   && (DICT_LEN(last_container.container.vval.v_dict) != 0))) {
      semsg(_("E474: Expected comma before dictionary key: %s"), val_location);
      tv_clear(&obj.val);
      return FAIL;
    }
    // Handle special dictionaries
    if (last_container.special_val == NULL
        && (obj.is_special_string
            || obj.val.vval.v_string == NULL
            || tv_dict_find(last_container.container.vval.v_dict, obj.val.vval.v_string, -1))) {
      tv_clear(&obj.val);

      // Restart
      (void)kv_pop(*container_stack);
      ValuesStackItem last_container_val =
        kv_A(*stack, last_container.stack_index);
      while (kv_size(*stack) > last_container.stack_index) {
        tv_clear(&(kv_pop(*stack).val));
      }
      *pp = last_container.s;
      *didcomma = last_container_val.didcomma;
      *didcolon = last_container_val.didcolon;
      *next_map_special = true;
      return OK;
    }
    kv_push(*stack, obj);
  }
  return OK;
}

#define LENP(p, e) \
  ((int)((e) - (p))), (p)
#define OBJ(obj_tv, is_sp_string, didcomma_, didcolon_) \
  ((ValuesStackItem) { \
    .is_special_string = (is_sp_string), \
    .val = (obj_tv), \
    .didcomma = (didcomma_), \
    .didcolon = (didcolon_), \
  })

#define POP(obj_tv, is_sp_string) \
  do { \
    if (json_decoder_pop(OBJ(obj_tv, is_sp_string, *didcomma, *didcolon), \
                         stack, container_stack, \
                         &p, next_map_special, didcomma, didcolon) \
        == FAIL) { \
      goto parse_json_string_fail; \
    } \
    if (*next_map_special) { \
      goto parse_json_string_ret; \
    } \
  } while (0)

/// Create a new special dictionary that ought to represent a MAP
///
/// @param[out]  ret_tv  Address where new special dictionary is saved.
/// @param[in]  len  Expected number of items to be populated before list
///                  becomes accessible from Vimscript. It is still valid to
///                  underpopulate a list, value only controls how many elements
///                  will be allocated in advance. @see ListLenSpecials.
///
/// @return [allocated] list which should contain key-value pairs. Return value
///                     may be safely ignored.
list_T *decode_create_map_special_dict(typval_T *const ret_tv, const ptrdiff_t len)
  FUNC_ATTR_NONNULL_ALL
{
  list_T *const list = tv_list_alloc(len);
  tv_list_ref(list);
  create_special_dict(ret_tv, kMPMap, ((typval_T) {
    .v_type = VAR_LIST,
    .v_lock = VAR_UNLOCKED,
    .vval = { .v_list = list },
  }));
  return list;
}

/// Convert char* string to typval_T
///
/// Depending on whether string has (no) NUL bytes, it may use a special
/// dictionary, VAR_BLOB, or decode string to VAR_STRING.
///
/// @param[in]  s  String to decode.
/// @param[in]  len  String length.
/// @param[in]  force_blob  whether string always should be decoded as a blob,
///                         or only when embedded NUL bytes were present
/// @param[in]  s_allocated  If true, then `s` was allocated and can be saved in
///                          a returned structure. If it is not saved there, it
///                          will be freed.
///
/// @return Decoded string.
typval_T decode_string(const char *const s, const size_t len, bool force_blob,
                       const bool s_allocated)
  FUNC_ATTR_WARN_UNUSED_RESULT
{
  assert(s != NULL || len == 0);
  const bool use_blob = force_blob || ((s != NULL) && (memchr(s, NUL, len) != NULL));
  if (use_blob) {
    typval_T tv;
    tv.v_lock = VAR_UNLOCKED;
    blob_T *b = tv_blob_alloc_ret(&tv);
    if (s_allocated) {
      b->bv_ga.ga_data = (void *)s;
      b->bv_ga.ga_len = (int)len;
      b->bv_ga.ga_maxlen = (int)len;
    } else {
      ga_concat_len(&b->bv_ga, s, len);
    }
    return tv;
  }
  return (typval_T) {
    .v_type = VAR_STRING,
    .v_lock = VAR_UNLOCKED,
    .vval = { .v_string = ((s == NULL || s_allocated) ? (char *)s : xmemdupz(s, len)) },
  };
}

/// Parse JSON double-quoted string
///
/// @param[in]  buf  Buffer being converted.
/// @param[in]  buf_len  Length of the buffer.
/// @param[in,out]  pp  Pointer to the start of the string. Must point to '"'.
///                     Is advanced to the closing '"'. Also see
///                     json_decoder_pop(), it may set pp to another location
///                     and alter next_map_special, didcomma and didcolon.
/// @param[out]  stack  Object stack.
/// @param[out]  container_stack  Container objects stack.
/// @param[out]  next_map_special  Is set to true when dictionary is converted
///                                to a special map, otherwise not touched.
/// @param[out]  didcomma  True if previous token was comma. Is set to recorded
///                        value when decoder is restarted, otherwise unused.
/// @param[out]  didcolon  True if previous token was colon. Is set to recorded
///                        value when decoder is restarted, otherwise unused.
///
/// @return OK in case of success, FAIL in case of error.
static inline int parse_json_string(const char *const buf, const size_t buf_len,
                                    const char **const pp, ValuesStack *const stack,
                                    ContainerStack *const container_stack,
                                    bool *const next_map_special, bool *const didcomma,
                                    bool *const didcolon)
  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_ALWAYS_INLINE
{
  const char *const e = buf + buf_len;
  const char *p = *pp;
  size_t len = 0;
  const char *const s = ++p;
  int ret = OK;
  while (p < e && *p != '"') {
    if (*p == '\\') {
      p++;
      if (p == e) {
        semsg(_("E474: Unfinished escape sequence: %.*s"),
              (int)buf_len, buf);
        goto parse_json_string_fail;
      }
      switch (*p) {
      case 'u':
        if (p + 4 >= e) {
          semsg(_("E474: Unfinished unicode escape sequence: %.*s"),
                (int)buf_len, buf);
          goto parse_json_string_fail;
        } else if (!ascii_isxdigit(p[1])
                   || !ascii_isxdigit(p[2])
                   || !ascii_isxdigit(p[3])
                   || !ascii_isxdigit(p[4])) {
          semsg(_("E474: Expected four hex digits after \\u: %.*s"),
                LENP(p - 1, e));
          goto parse_json_string_fail;
        }
        // One UTF-8 character below U+10000 can take up to 3 bytes,
        // above up to 6, but they are encoded using two \u escapes.
        len += 3;
        p += 5;
        break;
      case '\\':
      case '/':
      case '"':
      case 't':
      case 'b':
      case 'n':
      case 'r':
      case 'f':
        len++;
        p++;
        break;
      default:
        semsg(_("E474: Unknown escape sequence: %.*s"), LENP(p - 1, e));
        goto parse_json_string_fail;
      }
    } else {
      uint8_t p_byte = (uint8_t)(*p);
      // unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
      if (p_byte < 0x20) {
        semsg(_("E474: ASCII control characters cannot be present "
                "inside string: %.*s"), LENP(p, e));
        goto parse_json_string_fail;
      }
      const int ch = utf_ptr2char(p);
      // All characters above U+007F are encoded using two or more bytes
      // and thus cannot possibly be equal to *p. But utf_ptr2char({0xFF,
      // 0}) will return 0xFF, even though 0xFF cannot start any UTF-8
      // code point at all.
      //
      // The only exception is U+00C3 which is represented as 0xC3 0x83.
      if (ch >= 0x80 && p_byte == ch
          && !(ch == 0xC3 && p + 1 < e && (uint8_t)p[1] == 0x83)) {
        semsg(_("E474: Only UTF-8 strings allowed: %.*s"), LENP(p, e));
        goto parse_json_string_fail;
      } else if (ch > 0x10FFFF) {
        semsg(_("E474: Only UTF-8 code points up to U+10FFFF "
                "are allowed to appear unescaped: %.*s"), LENP(p, e));
        goto parse_json_string_fail;
      }
      const size_t ch_len = (size_t)utf_char2len(ch);
      assert(ch_len == (size_t)(ch ? utf_ptr2len(p) : 1));
      len += ch_len;
      p += ch_len;
    }
  }
  if (p == e || *p != '"') {
    semsg(_("E474: Expected string end: %.*s"), (int)buf_len, buf);
    goto parse_json_string_fail;
  }
  char *str = xmalloc(len + 1);
  int fst_in_pair = 0;
  char *str_end = str;
#define PUT_FST_IN_PAIR(fst_in_pair, str_end) \
  do { \
    if ((fst_in_pair) != 0) { \
      (str_end) += utf_char2bytes(fst_in_pair, (str_end)); \
      (fst_in_pair) = 0; \
    } \
  } while (0)
  for (const char *t = s; t < p; t++) {
    if (t[0] != '\\' || t[1] != 'u') {
      PUT_FST_IN_PAIR(fst_in_pair, str_end);
    }
    if (*t == '\\') {
      t++;
      switch (*t) {
      case 'u': {
        const char ubuf[] = { t[1], t[2], t[3], t[4] };
        t += 4;
        uvarnumber_T ch;
        vim_str2nr(ubuf, NULL, NULL,
                   STR2NR_HEX | STR2NR_FORCE, NULL, &ch, 4, true, NULL);
        if (SURROGATE_HI_START <= ch && ch <= SURROGATE_HI_END) {
          PUT_FST_IN_PAIR(fst_in_pair, str_end);
          fst_in_pair = (int)ch;
        } else if (SURROGATE_LO_START <= ch && ch <= SURROGATE_LO_END
                   && fst_in_pair != 0) {
          const int full_char = ((int)(ch - SURROGATE_LO_START)
                                 + ((fst_in_pair - SURROGATE_HI_START) << 10)
                                 + SURROGATE_FIRST_CHAR);
          str_end += utf_char2bytes(full_char, str_end);
          fst_in_pair = 0;
        } else {
          PUT_FST_IN_PAIR(fst_in_pair, str_end);
          str_end += utf_char2bytes((int)ch, str_end);
        }
        break;
      }
      case '\\':
      case '/':
      case '"':
      case 't':
      case 'b':
      case 'n':
      case 'r':
      case 'f': {
        static const char escapes[] = {
          ['\\'] = '\\',
          ['/'] = '/',
          ['"'] = '"',
          ['t'] = TAB,
          ['b'] = BS,
          ['n'] = NL,
          ['r'] = CAR,
          ['f'] = FF,
        };
        *str_end++ = escapes[(int)(*t)];
        break;
      }
      default:
        abort();
      }
    } else {
      *str_end++ = *t;
    }
  }
  PUT_FST_IN_PAIR(fst_in_pair, str_end);
#undef PUT_FST_IN_PAIR
  *str_end = NUL;
  typval_T obj = decode_string(str, (size_t)(str_end - str), false, true);
  POP(obj, obj.v_type != VAR_STRING);
  goto parse_json_string_ret;
parse_json_string_fail:
  ret = FAIL;
parse_json_string_ret:
  *pp = p;
  return ret;
}

#undef POP

/// Parse JSON number: both floating-point and integer
///
/// Number format: `-?\d+(?:.\d+)?(?:[eE][+-]?\d+)?`.
///
/// @param[in]  buf  Buffer being converted.
/// @param[in]  buf_len  Length of the buffer.
/// @param[in,out]  pp  Pointer to the start of the number. Must point to
///                     a digit or a minus sign. Is advanced to the last
///                     character of the number. Also see json_decoder_pop(), it
///                     may set pp to another location and alter
///                     next_map_special, didcomma and didcolon.
/// @param[out]  stack  Object stack.
/// @param[out]  container_stack  Container objects stack.
/// @param[out]  next_map_special  Is set to true when dictionary is converted
///                                to a special map, otherwise not touched.
/// @param[out]  didcomma  True if previous token was comma. Is set to recorded
///                        value when decoder is restarted, otherwise unused.
/// @param[out]  didcolon  True if previous token was colon. Is set to recorded
///                        value when decoder is restarted, otherwise unused.
///
/// @return OK in case of success, FAIL in case of error.
static inline int parse_json_number(const char *const buf, const size_t buf_len,
                                    const char **const pp, ValuesStack *const stack,
                                    ContainerStack *const container_stack,
                                    bool *const next_map_special, bool *const didcomma,
                                    bool *const didcolon)
  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_ALWAYS_INLINE
{
  const char *const e = buf + buf_len;
  const char *p = *pp;
  int ret = OK;
  const char *const s = p;
  const char *ints = NULL;
  const char *fracs = NULL;
  const char *exps = NULL;
  const char *exps_s = NULL;
  if (*p == '-') {
    p++;
  }
  ints = p;
  if (p >= e) {
    goto parse_json_number_check;
  }
  while (p < e && ascii_isdigit(*p)) {
    p++;
  }
  if (p != ints + 1 && *ints == '0') {
    semsg(_("E474: Leading zeroes are not allowed: %.*s"), LENP(s, e));
    goto parse_json_number_fail;
  }
  if (p >= e || p == ints) {
    goto parse_json_number_check;
  }
  if (*p == '.') {
    p++;
    fracs = p;
    while (p < e && ascii_isdigit(*p)) {
      p++;
    }
    if (p >= e || p == fracs) {
      goto parse_json_number_check;
    }
  }
  if (*p == 'e' || *p == 'E') {
    p++;
    exps_s = p;
    if (p < e && (*p == '-' || *p == '+')) {
      p++;
    }
    exps = p;
    while (p < e && ascii_isdigit(*p)) {
      p++;
    }
  }
parse_json_number_check:
  if (p == ints) {
    semsg(_("E474: Missing number after minus sign: %.*s"), LENP(s, e));
    goto parse_json_number_fail;
  } else if (p == fracs || (fracs != NULL && exps_s == fracs + 1)) {
    semsg(_("E474: Missing number after decimal dot: %.*s"), LENP(s, e));
    goto parse_json_number_fail;
  } else if (p == exps) {
    semsg(_("E474: Missing exponent: %.*s"), LENP(s, e));
    goto parse_json_number_fail;
  }
  typval_T tv = {
    .v_type = VAR_NUMBER,
    .v_lock = VAR_UNLOCKED,
  };
  const size_t exp_num_len = (size_t)(p - s);
  if (fracs || exps) {
    // Convert floating-point number
    const size_t num_len = string2float(s, &tv.vval.v_float);
    if (exp_num_len != num_len) {
      semsg(_("E685: internal error: while converting number \"%.*s\" "
              "to float string2float consumed %zu bytes in place of %zu"),
            (int)exp_num_len, s, num_len, exp_num_len);
    }
    tv.v_type = VAR_FLOAT;
  } else {
    // Convert integer
    varnumber_T nr;
    int num_len;
    vim_str2nr(s, NULL, &num_len, 0, &nr, NULL, (int)(p - s), true, NULL);
    if ((int)exp_num_len != num_len) {
      semsg(_("E685: internal error: while converting number \"%.*s\" "
              "to integer vim_str2nr consumed %i bytes in place of %zu"),
            (int)exp_num_len, s, num_len, exp_num_len);
    }
    tv.vval.v_number = nr;
  }
  if (json_decoder_pop(OBJ(tv, false, *didcomma, *didcolon),
                       stack, container_stack,
                       &p, next_map_special, didcomma, didcolon) == FAIL) {
    goto parse_json_number_fail;
  }
  if (*next_map_special) {
    goto parse_json_number_ret;
  }
  p--;
  goto parse_json_number_ret;
parse_json_number_fail:
  ret = FAIL;
parse_json_number_ret:
  *pp = p;
  return ret;
}

#define POP(obj_tv, is_sp_string) \
  do { \
    if (json_decoder_pop(OBJ(obj_tv, is_sp_string, didcomma, didcolon), \
                         &stack, &container_stack, \
                         &p, &next_map_special, &didcomma, &didcolon) \
        == FAIL) { \
      goto json_decode_string_fail; \
    } \
    if (next_map_special) { \
      goto json_decode_string_cycle_start; \
    } \
  } while (0)

/// Convert JSON string into Vimscript object
///
/// @param[in]  buf  String to convert. UTF-8 encoding is assumed.
/// @param[in]  buf_len  Length of the string.
/// @param[out]  rettv  Location where to save results.
///
/// @return OK in case of success, FAIL otherwise.
int json_decode_string(const char *const buf, const size_t buf_len, typval_T *const rettv)
  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT
{
  const char *p = buf;
  const char *const e = buf + buf_len;
  while (p < e && (*p == ' ' || *p == TAB || *p == NL || *p == CAR)) {
    p++;
  }
  if (p == e) {
    emsg(_("E474: Attempt to decode a blank string"));
    return FAIL;
  }
  int ret = OK;
  ValuesStack stack = KV_INITIAL_VALUE;
  ContainerStack container_stack = KV_INITIAL_VALUE;
  rettv->v_type = VAR_UNKNOWN;
  bool didcomma = false;
  bool didcolon = false;
  bool next_map_special = false;
  for (; p < e; p++) {
json_decode_string_cycle_start:
    assert(*p == '{' || next_map_special == false);
    switch (*p) {
    case '}':
    case ']': {
      if (kv_size(container_stack) == 0) {
        semsg(_("E474: No container to close: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      }
      ContainerStackItem last_container = kv_last(container_stack);
      if (*p == '}' && last_container.container.v_type != VAR_DICT) {
        semsg(_("E474: Closing list with curly bracket: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      } else if (*p == ']' && last_container.container.v_type != VAR_LIST) {
        semsg(_("E474: Closing dictionary with square bracket: %.*s"),
              LENP(p, e));
        goto json_decode_string_fail;
      } else if (didcomma) {
        semsg(_("E474: Trailing comma: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      } else if (didcolon) {
        semsg(_("E474: Expected value after colon: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      } else if (last_container.stack_index != kv_size(stack) - 1) {
        assert(last_container.stack_index < kv_size(stack) - 1);
        semsg(_("E474: Expected value: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      }
      if (kv_size(stack) == 1) {
        p++;
        (void)kv_pop(container_stack);
        goto json_decode_string_after_cycle;
      } else {
        if (json_decoder_pop(kv_pop(stack), &stack, &container_stack, &p,
                             &next_map_special, &didcomma, &didcolon)
            == FAIL) {
          goto json_decode_string_fail;
        }
        assert(!next_map_special);
        break;
      }
    }
    case ',': {
      if (kv_size(container_stack) == 0) {
        semsg(_("E474: Comma not inside container: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      }
      ContainerStackItem last_container = kv_last(container_stack);
      if (didcomma) {
        semsg(_("E474: Duplicate comma: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      } else if (didcolon) {
        semsg(_("E474: Comma after colon: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      } else if (last_container.container.v_type == VAR_DICT
                 && last_container.stack_index != kv_size(stack) - 1) {
        semsg(_("E474: Using comma in place of colon: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      } else if (last_container.special_val == NULL
                 ? (last_container.container.v_type == VAR_DICT
                    ? (DICT_LEN(last_container.container.vval.v_dict) == 0)
                    : (tv_list_len(last_container.container.vval.v_list)
                       == 0))
                 : (tv_list_len(last_container.special_val) == 0)) {
        semsg(_("E474: Leading comma: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      }
      didcomma = true;
      continue;
    }
    case ':': {
      if (kv_size(container_stack) == 0) {
        semsg(_("E474: Colon not inside container: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      }
      ContainerStackItem last_container = kv_last(container_stack);
      if (last_container.container.v_type != VAR_DICT) {
        semsg(_("E474: Using colon not in dictionary: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      } else if (last_container.stack_index != kv_size(stack) - 2) {
        semsg(_("E474: Unexpected colon: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      } else if (didcomma) {
        semsg(_("E474: Colon after comma: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      } else if (didcolon) {
        semsg(_("E474: Duplicate colon: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      }
      didcolon = true;
      continue;
    }
    case ' ':
    case TAB:
    case NL:
    case CAR:
      continue;
    case 'n':
      if ((p + 3) >= e || strncmp(p + 1, "ull", 3) != 0) {
        semsg(_("E474: Expected null: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      }
      p += 3;
      POP(((typval_T) {
        .v_type = VAR_SPECIAL,
        .v_lock = VAR_UNLOCKED,
        .vval = { .v_special = kSpecialVarNull },
      }), false);
      break;
    case 't':
      if ((p + 3) >= e || strncmp(p + 1, "rue", 3) != 0) {
        semsg(_("E474: Expected true: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      }
      p += 3;
      POP(((typval_T) {
        .v_type = VAR_BOOL,
        .v_lock = VAR_UNLOCKED,
        .vval = { .v_bool = kBoolVarTrue },
      }), false);
      break;
    case 'f':
      if ((p + 4) >= e || strncmp(p + 1, "alse", 4) != 0) {
        semsg(_("E474: Expected false: %.*s"), LENP(p, e));
        goto json_decode_string_fail;
      }
      p += 4;
      POP(((typval_T) {
        .v_type = VAR_BOOL,
        .v_lock = VAR_UNLOCKED,
        .vval = { .v_bool = kBoolVarFalse },
      }), false);
      break;
    case '"':
      if (parse_json_string(buf, buf_len, &p, &stack, &container_stack,
                            &next_map_special, &didcomma, &didcolon)
          == FAIL) {
        // Error message was already given
        goto json_decode_string_fail;
      }
      if (next_map_special) {
        goto json_decode_string_cycle_start;
      }
      break;
    case '-':
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
      if (parse_json_number(buf, buf_len, &p, &stack, &container_stack,
                            &next_map_special, &didcomma, &didcolon)
          == FAIL) {
        // Error message was already given
        goto json_decode_string_fail;
      }
      if (next_map_special) {
        goto json_decode_string_cycle_start;
      }
      break;
    case '[': {
      list_T *list = tv_list_alloc(kListLenMayKnow);
      tv_list_ref(list);
      typval_T tv = {
        .v_type = VAR_LIST,
        .v_lock = VAR_UNLOCKED,
        .vval = { .v_list = list },
      };
      kv_push(container_stack, ((ContainerStackItem) { .stack_index = kv_size(stack),
                                                       .s = p,
                                                       .container = tv,
                                                       .special_val = NULL }));
      kv_push(stack, OBJ(tv, false, didcomma, didcolon));
      break;
    }
    case '{': {
      typval_T tv;
      list_T *val_list = NULL;
      if (next_map_special) {
        next_map_special = false;
        val_list = decode_create_map_special_dict(&tv, kListLenMayKnow);
      } else {
        dict_T *dict = tv_dict_alloc();
        dict->dv_refcount++;
        tv = (typval_T) {
          .v_type = VAR_DICT,
          .v_lock = VAR_UNLOCKED,
          .vval = { .v_dict = dict },
        };
      }
      kv_push(container_stack, ((ContainerStackItem) { .stack_index = kv_size(stack),
                                                       .s = p,
                                                       .container = tv,
                                                       .special_val = val_list }));
      kv_push(stack, OBJ(tv, false, didcomma, didcolon));
      break;
    }
    default:
      semsg(_("E474: Unidentified byte: %.*s"), LENP(p, e));
      goto json_decode_string_fail;
    }
    didcomma = false;
    didcolon = false;
    if (kv_size(container_stack) == 0) {
      p++;
      break;
    }
  }
json_decode_string_after_cycle:
  for (; p < e; p++) {
    switch (*p) {
    case NL:
    case ' ':
    case TAB:
    case CAR:
      break;
    default:
      semsg(_("E474: Trailing characters: %.*s"), LENP(p, e));
      goto json_decode_string_fail;
    }
  }
  if (kv_size(stack) == 1 && kv_size(container_stack) == 0) {
    *rettv = kv_pop(stack).val;
    goto json_decode_string_ret;
  }
  semsg(_("E474: Unexpected end of input: %.*s"), (int)buf_len, buf);
json_decode_string_fail:
  ret = FAIL;
  while (kv_size(stack)) {
    tv_clear(&(kv_pop(stack).val));
  }
json_decode_string_ret:
  kv_destroy(stack);
  kv_destroy(container_stack);
  return ret;
}

#undef LENP
#undef POP

#undef OBJ

#undef DICT_LEN

static void positive_integer_to_special_typval(typval_T *rettv, uint64_t val)
{
  if (val <= VARNUMBER_MAX) {
    *rettv = (typval_T) {
      .v_type = VAR_NUMBER,
      .v_lock = VAR_UNLOCKED,
      .vval = { .v_number = (varnumber_T)val },
    };
  } else {
    list_T *const list = tv_list_alloc(4);
    tv_list_ref(list);
    create_special_dict(rettv, kMPInteger, ((typval_T) {
      .v_type = VAR_LIST,
      .v_lock = VAR_UNLOCKED,
      .vval = { .v_list = list },
    }));
    tv_list_append_number(list, 1);
    tv_list_append_number(list, (varnumber_T)((val >> 62) & 0x3));
    tv_list_append_number(list, (varnumber_T)((val >> 31) & 0x7FFFFFFF));
    tv_list_append_number(list, (varnumber_T)(val & 0x7FFFFFFF));
  }
}

static void typval_parse_enter(mpack_parser_t *parser, mpack_node_t *node)
{
  typval_T *result = NULL;

  mpack_node_t *parent = MPACK_PARENT_NODE(node);
  if (parent) {
    switch (parent->tok.type) {
    case MPACK_TOKEN_ARRAY: {
      list_T *list = parent->data[1].p;
      result = tv_list_append_owned_tv(list, (typval_T) { .v_type = VAR_UNKNOWN });
      break;
    }
    case MPACK_TOKEN_MAP: {
      typval_T(*items)[2] = parent->data[1].p;
      result = &items[parent->pos][parent->key_visited];
      break;
    }

    case MPACK_TOKEN_STR:
    case MPACK_TOKEN_BIN:
    case MPACK_TOKEN_EXT:
      assert(node->tok.type == MPACK_TOKEN_CHUNK);
      break;

    default:
      abort();
    }
  } else {
    result = parser->data.p;
  }

  // for types that are completed in typval_parse_exit
  node->data[0].p = result;
  node->data[1].p = NULL;  // free on error if non-NULL

  switch (node->tok.type) {
  case MPACK_TOKEN_NIL:
    *result = (typval_T) {
      .v_type = VAR_SPECIAL,
      .v_lock = VAR_UNLOCKED,
      .vval = { .v_special = kSpecialVarNull },
    };
    break;
  case MPACK_TOKEN_BOOLEAN:
    *result = (typval_T) {
      .v_type = VAR_BOOL,
      .v_lock = VAR_UNLOCKED,
      .vval = {
        .v_bool = mpack_unpack_boolean(node->tok) ? kBoolVarTrue : kBoolVarFalse
      },
    };
    break;
  case MPACK_TOKEN_SINT: {
    *result = (typval_T) {
      .v_type = VAR_NUMBER,
      .v_lock = VAR_UNLOCKED,
      .vval = { .v_number = mpack_unpack_sint(node->tok) },
    };
    break;
  }
  case MPACK_TOKEN_UINT:
    positive_integer_to_special_typval(result, mpack_unpack_uint(node->tok));
    break;
  case MPACK_TOKEN_FLOAT:
    *result = (typval_T) {
      .v_type = VAR_FLOAT,
      .v_lock = VAR_UNLOCKED,
      .vval = { .v_float = mpack_unpack_float(node->tok) },
    };
    break;

  case MPACK_TOKEN_BIN:
  case MPACK_TOKEN_STR:
  case MPACK_TOKEN_EXT:
    // actually converted in typval_parse_exit after the data chunks
    node->data[1].p = xmallocz(node->tok.length);
    break;
  case MPACK_TOKEN_CHUNK: {
    char *data = parent->data[1].p;
    memcpy(data + parent->pos,
           node->tok.data.chunk_ptr, node->tok.length);
    break;
  }

  case MPACK_TOKEN_ARRAY: {
    list_T *const list = tv_list_alloc((ptrdiff_t)node->tok.length);
    tv_list_ref(list);
    *result = (typval_T) {
      .v_type = VAR_LIST,
      .v_lock = VAR_UNLOCKED,
      .vval = { .v_list = list },
    };
    node->data[1].p = list;
    break;
  }
  case MPACK_TOKEN_MAP:
    // we don't know if this will be safe to convert to a typval dict yet
    node->data[1].p = xmallocz(node->tok.length * 2 * sizeof(typval_T));
    break;
  }
}

/// Free node which was entered but never exited, due to a nested error
///
/// Don't bother with typvals as these will be GC:d eventually
void typval_parser_error_free(mpack_parser_t *parser)
{
  for (uint32_t i = 0; i < parser->size; i++) {
    mpack_node_t *node = &parser->items[i];
    switch (node->tok.type) {
    case MPACK_TOKEN_BIN:
    case MPACK_TOKEN_STR:
    case MPACK_TOKEN_EXT:
    case MPACK_TOKEN_MAP:
      XFREE_CLEAR(node->data[1].p);
      break;
    default:
      break;
    }
  }
}

static void typval_parse_exit(mpack_parser_t *parser, mpack_node_t *node)
{
  typval_T *result = node->data[0].p;
  switch (node->tok.type) {
  case MPACK_TOKEN_BIN:
  case MPACK_TOKEN_STR:
    *result = decode_string(node->data[1].p, node->tok.length, false, true);
    node->data[1].p = NULL;
    break;

  case MPACK_TOKEN_EXT: {
    list_T *const list = tv_list_alloc(2);
    tv_list_ref(list);
    tv_list_append_number(list, node->tok.data.ext_type);
    list_T *const ext_val_list = tv_list_alloc(kListLenMayKnow);
    tv_list_append_list(list, ext_val_list);
    create_special_dict(result, kMPExt, ((typval_T) { .v_type = VAR_LIST,
                                                      .v_lock = VAR_UNLOCKED,
                                                      .vval = { .v_list = list } }));
    // TODO(bfredl): why not use BLOB?
    encode_list_write((void *)ext_val_list, node->data[1].p, node->tok.length);
    XFREE_CLEAR(node->data[1].p);
  }
  break;

  case MPACK_TOKEN_MAP: {
    typval_T(*items)[2] = node->data[1].p;
    for (size_t i = 0; i < node->tok.length; i++) {
      typval_T *key = &items[i][0];
      if (key->v_type != VAR_STRING
          || key->vval.v_string == NULL
          || key->vval.v_string[0] == NUL) {
        goto msgpack_to_vim_generic_map;
      }
    }
    dict_T *const dict = tv_dict_alloc();
    dict->dv_refcount++;
    *result = (typval_T) {
      .v_type = VAR_DICT,
      .v_lock = VAR_UNLOCKED,
      .vval = { .v_dict = dict },
    };
    for (size_t i = 0; i < node->tok.length; i++) {
      char *key = items[i][0].vval.v_string;
      size_t keylen = strlen(key);
      dictitem_T *const di = xmallocz(offsetof(dictitem_T, di_key) + keylen);
      memcpy(&di->di_key[0], key, keylen);
      di->di_tv.v_type = VAR_UNKNOWN;
      if (tv_dict_add(dict, di) == FAIL) {
        // Duplicate key: fallback to generic map
        TV_DICT_ITER(dict, d, {
            d->di_tv.v_type = VAR_SPECIAL;  // don't free values in tv_clear(), they will be reused
            d->di_tv.vval.v_special = kSpecialVarNull;
          });
        tv_clear(result);
        xfree(di);
        goto msgpack_to_vim_generic_map;
      }
      di->di_tv = items[i][1];
    }
    for (size_t i = 0; i < node->tok.length; i++) {
      xfree(items[i][0].vval.v_string);
    }
    XFREE_CLEAR(node->data[1].p);
    break;
msgpack_to_vim_generic_map: {}
    list_T *const list = decode_create_map_special_dict(result, node->tok.length);
    for (size_t i = 0; i < node->tok.length; i++) {
      list_T *const kv_pair = tv_list_alloc(2);
      tv_list_append_list(list, kv_pair);

      tv_list_append_owned_tv(kv_pair, items[i][0]);
      tv_list_append_owned_tv(kv_pair, items[i][1]);
    }
    XFREE_CLEAR(node->data[1].p);
    break;
  }

  default:
    // other kinds are handled completely in typval_parse_enter
    break;
  }
}

int mpack_parse_typval(mpack_parser_t *parser, const char **data, size_t *size)
{
  return mpack_parse(parser, data, size, typval_parse_enter, typval_parse_exit);
}

int unpack_typval(const char **data, size_t *size, typval_T *ret)
{
  ret->v_type = VAR_UNKNOWN;
  mpack_parser_t parser;
  mpack_parser_init(&parser, 0);
  parser.data.p = ret;
  int status = mpack_parse_typval(&parser, data, size);
  if (status != MPACK_OK) {
    typval_parser_error_free(&parser);
    tv_clear(ret);
  }
  return status;
}