refactor(shada): rework msgpack decoding without msgpack-c

This also makes shada reading slightly faster due to avoiding
some copying and allocation.

Use keysets to drive decoding of msgpack maps for shada entries.
This commit is contained in:
bfredl
2024-07-02 13:45:50 +02:00
parent 0c2860d9e5
commit f926cc32c9
42 changed files with 1025 additions and 1165 deletions

View File

@@ -1,5 +1,4 @@
#include <assert.h>
#include <msgpack/object.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
@@ -7,6 +6,7 @@
#include <string.h>
#include "klib/kvec.h"
#include "mpack/object.h"
#include "nvim/ascii_defs.h"
#include "nvim/charset.h"
#include "nvim/eval.h"
@@ -885,173 +885,250 @@ json_decode_string_ret:
#undef DICT_LEN
/// Convert msgpack object to a Vimscript one
int msgpack_to_vim(const msgpack_object mobj, typval_T *const rettv)
FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT
static void positive_integer_to_special_typval(typval_T *rettv, uint64_t val)
{
switch (mobj.type) {
case MSGPACK_OBJECT_NIL:
if (val <= VARNUMBER_MAX) {
*rettv = (typval_T) {
.v_type = VAR_NUMBER,
.v_lock = VAR_UNLOCKED,
.vval = { .v_number = (varnumber_T)val },
};
} else {
list_T *const list = tv_list_alloc(4);
tv_list_ref(list);
create_special_dict(rettv, kMPInteger, ((typval_T) {
.v_type = VAR_LIST,
.v_lock = VAR_UNLOCKED,
.vval = { .v_list = list },
}));
tv_list_append_number(list, 1);
tv_list_append_number(list, (varnumber_T)((val >> 62) & 0x3));
tv_list_append_number(list, (varnumber_T)((val >> 31) & 0x7FFFFFFF));
tv_list_append_number(list, (varnumber_T)(val & 0x7FFFFFFF));
}
}
static void typval_parse_enter(mpack_parser_t *parser, mpack_node_t *node)
{
typval_T *result = NULL;
mpack_node_t *parent = MPACK_PARENT_NODE(node);
if (parent) {
switch (parent->tok.type) {
case MPACK_TOKEN_ARRAY: {
list_T *list = parent->data[1].p;
result = tv_list_append_owned_tv(list, (typval_T) { .v_type = VAR_UNKNOWN });
break;
}
case MPACK_TOKEN_MAP: {
typval_T(*items)[2] = parent->data[1].p;
result = &items[parent->pos][parent->key_visited];
break;
}
case MPACK_TOKEN_STR:
case MPACK_TOKEN_BIN:
case MPACK_TOKEN_EXT:
assert(node->tok.type == MPACK_TOKEN_CHUNK);
break;
default:
abort();
}
} else {
result = parser->data.p;
}
// for types that are completed in typval_parse_exit
node->data[0].p = result;
node->data[1].p = NULL; // free on error if non-NULL
switch (node->tok.type) {
case MPACK_TOKEN_NIL:
*result = (typval_T) {
.v_type = VAR_SPECIAL,
.v_lock = VAR_UNLOCKED,
.vval = { .v_special = kSpecialVarNull },
};
break;
case MSGPACK_OBJECT_BOOLEAN:
*rettv = (typval_T) {
case MPACK_TOKEN_BOOLEAN:
*result = (typval_T) {
.v_type = VAR_BOOL,
.v_lock = VAR_UNLOCKED,
.vval = {
.v_bool = mobj.via.boolean ? kBoolVarTrue : kBoolVarFalse
.v_bool = mpack_unpack_boolean(node->tok) ? kBoolVarTrue : kBoolVarFalse
},
};
break;
case MSGPACK_OBJECT_POSITIVE_INTEGER:
if (mobj.via.u64 <= VARNUMBER_MAX) {
*rettv = (typval_T) {
.v_type = VAR_NUMBER,
.v_lock = VAR_UNLOCKED,
.vval = { .v_number = (varnumber_T)mobj.via.u64 },
};
} else {
list_T *const list = tv_list_alloc(4);
tv_list_ref(list);
create_special_dict(rettv, kMPInteger, ((typval_T) {
.v_type = VAR_LIST,
.v_lock = VAR_UNLOCKED,
.vval = { .v_list = list },
}));
uint64_t n = mobj.via.u64;
tv_list_append_number(list, 1);
tv_list_append_number(list, (varnumber_T)((n >> 62) & 0x3));
tv_list_append_number(list, (varnumber_T)((n >> 31) & 0x7FFFFFFF));
tv_list_append_number(list, (varnumber_T)(n & 0x7FFFFFFF));
}
break;
case MSGPACK_OBJECT_NEGATIVE_INTEGER:
if (mobj.via.i64 >= VARNUMBER_MIN) {
*rettv = (typval_T) {
.v_type = VAR_NUMBER,
.v_lock = VAR_UNLOCKED,
.vval = { .v_number = (varnumber_T)mobj.via.i64 },
};
} else {
list_T *const list = tv_list_alloc(4);
tv_list_ref(list);
create_special_dict(rettv, kMPInteger, ((typval_T) {
.v_type = VAR_LIST,
.v_lock = VAR_UNLOCKED,
.vval = { .v_list = list },
}));
uint64_t n = -((uint64_t)mobj.via.i64);
tv_list_append_number(list, -1);
tv_list_append_number(list, (varnumber_T)((n >> 62) & 0x3));
tv_list_append_number(list, (varnumber_T)((n >> 31) & 0x7FFFFFFF));
tv_list_append_number(list, (varnumber_T)(n & 0x7FFFFFFF));
}
break;
case MSGPACK_OBJECT_FLOAT32:
case MSGPACK_OBJECT_FLOAT64:
*rettv = (typval_T) {
.v_type = VAR_FLOAT,
case MPACK_TOKEN_SINT: {
*result = (typval_T) {
.v_type = VAR_NUMBER,
.v_lock = VAR_UNLOCKED,
.vval = { .v_float = mobj.via.f64 },
.vval = { .v_number = mpack_unpack_sint(node->tok) },
};
break;
case MSGPACK_OBJECT_STR:
case MSGPACK_OBJECT_BIN:
*rettv = decode_string(mobj.via.bin.ptr, mobj.via.bin.size, false, false);
}
case MPACK_TOKEN_UINT:
positive_integer_to_special_typval(result, mpack_unpack_uint(node->tok));
break;
case MSGPACK_OBJECT_ARRAY: {
list_T *const list = tv_list_alloc((ptrdiff_t)mobj.via.array.size);
case MPACK_TOKEN_FLOAT:
*result = (typval_T) {
.v_type = VAR_FLOAT,
.v_lock = VAR_UNLOCKED,
.vval = { .v_float = mpack_unpack_float(node->tok) },
};
break;
case MPACK_TOKEN_BIN:
case MPACK_TOKEN_STR:
case MPACK_TOKEN_EXT:
// actually converted in typval_parse_exit after the data chunks
node->data[1].p = xmallocz(node->tok.length);
break;
case MPACK_TOKEN_CHUNK: {
char *data = parent->data[1].p;
memcpy(data + parent->pos,
node->tok.data.chunk_ptr, node->tok.length);
break;
}
case MPACK_TOKEN_ARRAY: {
list_T *const list = tv_list_alloc((ptrdiff_t)node->tok.length);
tv_list_ref(list);
*rettv = (typval_T) {
*result = (typval_T) {
.v_type = VAR_LIST,
.v_lock = VAR_UNLOCKED,
.vval = { .v_list = list },
};
for (size_t i = 0; i < mobj.via.array.size; i++) {
// Not populated yet, need to create list item to push.
tv_list_append_owned_tv(list, (typval_T) { .v_type = VAR_UNKNOWN });
if (msgpack_to_vim(mobj.via.array.ptr[i],
TV_LIST_ITEM_TV(tv_list_last(list)))
== FAIL) {
return FAIL;
}
}
node->data[1].p = list;
break;
}
case MSGPACK_OBJECT_MAP: {
for (size_t i = 0; i < mobj.via.map.size; i++) {
if ((mobj.via.map.ptr[i].key.type != MSGPACK_OBJECT_STR
&& mobj.via.map.ptr[i].key.type != MSGPACK_OBJECT_BIN)
|| mobj.via.map.ptr[i].key.via.str.size == 0
|| memchr(mobj.via.map.ptr[i].key.via.str.ptr, NUL,
mobj.via.map.ptr[i].key.via.str.size) != NULL) {
case MPACK_TOKEN_MAP:
// we don't know if this will be safe to convert to a typval dict yet
node->data[1].p = xmallocz(node->tok.length * 2 * sizeof(typval_T));
break;
}
}
/// free node which was entered but never exited, due to a nested error
///
/// Don't bother with typvals as these will be GC:d eventually
void typval_parser_error_free(mpack_parser_t *parser)
{
for (uint32_t i = 0; i < parser->size; i++) {
mpack_node_t *node = &parser->items[i];
switch (node->tok.type) {
case MPACK_TOKEN_BIN:
case MPACK_TOKEN_STR:
case MPACK_TOKEN_EXT:
case MPACK_TOKEN_MAP:
XFREE_CLEAR(node->data[1].p);
break;
default:
break;
}
}
}
static void typval_parse_exit(mpack_parser_t *parser, mpack_node_t *node)
{
typval_T *result = node->data[0].p;
switch (node->tok.type) {
case MPACK_TOKEN_BIN:
case MPACK_TOKEN_STR:
*result = decode_string(node->data[1].p, node->tok.length, false, true);
node->data[1].p = NULL;
break;
case MPACK_TOKEN_EXT: {
list_T *const list = tv_list_alloc(2);
tv_list_ref(list);
tv_list_append_number(list, node->tok.data.ext_type);
list_T *const ext_val_list = tv_list_alloc(kListLenMayKnow);
tv_list_append_list(list, ext_val_list);
create_special_dict(result, kMPExt, ((typval_T) { .v_type = VAR_LIST,
.v_lock = VAR_UNLOCKED,
.vval = { .v_list = list } }));
// TODO(bfredl): why not use BLOB?
encode_list_write((void *)ext_val_list, node->data[1].p, node->tok.length);
XFREE_CLEAR(node->data[1].p);
}
break;
case MPACK_TOKEN_MAP: {
typval_T(*items)[2] = node->data[1].p;
for (size_t i = 0; i < node->tok.length; i++) {
typval_T *key = &items[i][0];
if (key->v_type != VAR_STRING
|| key->vval.v_string == NULL
|| key->vval.v_string[0] == NUL) {
goto msgpack_to_vim_generic_map;
}
}
dict_T *const dict = tv_dict_alloc();
dict->dv_refcount++;
*rettv = (typval_T) {
*result = (typval_T) {
.v_type = VAR_DICT,
.v_lock = VAR_UNLOCKED,
.vval = { .v_dict = dict },
};
for (size_t i = 0; i < mobj.via.map.size; i++) {
dictitem_T *const di = xmallocz(offsetof(dictitem_T, di_key)
+ mobj.via.map.ptr[i].key.via.str.size);
memcpy(&di->di_key[0], mobj.via.map.ptr[i].key.via.str.ptr,
mobj.via.map.ptr[i].key.via.str.size);
for (size_t i = 0; i < node->tok.length; i++) {
char *key = items[i][0].vval.v_string;
size_t keylen = strlen(key);
dictitem_T *const di = xmallocz(offsetof(dictitem_T, di_key) + keylen);
memcpy(&di->di_key[0], key, keylen);
di->di_tv.v_type = VAR_UNKNOWN;
if (tv_dict_add(dict, di) == FAIL) {
// Duplicate key: fallback to generic map
tv_clear(rettv);
TV_DICT_ITER(dict, d, {
d->di_tv.v_type = VAR_SPECIAL; // don't free values in tv_clear(), they will be reused
d->di_tv.vval.v_special = kSpecialVarNull;
});
tv_clear(result);
xfree(di);
goto msgpack_to_vim_generic_map;
}
if (msgpack_to_vim(mobj.via.map.ptr[i].val, &di->di_tv) == FAIL) {
return FAIL;
}
di->di_tv = items[i][1];
}
for (size_t i = 0; i < node->tok.length; i++) {
xfree(items[i][0].vval.v_string);
}
XFREE_CLEAR(node->data[1].p);
break;
msgpack_to_vim_generic_map: {}
list_T *const list = decode_create_map_special_dict(rettv, (ptrdiff_t)mobj.via.map.size);
for (size_t i = 0; i < mobj.via.map.size; i++) {
list_T *const list = decode_create_map_special_dict(result, node->tok.length);
for (size_t i = 0; i < node->tok.length; i++) {
list_T *const kv_pair = tv_list_alloc(2);
tv_list_append_list(list, kv_pair);
typval_T key_tv = { .v_type = VAR_UNKNOWN };
if (msgpack_to_vim(mobj.via.map.ptr[i].key, &key_tv) == FAIL) {
tv_clear(&key_tv);
return FAIL;
}
tv_list_append_owned_tv(kv_pair, key_tv);
tv_list_append_owned_tv(kv_pair, items[i][0]);
tv_list_append_owned_tv(kv_pair, items[i][1]);
}
XFREE_CLEAR(node->data[1].p);
break;
}
typval_T val_tv = { .v_type = VAR_UNKNOWN };
if (msgpack_to_vim(mobj.via.map.ptr[i].val, &val_tv) == FAIL) {
tv_clear(&val_tv);
return FAIL;
}
tv_list_append_owned_tv(kv_pair, val_tv);
}
default:
// other kinds are handled completely in typval_parse_enter,
break;
}
case MSGPACK_OBJECT_EXT: {
list_T *const list = tv_list_alloc(2);
tv_list_ref(list);
tv_list_append_number(list, mobj.via.ext.type);
list_T *const ext_val_list = tv_list_alloc(kListLenMayKnow);
tv_list_append_list(list, ext_val_list);
create_special_dict(rettv, kMPExt, ((typval_T) { .v_type = VAR_LIST,
.v_lock = VAR_UNLOCKED,
.vval = { .v_list = list } }));
if (encode_list_write((void *)ext_val_list, mobj.via.ext.ptr,
mobj.via.ext.size) == -1) {
return FAIL;
}
break;
}
}
return OK;
}
int mpack_parse_typval(mpack_parser_t *parser, const char **data, size_t *size)
{
return mpack_parse(parser, data, size, typval_parse_enter, typval_parse_exit);
}
int unpack_typval(const char **data, size_t *size, typval_T *ret)
{
ret->v_type = VAR_UNKNOWN;
mpack_parser_t parser;
mpack_parser_init(&parser, 0);
parser.data.p = ret;
int status = mpack_parse_typval(&parser, data, size);
if (status != MPACK_OK) {
typval_parser_error_free(&parser);
tv_clear(ret);
}
return status;
}