feat(json): vim.json.encode() sort_keys #35574

Problem: There is no way to ensure a stable key order when encoding a JSON string,
which can be useful for comparisons and producing cleaner diffs.

Solution: Introduce a `sort_keys` option for `vim.json.encode()`, which
is disabled by default. When enabled, object keys are sorted in
alphabetical order.
This commit is contained in:
skewb1k
2025-09-14 07:17:07 +03:00
committed by GitHub
parent 05a511996f
commit a897cc17a5
5 changed files with 272 additions and 52 deletions

View File

@@ -3391,6 +3391,8 @@ vim.json.encode({obj}, {opts}) *vim.json.encode()*
• indent: (string) (default "") String used for indentation at • indent: (string) (default "") String used for indentation at
each nesting level. If non-empty enables newlines and a each nesting level. If non-empty enables newlines and a
space after colons. space after colons.
• sort_keys: (boolean) (default false) Sort object keys in
alphabetical order.
Return: ~ Return: ~
(`string`) (`string`)

View File

@@ -266,6 +266,7 @@ LUA
• |vim.list.bisect()| for binary search. • |vim.list.bisect()| for binary search.
• Experimental `vim.pos` and `vim.range` for Position/Range abstraction. • Experimental `vim.pos` and `vim.range` for Position/Range abstraction.
• |vim.json.encode()| has an `indent` option for pretty-formatting. • |vim.json.encode()| has an `indent` option for pretty-formatting.
• |vim.json.encode()| has a `sort_keys` option.
OPTIONS OPTIONS

View File

@@ -56,5 +56,7 @@ function vim.json.decode(str, opts) end
--- characters "/" in string values. --- characters "/" in string values.
--- - indent: (string) (default "") String used for indentation at each nesting level. --- - indent: (string) (default "") String used for indentation at each nesting level.
--- If non-empty enables newlines and a space after colons. --- If non-empty enables newlines and a space after colons.
--- - sort_keys: (boolean) (default false) Sort object
--- keys in alphabetical order.
---@return string ---@return string
function vim.json.encode(obj, opts) end function vim.json.encode(obj, opts) end

217
src/cjson/lua_cjson.c vendored
View File

@@ -88,6 +88,7 @@
#define DEFAULT_ENCODE_ESCAPE_FORWARD_SLASH 1 #define DEFAULT_ENCODE_ESCAPE_FORWARD_SLASH 1
#define DEFAULT_ENCODE_SKIP_UNSUPPORTED_VALUE_TYPES 0 #define DEFAULT_ENCODE_SKIP_UNSUPPORTED_VALUE_TYPES 0
#define DEFAULT_ENCODE_INDENT NULL #define DEFAULT_ENCODE_INDENT NULL
#define DEFAULT_ENCODE_SORT_KEYS 0
#ifdef DISABLE_INVALID_NUMBERS #ifdef DISABLE_INVALID_NUMBERS
#undef DEFAULT_DECODE_INVALID_NUMBERS #undef DEFAULT_DECODE_INVALID_NUMBERS
@@ -152,6 +153,32 @@ static const char *json_token_type_name[] = {
NULL NULL
}; };
/* One object key captured while key sorting is enabled.
 * The serialized key text is not stored in the entry itself; it occupies
 * the byte range [offset, offset + length) of *buf. */
typedef struct {
/* Buffer that holds the serialized key bytes. */
strbuf_t *buf;
/* Position in buf, in bytes, where this key's serialized text starts. */
size_t offset;
/* Number of serialized bytes that belong to this key. */
size_t length;
/* Lua type of the original key (LUA_TSTRING or LUA_TNUMBER); selects which
 * member of raw is valid. */
int raw_typ;
/* Original key value, kept so the key can be pushed back onto the Lua stack
 * to look up its value after sorting. */
union {
lua_Number number;
const char *string;
} raw;
} key_entry_t;
/* Stores all keys for a table when key sorting is enabled.
 * - buf: buffer holding serialized key strings
 * - keys: array of key_entry_t pointing into buf
 * - size: number of keys stored
 * - capacity: allocated capacity of keys array, counted in entries
 *
 * A single keybuf_t serves every object nested in one encode call:
 * json_append_object() records size and the length of buf on entry, appends
 * its own keys after them, and restores both values once its keys are emitted.
 */
typedef struct {
strbuf_t buf;
key_entry_t *keys;
size_t size;
size_t capacity;
} keybuf_t;
#define KEYBUF_DEFAULT_CAPACITY 32
typedef struct { typedef struct {
json_token_type_t ch2token[256]; json_token_type_t ch2token[256];
char escape2char[256]; /* Decoding */ char escape2char[256]; /* Decoding */
@@ -160,6 +187,10 @@ typedef struct {
* encode_keep_buffer is set */ * encode_keep_buffer is set */
strbuf_t encode_buf; strbuf_t encode_buf;
/* encode_keybuf is only allocated and used when
* sort_keys is set */
keybuf_t encode_keybuf;
int encode_sparse_convert; int encode_sparse_convert;
int encode_sparse_ratio; int encode_sparse_ratio;
int encode_sparse_safe; int encode_sparse_safe;
@@ -170,6 +201,7 @@ typedef struct {
int encode_empty_table_as_object; int encode_empty_table_as_object;
int encode_escape_forward_slash; int encode_escape_forward_slash;
const char *encode_indent; const char *encode_indent;
int encode_sort_keys;
int decode_invalid_numbers; int decode_invalid_numbers;
int decode_max_depth; int decode_max_depth;
@@ -180,6 +212,11 @@ typedef struct {
typedef struct { typedef struct {
const char **char2escape[256]; const char **char2escape[256];
const char *indent; const char *indent;
int sort_keys;
/* keybuf is only allocated and used when
* sort_keys is set */
keybuf_t keybuf;
} json_encode_options_t; } json_encode_options_t;
typedef struct { typedef struct {
@@ -522,6 +559,17 @@ static int json_cfg_encode_escape_forward_slash(lua_State *l)
} }
*/ */
/*
static int json_cfg_encode_sort_keys(lua_State *l)
{
json_config_t *cfg = json_arg_init(l, 1);
json_enum_option(l, 1, &cfg->encode_sort_keys, NULL, 1);
return 1;
}
*/
static int json_destroy_config(lua_State *l) static int json_destroy_config(lua_State *l)
{ {
json_config_t *cfg; json_config_t *cfg;
@@ -565,6 +613,7 @@ static void json_create_config(lua_State *l)
cfg->encode_escape_forward_slash = DEFAULT_ENCODE_ESCAPE_FORWARD_SLASH; cfg->encode_escape_forward_slash = DEFAULT_ENCODE_ESCAPE_FORWARD_SLASH;
cfg->encode_skip_unsupported_value_types = DEFAULT_ENCODE_SKIP_UNSUPPORTED_VALUE_TYPES; cfg->encode_skip_unsupported_value_types = DEFAULT_ENCODE_SKIP_UNSUPPORTED_VALUE_TYPES;
cfg->encode_indent = DEFAULT_ENCODE_INDENT; cfg->encode_indent = DEFAULT_ENCODE_INDENT;
cfg->encode_sort_keys = DEFAULT_ENCODE_SORT_KEYS;
#if DEFAULT_ENCODE_KEEP_BUFFER > 0 #if DEFAULT_ENCODE_KEEP_BUFFER > 0
strbuf_init(&cfg->encode_buf, 0); strbuf_init(&cfg->encode_buf, 0);
@@ -623,23 +672,27 @@ static void json_encode_exception(lua_State *l, json_encode_t *ctx, int lindex,
{ {
if (!ctx->cfg->encode_keep_buffer) if (!ctx->cfg->encode_keep_buffer)
strbuf_free(ctx->json); strbuf_free(ctx->json);
if (ctx->options->sort_keys) {
strbuf_free(&ctx->options->keybuf.buf);
free(ctx->options->keybuf.keys);
}
luaL_error(l, "Cannot serialise %s: %s", luaL_error(l, "Cannot serialise %s: %s",
lua_typename(l, lua_type(l, lindex)), reason); lua_typename(l, lua_type(l, lindex)), reason);
} }
/* json_append_string args: static void json_append_string_contents(lua_State *l, json_encode_t *ctx,
* - lua_State int lindex, int use_keybuf)
* - JSON strbuf
* - String (Lua stack index)
*
* Returns nothing. Doesn't remove string from Lua stack */
static void json_append_string(lua_State *l, json_encode_t *ctx, int lindex)
{ {
const char *escstr; const char *escstr;
const char *str; const char *str;
size_t len; size_t len;
size_t i; size_t i;
strbuf_t *json = ctx->json; strbuf_t *json = ctx->json;
if (use_keybuf) {
json = &ctx->options->keybuf.buf;
}
str = lua_tolstring(l, lindex, &len); str = lua_tolstring(l, lindex, &len);
@@ -647,11 +700,10 @@ static void json_append_string(lua_State *l, json_encode_t *ctx, int lindex)
* This buffer is reused constantly for small strings * This buffer is reused constantly for small strings
* If there are any excess pages, they won't be hit anyway. * If there are any excess pages, they won't be hit anyway.
* This gains ~5% speedup. */ * This gains ~5% speedup. */
if (len > SIZE_MAX / 6 - 3) if (len >= SIZE_MAX / 6)
abort(); /* Overflow check */ abort(); /* Overflow check */
strbuf_ensure_empty_length(json, len * 6 + 2); strbuf_ensure_empty_length(json, len * 6);
strbuf_append_char_unsafe(json, '\"');
for (i = 0; i < len; i++) { for (i = 0; i < len; i++) {
escstr = (*ctx->options->char2escape)[(unsigned char)str[i]]; escstr = (*ctx->options->char2escape)[(unsigned char)str[i]];
if (escstr) if (escstr)
@@ -659,7 +711,19 @@ static void json_append_string(lua_State *l, json_encode_t *ctx, int lindex)
else else
strbuf_append_char_unsafe(json, str[i]); strbuf_append_char_unsafe(json, str[i]);
} }
strbuf_append_char_unsafe(json, '\"'); }
/* json_append_string args:
* - lua_State
* - JSON encode ctx
* - String (Lua stack index)
*
* Returns nothing. Doesn't remove string from Lua stack */
static void json_append_string(lua_State *l, json_encode_t *ctx, int lindex)
{
strbuf_append_char(ctx->json, '\"');
json_append_string_contents(l, ctx, lindex, false);
strbuf_append_char(ctx->json, '\"');
} }
/* Find the size of the array on the top of the Lua stack /* Find the size of the array on the top of the Lua stack
@@ -795,7 +859,7 @@ static void json_append_array(lua_State *l, json_encode_t *ctx, int current_dept
} }
static void json_append_number(lua_State *l, json_encode_t *ctx, static void json_append_number(lua_State *l, json_encode_t *ctx,
int lindex) int lindex, int use_keybuf)
{ {
int len; int len;
#if LUA_VERSION_NUM >= 503 #if LUA_VERSION_NUM >= 503
@@ -810,6 +874,9 @@ static void json_append_number(lua_State *l, json_encode_t *ctx,
double num = lua_tonumber(l, lindex); double num = lua_tonumber(l, lindex);
json_config_t *cfg = ctx->cfg; json_config_t *cfg = ctx->cfg;
strbuf_t *json = ctx->json; strbuf_t *json = ctx->json;
if (use_keybuf) {
json = &ctx->options->keybuf.buf;
}
if (cfg->encode_invalid_numbers == 0) { if (cfg->encode_invalid_numbers == 0) {
/* Prevent encoding invalid numbers */ /* Prevent encoding invalid numbers */
@@ -843,6 +910,18 @@ static void json_append_number(lua_State *l, json_encode_t *ctx,
strbuf_extend_length(json, len); strbuf_extend_length(json, len);
} }
/* Compare two key_entry_t values for qsort.
 *
 * Keys are ordered by the raw bytes of their serialized text (memcmp over
 * the common prefix). When one key is a prefix of the other, the shorter
 * key sorts first.
 *
 * Args:
 * - a, b: pointers to key_entry_t elements of the array being sorted
 *
 * Returns a negative value, zero, or a positive value when a orders
 * before, equal to, or after b. */
static int cmp_key_entries(const void *a, const void *b) {
    const key_entry_t *ka = a;
    const key_entry_t *kb = b;
    size_t common = ka->length < kb->length ? ka->length : kb->length;
    int res = memcmp(ka->buf->buf + ka->offset,
                     kb->buf->buf + kb->offset,
                     common);
    if (res != 0)
        return res;
    /* Do not subtract the lengths: the size_t difference wraps around and
     * narrowing it to int can produce the wrong sign for large differences. */
    return (ka->length > kb->length) - (ka->length < kb->length);
}
static void json_append_object(lua_State *l, json_encode_t *ctx, static void json_append_object(lua_State *l, json_encode_t *ctx,
int current_depth) int current_depth)
{ {
@@ -853,17 +932,91 @@ static void json_append_object(lua_State *l, json_encode_t *ctx,
/* Object */ /* Object */
strbuf_append_char(json, '{'); strbuf_append_char(json, '{');
lua_pushnil(l);
/* table, startkey */
comma = 0; comma = 0;
lua_pushnil(l);
if (ctx->options->sort_keys) {
keybuf_t *keybuf = &ctx->options->keybuf;
size_t init_keybuf_size = keybuf->size;
size_t init_keybuf_length = strbuf_length(&keybuf->buf);
while (lua_next(l, -2) != 0) {
has_items = 1;
if (keybuf->size == keybuf->capacity) {
keybuf->capacity *= 2;
key_entry_t *tmp = realloc(keybuf->keys,
keybuf->capacity * sizeof(key_entry_t));
if (!tmp)
json_encode_exception(l, ctx, -1, "out of memory");
keybuf->keys = tmp;
}
keytype = lua_type(l, -2);
key_entry_t key_entry = {
.buf = &keybuf->buf,
.offset = strbuf_length(&keybuf->buf),
.raw_typ = keytype,
};
if (keytype == LUA_TSTRING) {
json_append_string_contents(l, ctx, -2, true);
key_entry.raw.string = lua_tostring(l, -2);
} else if (keytype == LUA_TNUMBER) {
json_append_number(l, ctx, -2, true);
key_entry.raw.number = lua_tointeger(l, -2);
} else {
json_encode_exception(l, ctx, -2,
"table key must be number or string");
}
key_entry.length = strbuf_length(&keybuf->buf) - key_entry.offset;
keybuf->keys[keybuf->size++] = key_entry;
lua_pop(l, 1);
}
size_t keys_count = keybuf->size - init_keybuf_size;
qsort(keybuf->keys + init_keybuf_size, keys_count,
sizeof (key_entry_t), cmp_key_entries);
for (size_t i = init_keybuf_size; i < init_keybuf_size + keys_count; i++) {
key_entry_t *current_key = &keybuf->keys[i];
json_pos = strbuf_length(json);
if (comma++ > 0)
strbuf_append_char(json, ',');
if (ctx->options->indent)
json_append_newline_and_indent(json, ctx, current_depth);
strbuf_ensure_empty_length(json, current_key->length + 3);
strbuf_append_char_unsafe(json, '"');
strbuf_append_mem_unsafe(json, keybuf->buf.buf + current_key->offset,
current_key->length);
strbuf_append_mem_unsafe(json, "\":", 2);
if (ctx->options->indent)
strbuf_append_char(json, ' ');
if (current_key->raw_typ == LUA_TSTRING)
lua_pushstring(l, current_key->raw.string);
else
lua_pushnumber(l, current_key->raw.number);
lua_gettable(l, -2);
err = json_append_data(l, ctx, current_depth);
if (err) {
strbuf_set_length(json, json_pos);
if (comma == 1)
comma = 0;
}
lua_pop(l, 1);
}
/* resize encode_keybuf to reuse allocated memory for forward keys */
strbuf_set_length(&keybuf->buf, init_keybuf_length);
keybuf->size = init_keybuf_size;
} else {
while (lua_next(l, -2) != 0) { while (lua_next(l, -2) != 0) {
has_items = 1; has_items = 1;
json_pos = strbuf_length(json); json_pos = strbuf_length(json);
if (comma++ > 0) if (comma++ > 0)
strbuf_append_char(json, ','); strbuf_append_char(json, ',');
else
comma = 1;
if (ctx->options->indent) if (ctx->options->indent)
json_append_newline_and_indent(json, ctx, current_depth); json_append_newline_and_indent(json, ctx, current_depth);
@@ -872,7 +1025,7 @@ static void json_append_object(lua_State *l, json_encode_t *ctx,
keytype = lua_type(l, -2); keytype = lua_type(l, -2);
if (keytype == LUA_TNUMBER) { if (keytype == LUA_TNUMBER) {
strbuf_append_char(json, '"'); strbuf_append_char(json, '"');
json_append_number(l, ctx, -2); json_append_number(l, ctx, -2, false);
strbuf_append_mem(json, "\":", 2); strbuf_append_mem(json, "\":", 2);
} else if (keytype == LUA_TSTRING) { } else if (keytype == LUA_TSTRING) {
json_append_string(l, ctx, -2); json_append_string(l, ctx, -2);
@@ -889,14 +1042,14 @@ static void json_append_object(lua_State *l, json_encode_t *ctx,
err = json_append_data(l, ctx, current_depth); err = json_append_data(l, ctx, current_depth);
if (err) { if (err) {
strbuf_set_length(json, json_pos); strbuf_set_length(json, json_pos);
if (comma == 1) { if (comma == 1)
comma = 0; comma = 0;
} }
}
lua_pop(l, 1); lua_pop(l, 1);
/* table, key */ /* table, key */
} }
}
if (has_items && ctx->options->indent) if (has_items && ctx->options->indent)
json_append_newline_and_indent(json, ctx, current_depth-1); json_append_newline_and_indent(json, ctx, current_depth-1);
@@ -921,7 +1074,7 @@ static int json_append_data(lua_State *l, json_encode_t *ctx,
json_append_string(l, ctx, -1); json_append_string(l, ctx, -1);
break; break;
case LUA_TNUMBER: case LUA_TNUMBER:
json_append_number(l, ctx, -1); json_append_number(l, ctx, -1, false);
break; break;
case LUA_TBOOLEAN: case LUA_TBOOLEAN:
if (lua_toboolean(l, -1)) if (lua_toboolean(l, -1))
@@ -1029,6 +1182,7 @@ static int json_encode(lua_State *l)
json_encode_options_t options = { json_encode_options_t options = {
.char2escape = { char2escape }, .char2escape = { char2escape },
.indent = DEFAULT_ENCODE_INDENT, .indent = DEFAULT_ENCODE_INDENT,
.sort_keys = DEFAULT_ENCODE_SORT_KEYS,
}; };
json_encode_t ctx = { .options = &options, .cfg = cfg }; json_encode_t ctx = { .options = &options, .cfg = cfg };
strbuf_t local_encode_buf; strbuf_t local_encode_buf;
@@ -1063,6 +1217,23 @@ static int json_encode(lua_State *l)
options.indent = luaL_checkstring(l, -1); options.indent = luaL_checkstring(l, -1);
if (options.indent[0] == '\0') options.indent = NULL; if (options.indent[0] == '\0') options.indent = NULL;
} }
lua_pop(l, 1);
lua_getfield(l, 2, "sort_keys");
if (!lua_isnil(l, -1)) {
luaL_checktype(l, -1, LUA_TBOOLEAN);
int sort_keys = lua_toboolean(l, -1);
if (sort_keys) {
options.sort_keys = sort_keys;
strbuf_init(&options.keybuf.buf, 0);
options.keybuf.size = 0;
options.keybuf.capacity = KEYBUF_DEFAULT_CAPACITY;
options.keybuf.keys = malloc(options.keybuf.capacity * sizeof(key_entry_t));
if (!options.keybuf.keys)
return luaL_error (l, "out of memory");
}
}
/* Also pop the opts table */ /* Also pop the opts table */
lua_pop(l, 2); lua_pop(l, 2);
@@ -1090,6 +1261,11 @@ static int json_encode(lua_State *l)
if (!cfg->encode_keep_buffer) if (!cfg->encode_keep_buffer)
strbuf_free(encode_buf); strbuf_free(encode_buf);
if (options.sort_keys) {
strbuf_free(&options.keybuf.buf);
free(options.keybuf.keys);
}
return 1; return 1;
} }
@@ -1785,6 +1961,7 @@ int lua_cjson_new(lua_State *l)
{ "encode_escape_forward_slash", json_cfg_encode_escape_forward_slash }, { "encode_escape_forward_slash", json_cfg_encode_escape_forward_slash },
{ "encode_skip_unsupported_value_types", json_cfg_encode_skip_unsupported_value_types }, { "encode_skip_unsupported_value_types", json_cfg_encode_skip_unsupported_value_types },
{ "encode_indent", json_cfg_encode_indent }, { "encode_indent", json_cfg_encode_indent },
{ "encode_sort_keys", json_cfg_encode_sort_keys },
*/ */
{ "new", lua_cjson_new }, { "new", lua_cjson_new },
{ NULL, NULL } { NULL, NULL }

View File

@@ -227,6 +227,44 @@ describe('vim.json.encode()', function()
) )
end) end)
it('sort_keys', function()
eq('"string"', exec_lua([[return vim.json.encode('string', { sort_keys = true })]]))
eq('[]', exec_lua([[return vim.json.encode({}, { sort_keys = true })]]))
eq('{}', exec_lua([[return vim.json.encode(vim.empty_dict(), { sort_keys = true })]]))
eq(
'{"$":0,"%":0,"1":0,"4":0,"a":0,"ab":0,"b":0}',
exec_lua(
[[return vim.json.encode({ a = 0, b = 0, ab = 0, [1] = 0, ["$"] = 0, [4] = 0, ["%"] = 0 }, { sort_keys = true })]]
)
)
eq(
'{"aa":1,"ab":2,"ba":3,"bc":4,"cc":5}',
exec_lua(
[[return vim.json.encode({ aa = 1, ba = 3, ab = 2, bc = 4, cc = 5 }, { sort_keys = true })]]
)
)
eq(
'{"a":{"a":1,"b":2,"c":3},"b":{"a":{"a":0,"b":0},"b":{"a":0,"b":0}},"c":0}',
exec_lua(
[[return vim.json.encode({ a = { b = 2, a = 1, c = 3 }, c = 0, b = { b = { a = 0, b = 0 }, a = { a = 0, b = 0 } } }, { sort_keys = true })]]
)
)
eq(
'[{"1":0,"4":0,"a":0,"b":0},{"10":0,"5":0,"f":0,"x":0},{"-2":0,"2":0,"c":0,"d":0}]',
exec_lua([[return vim.json.encode({
{ a = 0, [1] = 0, [4] = 0, b = 0 },
{ f = 0, [5] = 0, [10] = 0, x = 0 },
{ c = 0, [-2] = 0, [2] = 0, d = 0 },
}, { sort_keys = true })]])
)
eq(
'{"a":2,"ß":3,"é":1,"中":4}',
exec_lua(
[[return vim.json.encode({ ["é"] = 1, ["a"] = 2, ["ß"] = 3, ["中"] = 4 }, { sort_keys = true })]]
)
)
end)
it('dumps strings', function() it('dumps strings', function()
eq('"Test"', exec_lua([[return vim.json.encode('Test')]])) eq('"Test"', exec_lua([[return vim.json.encode('Test')]]))
eq('""', exec_lua([[return vim.json.encode('')]])) eq('""', exec_lua([[return vim.json.encode('')]]))