shada,functests: Improve detection of invalid ShaDa files

It appears that a large portion of non-ShaDa ASCII text files may be parsed as
ShaDa files because their contents are mostly recognized as a sequence of
unknown entries: all ASCII non-control characters are recognized as FIXUINT
ShaDa objects, so text like

    #!/bin/sh

    powerline "$@" 2>&1 | tee -a powerline

(with trailing newline) will be recognized as a correct ShaDa file containing
a single unknown entry with type 0x23 (dec 35, '#'), timestamp 0x21 (dec 33,
'!') and length 0x2F (dec 47, '/') without this commit. With it, parsing this
entry will fail.
This commit is contained in:
ZyX
2015-08-08 13:27:55 +03:00
parent 1e06792099
commit 1542fc221e
2 changed files with 218 additions and 76 deletions

View File

@@ -2001,6 +2001,93 @@ static int compare_file_marks(const void *a, const void *b)
: 1)); : 1));
} }
/// Parse msgpack object that has given length
///
/// Reads exactly `length` bytes from the reader and requires them to contain
/// exactly one complete msgpack object: a parser error, an incomplete object
/// or trailing bytes after the object are all reported as failures.
///
/// @param[in]   sd_reader     Structure containing file reader definition.
/// @param[in]   length        Object length.
/// @param[out]  ret_unpacked  Location where read result should be saved. If
///                            NULL then unpacked data will be freed. Must be
///                            NULL if `ret_buf` is NULL. When not NULL it is
///                            initialized on entry, so it is always safe to
///                            pass to msgpack_unpacked_destroy() afterwards,
///                            including after a failure.
/// @param[out]  ret_buf       Buffer containing parsed string. Written only
///                            on success, in which case the buffer is not
///                            freed here. If NULL or on failure the buffer is
///                            freed before returning.
///
/// @return kSDReadStatusNotShaDa, kSDReadStatusReadError or
///         kSDReadStatusSuccess. A non-success status reported by
///         fread_len() is returned unchanged.
static inline ShaDaReadResult shada_parse_msgpack(
    ShaDaReadDef *const sd_reader, const size_t length,
    msgpack_unpacked *ret_unpacked, char **const ret_buf)
  FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ARG(1)
{
  // shada_read_next_item() hands in an uninitialized msgpack_unpacked and
  // destroys it on its error path as well, so the out-parameter must be put
  // into a destroyable state before any early return.
  if (ret_unpacked != NULL) {
    msgpack_unpacked_init(ret_unpacked);
  }
  // Position reported in error messages: where the object payload starts.
  const uintmax_t initial_fpos = sd_reader->fpos;
  char *const buf = xmalloc(length);
  const ShaDaReadResult fl_ret = fread_len(sd_reader, buf, length);
  if (fl_ret != kSDReadStatusSuccess) {
    xfree(buf);
    return fl_ret;
  }
  // Out-of-memory is retried exactly once after trying to free some memory.
  bool did_try_to_free = false;
shada_parse_msgpack_read_next: {}
  size_t off = 0;
  msgpack_unpacked unpacked;
  msgpack_unpacked_init(&unpacked);
  const msgpack_unpack_return result =
      msgpack_unpack_next(&unpacked, buf, length, &off);
  ShaDaReadResult ret = kSDReadStatusSuccess;
  switch (result) {
    case MSGPACK_UNPACK_SUCCESS: {
      // A valid object that does not consume the whole buffer is treated
      // exactly like MSGPACK_UNPACK_EXTRA_BYTES.
      if (off < length) {
        goto shada_parse_msgpack_extra_bytes;
      }
      break;
    }
    case MSGPACK_UNPACK_PARSE_ERROR: {
      emsgu(_(RCERR "Failed to parse ShaDa file due to a msgpack parser error "
              "at position %" PRIu64),
            (uint64_t) initial_fpos);
      ret = kSDReadStatusNotShaDa;
      break;
    }
    case MSGPACK_UNPACK_NOMEM_ERROR: {
      if (!did_try_to_free) {
        did_try_to_free = true;
        try_to_free_memory();
        goto shada_parse_msgpack_read_next;
      }
      EMSG(_(e_outofmem));
      ret = kSDReadStatusReadError;
      break;
    }
    case MSGPACK_UNPACK_CONTINUE: {
      emsgu(_(RCERR "Failed to parse ShaDa file: incomplete msgpack string "
              "at position %" PRIu64),
            (uint64_t) initial_fpos);
      ret = kSDReadStatusNotShaDa;
      break;
    }
    case MSGPACK_UNPACK_EXTRA_BYTES: {
shada_parse_msgpack_extra_bytes:
      emsgu(_(RCERR "Failed to parse ShaDa file: extra bytes in msgpack string "
              "at position %" PRIu64),
            (uint64_t) initial_fpos);
      ret = kSDReadStatusNotShaDa;
      break;
    }
  }
  if (ret_buf != NULL && ret == kSDReadStatusSuccess) {
    // Success with a caller that wants the data: hand over the buffer and,
    // if requested, the unpacked object that points into it.
    if (ret_unpacked == NULL) {
      msgpack_unpacked_destroy(&unpacked);
    } else {
      *ret_unpacked = unpacked;
    }
    *ret_buf = buf;
  } else {
    // Failure, or a caller that only wanted validation: release everything.
    assert(ret_buf == NULL || ret != kSDReadStatusSuccess);
    msgpack_unpacked_destroy(&unpacked);
    xfree(buf);
  }
  return ret;
}
/// Write ShaDa file /// Write ShaDa file
/// ///
/// @param[in] sd_writer Structure containing file writer definition. /// @param[in] sd_writer Structure containing file writer definition.
@@ -3258,10 +3345,24 @@ shada_read_next_item_start:
? !(flags & kSDReadUnknown) ? !(flags & kSDReadUnknown)
: !((unsigned) (1 << type_u64) & flags)) : !((unsigned) (1 << type_u64) & flags))
|| (max_kbyte && length > max_kbyte * 1024)) { || (max_kbyte && length > max_kbyte * 1024)) {
// First entry is unknown or equal to "\n" (10)? Most likely this means that
// current file is not a ShaDa file because first item should normally be
// a header (excluding tests where first item is tested item). Check this by
// parsing entry contents: in non-ShaDa files this will most likely result
// in incomplete MessagePack string.
if (initial_fpos == 0
&& (type_u64 == '\n' || type_u64 > SHADA_LAST_ENTRY)) {
const ShaDaReadResult spm_ret = shada_parse_msgpack(sd_reader, length,
NULL, NULL);
if (spm_ret != kSDReadStatusSuccess) {
return spm_ret;
}
} else {
const ShaDaReadResult fl_ret = fread_len(sd_reader, NULL, length); const ShaDaReadResult fl_ret = fread_len(sd_reader, NULL, length);
if (fl_ret != kSDReadStatusSuccess) { if (fl_ret != kSDReadStatusSuccess) {
return fl_ret; return fl_ret;
} }
}
goto shada_read_next_item_start; goto shada_read_next_item_start;
} }
@@ -3269,73 +3370,34 @@ shada_read_next_item_start:
entry->type = kSDItemUnknown; entry->type = kSDItemUnknown;
entry->data.unknown_item.size = length; entry->data.unknown_item.size = length;
entry->data.unknown_item.type = type_u64; entry->data.unknown_item.type = type_u64;
if (initial_fpos == 0) {
const ShaDaReadResult spm_ret = shada_parse_msgpack(
sd_reader, length, NULL, &entry->data.unknown_item.contents);
if (spm_ret != kSDReadStatusSuccess) {
entry->type = kSDItemMissing;
}
return spm_ret;
} else {
entry->data.unknown_item.contents = xmalloc(length); entry->data.unknown_item.contents = xmalloc(length);
const ShaDaReadResult fl_ret = fread_len(sd_reader, const ShaDaReadResult fl_ret = fread_len(
entry->data.unknown_item.contents, sd_reader, entry->data.unknown_item.contents, length);
length);
if (fl_ret != kSDReadStatusSuccess) { if (fl_ret != kSDReadStatusSuccess) {
shada_free_shada_entry(entry); shada_free_shada_entry(entry);
entry->type = kSDItemMissing; entry->type = kSDItemMissing;
} }
return fl_ret; return fl_ret;
} }
char *const buf = xmalloc(length);
{
const ShaDaReadResult fl_ret = fread_len(sd_reader, buf, length);
if (fl_ret != kSDReadStatusSuccess) {
xfree(buf);
return fl_ret;
}
} }
msgpack_unpacked unpacked; msgpack_unpacked unpacked;
msgpack_unpacked_init(&unpacked); char *buf = NULL;
bool did_try_to_free = false; const ShaDaReadResult spm_ret = shada_parse_msgpack(sd_reader, length,
shada_read_next_item_read_next: {} &unpacked, &buf);
size_t off = 0; if (spm_ret != kSDReadStatusSuccess) {
const msgpack_unpack_return result = ret = spm_ret;
msgpack_unpack_next(&unpacked, buf, length, &off);
ret = kSDReadStatusNotShaDa;
switch (result) {
case MSGPACK_UNPACK_SUCCESS: {
if (off < length) {
goto shada_read_next_item_extra_bytes;
}
break;
}
case MSGPACK_UNPACK_PARSE_ERROR: {
emsgu(_(RCERR "Failed to parse ShaDa file due to a msgpack parser error "
"at position %" PRIu64),
(uint64_t) initial_fpos);
goto shada_read_next_item_error; goto shada_read_next_item_error;
} }
case MSGPACK_UNPACK_NOMEM_ERROR: {
if (!did_try_to_free) {
did_try_to_free = true;
try_to_free_memory();
goto shada_read_next_item_read_next;
}
EMSG(_(e_outofmem));
ret = kSDReadStatusReadError;
goto shada_read_next_item_error;
}
case MSGPACK_UNPACK_CONTINUE: {
emsgu(_(RCERR "Failed to parse ShaDa file: incomplete msgpack string "
"at position %" PRIu64),
(uint64_t) initial_fpos);
goto shada_read_next_item_error;
}
case MSGPACK_UNPACK_EXTRA_BYTES: {
shada_read_next_item_extra_bytes:
emsgu(_(RCERR "Failed to parse ShaDa file: extra bytes in msgpack string "
"at position %" PRIu64),
(uint64_t) initial_fpos);
goto shada_read_next_item_error;
}
}
ret = kSDReadStatusMalformed; ret = kSDReadStatusMalformed;
#define CHECK_KEY(key, expected) \ #define CHECK_KEY(key, expected) \
(key.via.str.size == sizeof(expected) - 1 \ (key.via.str.size == sizeof(expected) - 1 \
@@ -3968,7 +4030,6 @@ shada_read_next_item_hist_no_conv:
} }
} }
entry->type = (ShadaEntryType) type_u64; entry->type = (ShadaEntryType) type_u64;
goto shada_read_next_item_end;
#undef BIN_CONVERTED #undef BIN_CONVERTED
#undef CONVERTED #undef CONVERTED
#undef CHECK_KEY #undef CHECK_KEY
@@ -3989,17 +4050,16 @@ shada_read_next_item_hist_no_conv:
#undef TOSIZE #undef TOSIZE
#undef SET_ADDITIONAL_DATA #undef SET_ADDITIONAL_DATA
#undef SET_ADDITIONAL_ELEMENTS #undef SET_ADDITIONAL_ELEMENTS
shada_read_next_item_error: ret = kSDReadStatusSuccess;
msgpack_unpacked_destroy(&unpacked);
xfree(buf);
entry->type = (ShadaEntryType) type_u64;
shada_free_shada_entry(entry);
entry->type = kSDItemMissing;
return ret;
shada_read_next_item_end: shada_read_next_item_end:
msgpack_unpacked_destroy(&unpacked); msgpack_unpacked_destroy(&unpacked);
xfree(buf); xfree(buf);
return kSDReadStatusSuccess; return ret;
shada_read_next_item_error:
entry->type = (ShadaEntryType) type_u64;
shada_free_shada_entry(entry);
entry->type = kSDItemMissing;
goto shada_read_next_item_end;
} }
/// Check whether "name" is on removable media (according to 'shada') /// Check whether "name" is on removable media (according to 'shada')

View File

@@ -65,7 +65,7 @@ describe('ShaDa error handling', function()
it('fails on search pattern item with zero length', function() it('fails on search pattern item with zero length', function()
wshada('\002\000\000') wshada('\002\000\000')
eq('Vim(rshada):E576: Failed to parse ShaDa file: incomplete msgpack string at position 0', exc_exec(sdrcmd())) eq('Vim(rshada):E576: Failed to parse ShaDa file: incomplete msgpack string at position 3', exc_exec(sdrcmd()))
end) end)
it('fails on search pattern item with -2 timestamp', function() it('fails on search pattern item with -2 timestamp', function()
@@ -95,12 +95,12 @@ describe('ShaDa error handling', function()
-- get MSGPACK_UNPACK_PARSE_ERROR and not MSGPACK_UNPACK_CONTINUE or -- get MSGPACK_UNPACK_PARSE_ERROR and not MSGPACK_UNPACK_CONTINUE or
-- MSGPACK_UNPACK_EXTRA_BYTES. -- MSGPACK_UNPACK_EXTRA_BYTES.
wshada('\002\000\001\193') wshada('\002\000\001\193')
eq('Vim(rshada):E576: Failed to parse ShaDa file due to a msgpack parser error at position 0', exc_exec(sdrcmd())) eq('Vim(rshada):E576: Failed to parse ShaDa file due to a msgpack parser error at position 3', exc_exec(sdrcmd()))
end) end)
it('fails on search pattern item with incomplete map', function() it('fails on search pattern item with incomplete map', function()
wshada('\002\000\001\129') wshada('\002\000\001\129')
eq('Vim(rshada):E576: Failed to parse ShaDa file: incomplete msgpack string at position 0', exc_exec(sdrcmd())) eq('Vim(rshada):E576: Failed to parse ShaDa file: incomplete msgpack string at position 3', exc_exec(sdrcmd()))
end) end)
it('fails on search pattern item without a pattern', function() it('fails on search pattern item without a pattern', function()
@@ -110,7 +110,7 @@ describe('ShaDa error handling', function()
it('fails on search pattern with extra bytes', function() it('fails on search pattern with extra bytes', function()
wshada('\002\000\002\128\000') wshada('\002\000\002\128\000')
eq('Vim(rshada):E576: Failed to parse ShaDa file: extra bytes in msgpack string at position 0', exc_exec(sdrcmd())) eq('Vim(rshada):E576: Failed to parse ShaDa file: extra bytes in msgpack string at position 3', exc_exec(sdrcmd()))
end) end)
it('fails on search pattern item with NIL value', function() it('fails on search pattern item with NIL value', function()
@@ -414,4 +414,86 @@ describe('ShaDa error handling', function()
wshada('\009\000\017\146\129\161f\196\001/\130\161f\196\002/a\161c\192') wshada('\009\000\017\146\129\161f\196\001/\130\161f\196\002/a\161c\192')
eq('Vim(rshada):E575: Error while reading ShaDa file: buffer list entry entry at position 0 has c key value which is not an integer', exc_exec(sdrcmd())) eq('Vim(rshada):E575: Error while reading ShaDa file: buffer list entry entry at position 0 has c key value which is not an integer', exc_exec(sdrcmd()))
end) end)
-- Regression test: a plain-text viminfo file (as written by Vim 7.4) must not
-- be accepted as a ShaDa file. The fixture below is written verbatim to the
-- ShaDa file location before each command is run.
it('fails on invalid ShaDa file (viminfo file)', function()
wshada([[# This viminfo file was generated by Vim 7.4.
# You may edit it if you're careful!
# Value of 'encoding' when this file was written
*encoding=utf-8
# hlsearch on (H) or off (h):
~h
# Last Search Pattern:
~MSle0~/buffer=abuf
# Last Substitute Search Pattern:
~MSle0&^$
# Last Substitute String:
$
# Command Line History (newest to oldest):
:cq
# Search String History (newest to oldest):
? \<TMUX\>
# Expression History (newest to oldest):
=system('echo "\xAB"')
# Input Line History (newest to oldest):
@i
# Input Line History (newest to oldest):
# Registers:
"0 LINE 0
case FLAG_B: puts("B"); break;
"1 LINE 0
pick 874a489 shada,functests: Test compatibility support
""- CHAR 0
.
# global variables:
!STUF_HISTORY_TRANSLIT LIS []
!TR3_INPUT_HISTORY LIS []
# File marks:
'A 8320 12 ~/a.a/Proj/c/neovim-2076/src/nvim/ex_docmd.c
'0 66 5 ~/a.a/Proj/c/neovim/.git/rebase-merge/git-rebase-todo
'1 7 0 ~/.vam/powerline/.git/MERGE_MSG
'2 64 4 ~/a.a/Proj/c/neovim/.git/rebase-merge/git-rebase-todo
'3 9 0 ~/a.a/Proj/c/neovim/.git/COMMIT_EDITMSG
'4 62 0 ~/a.a/Proj/c/neovim/.git/rebase-merge/git-rebase-todo
'5 57 4 ~/a.a/Proj/c/neovim/.git/rebase-merge/git-rebase-todo
'6 1 0 ~/a.a/Proj/c/neovim/.git/rebase-merge/git-rebase-todo
'7 399 7 /usr/share/vim/vim74/doc/motion.txt
'8 1 0 ~/a.a/Proj/c/zpython/build/CMakeFiles/3.2.2/CMakeCCompiler.cmake
'9 1 0 ~/a.a/Proj/c/vim/README.txt
# Jumplist (newest first):
-' 66 5 ~/a.a/Proj/c/neovim/.git/rebase-merge/git-rebase-todo
# History of marks within files (newest to oldest):
> ~/a.a/Proj/c/neovim/.git/rebase-merge/git-rebase-todo
" 66 5
^ 66 6
. 66 5
+ 65 0
+ 65 0
]])
-- Reading must fail with E576 reported at position 3.
-- NOTE(review): position 3 is presumably the start of the first entry's
-- payload, i.e. right after three one-byte header fields -- confirm.
eq('Vim(rshada):E576: Failed to parse ShaDa file: extra bytes in msgpack string at position 3', exc_exec(sdrcmd()))
-- Plain :wshada over the existing non-ShaDa file must fail with the same
-- parse error.
eq('Vim(wshada):E576: Failed to parse ShaDa file: extra bytes in msgpack string at position 3', exc_exec('wshada ' .. shada_fname))
-- :wshada! is expected to succeed (exc_exec returns 0 when nothing is thrown).
eq(0, exc_exec('wshada! ' .. shada_fname))
end)
-- Regression test: a small shell wrapper script must not be accepted as a
-- ShaDa file. Its first three bytes are '#' (0x23), '!' (0x21) and '/' (0x2F,
-- dec 47); every printable ASCII byte is a valid one-byte msgpack integer, and
-- exactly 47 bytes follow those three, so the text could otherwise be taken
-- for a single unknown entry of type 0x23 with timestamp 0x21 and length 47.
it('fails on invalid ShaDa file (wrapper script)', function()
wshada('#!/bin/sh\n\npowerline "$@" 2>&1 | tee -a powerline\n')
-- Reading must fail with E576 reported at position 3, i.e. right after the
-- three bytes described above.
eq('Vim(rshada):E576: Failed to parse ShaDa file: extra bytes in msgpack string at position 3', exc_exec(sdrcmd()))
-- Plain :wshada over the existing non-ShaDa file must fail with the same
-- parse error.
eq('Vim(wshada):E576: Failed to parse ShaDa file: extra bytes in msgpack string at position 3', exc_exec('wshada ' .. shada_fname))
-- :wshada! is expected to succeed (exc_exec returns 0 when nothing is thrown).
eq(0, exc_exec('wshada! ' .. shada_fname))
end)
end) end)