vim-patch:9.2.0513: [security]: memory safety issues in spellfile.c (#39960)

Problem:  [security]: memory safety issues in spellfile.c
          (tacdm)
Solution: Add recursion limit to read_tree_node(), add length limit
          check in tree_count_words(), use alloc_clear() in
          spell_read_tree().

Github Security Advisory:
https://github.com/vim/vim/security/advisories/GHSA-3h95-3962-mmvf

25e4e46c58

Co-authored-by: Christian Brabandt <cb@256bit.org>
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
(cherry picked from commit 7a1bad27ca)
This commit is contained in:
zeertzjq
2026-05-23 08:51:47 +08:00
committed by github-actions[bot]
parent ae9f7accdd
commit 15a3318e13
4 changed files with 51 additions and 14 deletions

View File

@@ -177,6 +177,7 @@ struct slang_S {
// Info from the .sug file. Loaded on demand.
time_t sl_sugtime; ///< timestamp for .sug file
uint8_t *sl_sbyts; ///< soundfolded word bytes
int sl_sbyts_len; ///< length of sl_sbyts
idx_T *sl_sidxs; ///< soundfolded word indexes
buf_T *sl_sugbuf; ///< buffer with word number table
bool sl_sugloaded; ///< true when .sug file was loaded or failed to load

View File

@@ -852,7 +852,7 @@ endOK:
// Fill in the wordcount fields for a trie.
// Returns the total number of words.
static void tree_count_words(const uint8_t *byts, idx_T *idxs)
static void tree_count_words(const uint8_t *byts, int byts_len, idx_T *idxs)
{
idx_T arridx[MAXWLEN];
int curi[MAXWLEN];
@@ -883,8 +883,8 @@ static void tree_count_words(const uint8_t *byts, idx_T *idxs)
wordcount[depth]++;
// Skip over any other NUL bytes (same word with different
// flags).
while (byts[n + 1] == 0) {
// flags). But don't go over the end
while (n + 1 < byts_len && byts[n + 1] == 0) {
n++;
curi[depth]++;
}
@@ -956,8 +956,8 @@ void suggest_load_files(void)
// <SUGWORDTREE>: <wordtree>
// Read the trie with the soundfolded words.
if (spell_read_tree(fd, &slang->sl_sbyts, NULL, &slang->sl_sidxs,
false, 0) != 0) {
if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sbyts_len,
&slang->sl_sidxs, false, 0) != 0) {
someerror:
semsg(_(e_error_while_reading_sug_file_str),
slang->sl_fname);
@@ -1002,8 +1002,8 @@ someerror:
// Need to put word counts in the word tries, so that we can find
// a word by its number.
tree_count_words(slang->sl_fbyts, slang->sl_fidxs);
tree_count_words(slang->sl_sbyts, slang->sl_sidxs);
tree_count_words(slang->sl_fbyts, slang->sl_fbyts_len, slang->sl_fidxs);
tree_count_words(slang->sl_sbyts, slang->sl_sbyts_len, slang->sl_sidxs);
nextone:
if (fd != NULL) {
@@ -1683,8 +1683,11 @@ static int spell_read_tree(FILE *fd, uint8_t **bytsp, int *bytsp_len, idx_T **id
return 0;
}
// Allocate the byte array.
uint8_t *bp = xmalloc((size_t)len);
// Allocate the byte array. Zero-initialize so that any position the
// tree does not visit reads as 0; a stray BY_INDEX shared reference
// into such a slot then behaves as end-of-word in spellsuggest()
// instead of consuming an arbitrary heap byte as a siblingcount.
uint8_t *bp = xcalloc(1, (size_t)len);
*bytsp = bp;
if (bytsp_len != NULL) {
*bytsp_len = len;
@@ -1695,10 +1698,13 @@ static int spell_read_tree(FILE *fd, uint8_t **bytsp, int *bytsp_len, idx_T **id
*idxsp = ip;
// Recursively read the tree and store it in the array.
int idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt);
int idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt, 0);
if (idx < 0) {
return idx;
}
if (idx != len) {
return SP_FORMERROR;
}
return 0;
}
@@ -1715,12 +1721,20 @@ static int spell_read_tree(FILE *fd, uint8_t **bytsp, int *bytsp_len, idx_T **id
/// @param startidx current index in "byts" and "idxs"
/// @param prefixtree true for reading PREFIXTREE
/// @param maxprefcondnr maximum for <prefcondnr>
/// @param depth recursion level
static idx_T read_tree_node(FILE *fd, uint8_t *byts, idx_T *idxs, int maxidx, idx_T startidx,
bool prefixtree, int maxprefcondnr)
bool prefixtree, int maxprefcondnr, int depth)
{
idx_T idx = startidx;
#define SHARED_MASK 0x8000000
// Bail out on a crafted .spl whose tree recurses beyond the maximum
// word length: each tree level corresponds to one byte of a word, so
// any well-formed file has depth <= MAXWLEN.
if (depth > MAXWLEN) {
return SP_FORMERROR;
}
int len = getc(fd); // <siblingcount>
if (len <= 0) {
return SP_TRUNCERROR;
@@ -1801,7 +1815,8 @@ static idx_T read_tree_node(FILE *fd, uint8_t *byts, idx_T *idxs, int maxidx, id
idxs[startidx + i] &= ~SHARED_MASK;
} else {
idxs[startidx + i] = idx;
idx = read_tree_node(fd, byts, idxs, maxidx, idx, prefixtree, maxprefcondnr);
idx = read_tree_node(fd, byts, idxs, maxidx, idx,
prefixtree, maxprefcondnr, depth + 1);
if (idx < 0) {
break;
}
@@ -4939,7 +4954,7 @@ static int sug_filltree(spellinfo_T *spin, slang_T *slang)
spin->si_blocks_cnt = 0;
// Skip over any other NUL bytes (same word with different
// flags). But don't go over the end.
// flags). But don't go over the end
while (n + 1 < slang->sl_fbyts_len && byts[n + 1] == 0) {
n++;
curi[depth]++;

View File

@@ -923,7 +923,10 @@ func Test_spellsuggest_too_deep()
" This was incrementing "depth" over MAXWLEN.
new
norm s000G00<EFBFBD>000000000000
sil norm ..vzG................vvzG0 v z=
try
sil norm ..vzG................vvzG0 v z=
catch /E759:/
endtry
bwipe!
endfunc

View File

@@ -375,6 +375,24 @@ func Test_spellfile_format_error()
" LWORDTREE: incorrect sibling node count
call Spellfile_Test(0zFF00000001040000000000000000, 'E759:')
" LWORDTREE: declared nodecount larger than the tree actually fills.
" Root has two siblings: 'x' (recurses into an end-of-word at idx 3..4)
" and BY_INDEX targeting position 9. Tree fills positions 0..4, leaving
" 5..9 unwritten — byts[9] would be uninitialized without the fix.
call Spellfile_Test(0zFF0000000A02780100000979010000000000000000000000, 'E759:')
" LWORDTREE: recursion depth past MAXWLEN. A linear chain of 254
" (siblingcount=1, byte='a') frames drives read_tree_node to depth
" MAXWLEN where the new guard rejects. The trailing (01 00) gives the
" chain a clean end-of-word so an *unguarded* parser would accept the
" file silently — that's what makes this a meaningful regression test
" for the depth check specifically (a deeper chain would also crash
" unguarded builds via stack overflow, which we don't want in CI).
let v = eval('0zFF00000200' .. repeat('0161', 255)
\ .. '0100' .. repeat('00', 8))
call Spellfile_Test(v, 'E759:')
" KWORDTREE: missing tree node
call Spellfile_Test(0zFF0000000000000004, 'E758:')