vim-patch:9.2.0513: [security]: memory safety issues in spellfile.c (#39960)

Problem: [security]: memory safety issues in spellfile.c (tacdm) Solution: Add recursion limit to read_tree_node(), add length limit check in tree_count_words(), use alloc_clear() in spell_read_tree(). Github Security Advisory: https://github.com/vim/vim/security/advisories/GHSA-3h95-3962-mmvf 25e4e46c58 Co-authored-by: Christian Brabandt <cb@256bit.org> Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com> (cherry picked from commit 7a1bad27ca)
2026-08-01 05:09:08 +00:00 · 2026-05-23 08:51:47 +08:00
parent ae9f7accdd
commit 15a3318e13
4 changed files with 51 additions and 14 deletions
--- a/src/nvim/spell_defs.h
+++ b/src/nvim/spell_defs.h
@@ -177,6 +177,7 @@ struct slang_S {
  // Info from the .sug file.  Loaded on demand.
  time_t sl_sugtime;       ///< timestamp for .sug file
  uint8_t *sl_sbyts;       ///< soundfolded word bytes
+  int sl_sbyts_len;        ///< length of sl_sbyts
  idx_T *sl_sidxs;         ///< soundfolded word indexes
  buf_T *sl_sugbuf;        ///< buffer with word number table
  bool sl_sugloaded;       ///< true when .sug file was loaded or failed to load
--- a/src/nvim/spellfile.c
+++ b/src/nvim/spellfile.c
@@ -852,7 +852,7 @@ endOK:

 // Fill in the wordcount fields for a trie.
 // Returns the total number of words.
-static void tree_count_words(const uint8_t *byts, idx_T *idxs)
+static void tree_count_words(const uint8_t *byts, int byts_len, idx_T *idxs)
 {
  idx_T arridx[MAXWLEN];
  int curi[MAXWLEN];
@@ -883,8 +883,8 @@ static void tree_count_words(const uint8_t *byts, idx_T *idxs)
        wordcount[depth]++;

        // Skip over any other NUL bytes (same word with different
-        // flags).
-        while (byts[n + 1] == 0) {
+        // flags).  But don't go over the end
+        while (n + 1 < byts_len && byts[n + 1] == 0) {
          n++;
          curi[depth]++;
        }
@@ -956,8 +956,8 @@ void suggest_load_files(void)

      // <SUGWORDTREE>: <wordtree>
      // Read the trie with the soundfolded words.
-      if (spell_read_tree(fd, &slang->sl_sbyts, NULL, &slang->sl_sidxs,
-                          false, 0) != 0) {
+      if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sbyts_len,
+                          &slang->sl_sidxs, false, 0) != 0) {
 someerror:
        semsg(_(e_error_while_reading_sug_file_str),
              slang->sl_fname);
@@ -1002,8 +1002,8 @@ someerror:

      // Need to put word counts in the word tries, so that we can find
      // a word by its number.
-      tree_count_words(slang->sl_fbyts, slang->sl_fidxs);
-      tree_count_words(slang->sl_sbyts, slang->sl_sidxs);
+      tree_count_words(slang->sl_fbyts, slang->sl_fbyts_len, slang->sl_fidxs);
+      tree_count_words(slang->sl_sbyts, slang->sl_sbyts_len, slang->sl_sidxs);

 nextone:
      if (fd != NULL) {
@@ -1683,8 +1683,11 @@ static int spell_read_tree(FILE *fd, uint8_t **bytsp, int *bytsp_len, idx_T **id
    return 0;
  }

-  // Allocate the byte array.
-  uint8_t *bp = xmalloc((size_t)len);
+  // Allocate the byte array.  Zero-initialize so that any position the
+  // tree does not visit reads as 0; a stray BY_INDEX shared reference
+  // into such a slot then behaves as end-of-word in spellsuggest()
+  // instead of consuming an arbitrary heap byte as a siblingcount.
+  uint8_t *bp = xcalloc(1, (size_t)len);
  *bytsp = bp;
  if (bytsp_len != NULL) {
    *bytsp_len = len;
@@ -1695,10 +1698,13 @@ static int spell_read_tree(FILE *fd, uint8_t **bytsp, int *bytsp_len, idx_T **id
  *idxsp = ip;

  // Recursively read the tree and store it in the array.
-  int idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt);
+  int idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt, 0);
  if (idx < 0) {
    return idx;
  }
+  if (idx != len) {
+    return SP_FORMERROR;
+  }
  return 0;
 }

@@ -1715,12 +1721,20 @@ static int spell_read_tree(FILE *fd, uint8_t **bytsp, int *bytsp_len, idx_T **id
 /// @param startidx  current index in "byts" and "idxs"
 /// @param prefixtree  true for reading PREFIXTREE
 /// @param maxprefcondnr  maximum for <prefcondnr>
+/// @param depth  recursion level
 static idx_T read_tree_node(FILE *fd, uint8_t *byts, idx_T *idxs, int maxidx, idx_T startidx,
-                            bool prefixtree, int maxprefcondnr)
+                            bool prefixtree, int maxprefcondnr, int depth)
 {
  idx_T idx = startidx;
 #define SHARED_MASK     0x8000000

+  // Bail out on a crafted .spl whose tree recurses beyond the maximum
+  // word length: each tree level corresponds to one byte of a word, so
+  // any well-formed file has depth <= MAXWLEN.
+  if (depth > MAXWLEN) {
+    return SP_FORMERROR;
+  }
+
  int len = getc(fd);                                       // <siblingcount>
  if (len <= 0) {
    return SP_TRUNCERROR;
@@ -1801,7 +1815,8 @@ static idx_T read_tree_node(FILE *fd, uint8_t *byts, idx_T *idxs, int maxidx, id
        idxs[startidx + i] &= ~SHARED_MASK;
      } else {
        idxs[startidx + i] = idx;
-        idx = read_tree_node(fd, byts, idxs, maxidx, idx, prefixtree, maxprefcondnr);
+        idx = read_tree_node(fd, byts, idxs, maxidx, idx,
+                             prefixtree, maxprefcondnr, depth + 1);
        if (idx < 0) {
          break;
        }
@@ -4939,7 +4954,7 @@ static int sug_filltree(spellinfo_T *spin, slang_T *slang)
        spin->si_blocks_cnt = 0;

        // Skip over any other NUL bytes (same word with different
-        // flags).  But don't go over the end.
+        // flags).  But don't go over the end
        while (n + 1 < slang->sl_fbyts_len && byts[n + 1] == 0) {
          n++;
          curi[depth]++;
--- a/test/old/testdir/test_spell.vim
+++ b/test/old/testdir/test_spell.vim
@@ -923,7 +923,10 @@ func Test_spellsuggest_too_deep()
  " This was incrementing "depth" over MAXWLEN.
  new
  norm s000G00<EFBFBD>000000000000
-  sil norm ..vzG................vvzG0     v z=
+  try
+    sil norm ..vzG................vvzG0     v z=
+  catch /E759:/
+  endtry
  bwipe!
 endfunc

--- a/test/old/testdir/test_spellfile.vim
+++ b/test/old/testdir/test_spellfile.vim
@@ -375,6 +375,24 @@ func Test_spellfile_format_error()
  " LWORDTREE: incorrect sibling node count
  call Spellfile_Test(0zFF00000001040000000000000000, 'E759:')

+  " LWORDTREE: declared nodecount larger than the tree actually fills.
+  " Root has two siblings: 'x' (recurses into an end-of-word at idx 3..4)
+  " and BY_INDEX targeting position 9.  Tree fills positions 0..4, leaving
+  " 5..9 unwritten — byts[9] would be uninitialized without the fix.
+  call Spellfile_Test(0zFF0000000A02780100000979010000000000000000000000, 'E759:')
+
+  " LWORDTREE: recursion depth past MAXWLEN.  A linear chain of 254
+  " (siblingcount=1, byte='a') frames drives read_tree_node to depth
+  " MAXWLEN where the new guard rejects.  The trailing (01 00) gives the
+  " chain a clean end-of-word so an *unguarded* parser would accept the
+  " file silently — that's what makes this a meaningful regression test
+  " for the depth check specifically (a deeper chain would also crash
+  " unguarded builds via stack overflow, which we don't want in CI).
+  let v = eval('0zFF00000200' .. repeat('0161', 255)
+               \ .. '0100' ..  repeat('00', 8))
+
+  call Spellfile_Test(v, 'E759:')
+
  " KWORDTREE: missing tree node
  call Spellfile_Test(0zFF0000000000000004, 'E758:')