From e5667b9c157a26731d4bbcaf8a9cf69e4c73e8fb Mon Sep 17 00:00:00 2001
From: zeertzjq <zeertzjq@outlook.com>
Date: Thu, 12 Mar 2026 07:42:31 +0800
Subject: [PATCH] vim-patch:9.2.0137: [security]: crash with composing char in
 collection range (#38261)

Problem:  Using a composing character as the end of a range inside a
          collection may corrupt the NFA postfix stack
          (Nathan Mills, after v9.1.0011)
Solution: When a character is used as the endpoint of a range, do not emit
          its composing characters separately. Range handling only uses
          the base codepoint.

supported by AI

Github Advisory:
https://github.com/vim/vim/security/advisories/GHSA-9phh-423r-778r

https://github.com/vim/vim/commit/36d6e87542cf823d833e451e09a90ee429899cec

Co-authored-by: Christian Brabandt <cb@256bit.org>
---
 src/nvim/regexp.c                     | 18 ++++++++++++++++--
 test/old/testdir/test_regexp_utf8.vim | 18 ++++++++++++++++++
 2 files changed, 34 insertions(+), 2 deletions(-)
diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c
index d547cb17e1..ba3d9cb5d6 100644
--- a/src/nvim/regexp.c
+++ b/src/nvim/regexp.c
@@ -10430,6 +10430,7 @@ collection:
     p = (uint8_t *)regparse;
     endp = (uint8_t *)skip_anyof((char *)p);
     if (*endp == ']') {
+      bool range_endpoint;
       // Try to reverse engineer character classes. For example,
       // recognize that [0-9] stands for \d and [A-Za-z_] for \h,
       // and perform the necessary substitutions in the NFA.
@@ -10466,6 +10467,7 @@ collection:
       emit_range = false;
       while ((uint8_t *)regparse < endp) {
         int oldstartc = startc;
+        range_endpoint = false;
         startc = -1;
         got_coll_char = false;
         if (*regparse == '[') {
@@ -10609,6 +10611,7 @@ collection:
         // Previous char was '-', so this char is end of range.
         if (emit_range) {
           int endc = startc;
+          range_endpoint = true;
           startc = oldstartc;
           if (startc > endc) {
             EMSG_RET_FAIL(_(e_reverse_range));
@@ -10673,7 +10676,14 @@ collection:
         }
 
         int plen;
-        if (utf_ptr2len(regparse) != (plen = utfc_ptr2len(regparse))) {
+        //
+        // If this character was consumed as the end of a range, do not emit its
+        // composing characters separately.  Range handling only uses the base
+        // codepoint; emitting the composing part again would duplicate the
+        // character in the postfix stream and corrupt the NFA stack.
+        //
+        if (!range_endpoint
+            && utf_ptr2len(regparse) != (plen = utfc_ptr2len(regparse))) {
           int i = utf_ptr2len(regparse);
 
           c = utf_ptr2char(regparse + i);
@@ -11839,7 +11849,11 @@ static int nfa_max_width(nfa_state_T *startstate, int depth)
       // Matches some character, including composing chars.
       len += MB_MAXBYTES;
       if (state->c != NFA_ANY) {
-        // Skip over the characters.
+        // Skip over the compiled collection.  Bail out on a malformed NFA
+        // (missing out1 or out1->out) instead of dereferencing NULL.
+        if (state->out1 == NULL || state->out1->out == NULL) {
+          return -1;
+        }
         state = state->out1->out;
         continue;
       }
diff --git a/test/old/testdir/test_regexp_utf8.vim b/test/old/testdir/test_regexp_utf8.vim
index 5cd9c6e967..d6284a559f 100644
--- a/test/old/testdir/test_regexp_utf8.vim
+++ b/test/old/testdir/test_regexp_utf8.vim
@@ -638,4 +638,22 @@ func Test_replace_multibyte_match_in_multi_lines()
   set ignorecase&vim re&vim
 endfun
 
+func Test_regex_collection_range_with_composing_crash()
+  " Regression test: composing char in collection range caused NFA crash/E874
+  new
+  call setline(1, ['00', '0ֻ', '01'])
+  let patterns = [ '0[0-0ֻ]\@<!','0[0ֻ]\@<!']
+
+  for pat in patterns
+    " Must compile and run without crashing; the only expected error is E486
+    for re in range(3)
+      let regex = '\%#=' .. re .. pat
+      call search(regex)
+      call assert_fails($"/{regex}\<cr>", 'E486:')
+    endfor
+  endfor
+
+  bwipe!
+endfunc
+
 " vim: shiftwidth=2 sts=2 expandtab