mirror of
https://github.com/neovim/neovim.git
synced 2026-04-23 07:45:32 +00:00
vim-patch:9.2.0137: [security]: crash with composing char in collection range (#38261)
Problem: Using a composing character as the end of a range inside a
collection may corrupt the NFA postfix stack
(Nathan Mills, after v9.1.0011)
Solution: When a character is used as the endpoint of a range, do not emit
its composing characters separately. Range handling only uses
the base codepoint.
supported by AI
GitHub Advisory:
https://github.com/vim/vim/security/advisories/GHSA-9phh-423r-778r
Upstream Vim commit: 36d6e87542
Co-authored-by: Christian Brabandt <cb@256bit.org>
(cherry picked from commit e5667b9c15)
This commit is contained in:
committed by
github-actions[bot]
parent
e0ea90070a
commit
976db1ba4b
@@ -10410,6 +10410,7 @@ collection:
|
|||||||
p = (uint8_t *)regparse;
|
p = (uint8_t *)regparse;
|
||||||
endp = (uint8_t *)skip_anyof((char *)p);
|
endp = (uint8_t *)skip_anyof((char *)p);
|
||||||
if (*endp == ']') {
|
if (*endp == ']') {
|
||||||
|
bool range_endpoint;
|
||||||
// Try to reverse engineer character classes. For example,
|
// Try to reverse engineer character classes. For example,
|
||||||
// recognize that [0-9] stands for \d and [A-Za-z_] for \h,
|
// recognize that [0-9] stands for \d and [A-Za-z_] for \h,
|
||||||
// and perform the necessary substitutions in the NFA.
|
// and perform the necessary substitutions in the NFA.
|
||||||
@@ -10446,6 +10447,7 @@ collection:
|
|||||||
emit_range = false;
|
emit_range = false;
|
||||||
while ((uint8_t *)regparse < endp) {
|
while ((uint8_t *)regparse < endp) {
|
||||||
int oldstartc = startc;
|
int oldstartc = startc;
|
||||||
|
range_endpoint = false;
|
||||||
startc = -1;
|
startc = -1;
|
||||||
got_coll_char = false;
|
got_coll_char = false;
|
||||||
if (*regparse == '[') {
|
if (*regparse == '[') {
|
||||||
@@ -10584,6 +10586,7 @@ collection:
|
|||||||
// Previous char was '-', so this char is end of range.
|
// Previous char was '-', so this char is end of range.
|
||||||
if (emit_range) {
|
if (emit_range) {
|
||||||
int endc = startc;
|
int endc = startc;
|
||||||
|
range_endpoint = true;
|
||||||
startc = oldstartc;
|
startc = oldstartc;
|
||||||
if (startc > endc) {
|
if (startc > endc) {
|
||||||
EMSG_RET_FAIL(_(e_reverse_range));
|
EMSG_RET_FAIL(_(e_reverse_range));
|
||||||
@@ -10648,7 +10651,14 @@ collection:
|
|||||||
}
|
}
|
||||||
|
|
||||||
int plen;
|
int plen;
|
||||||
if (utf_ptr2len(regparse) != (plen = utfc_ptr2len(regparse))) {
|
//
|
||||||
|
// If this character was consumed as the end of a range, do not emit its
|
||||||
|
// composing characters separately. Range handling only uses the base
|
||||||
|
// codepoint; emitting the composing part again would duplicate the
|
||||||
|
// character in the postfix stream and corrupt the NFA stack.
|
||||||
|
//
|
||||||
|
if (!range_endpoint
|
||||||
|
&& utf_ptr2len(regparse) != (plen = utfc_ptr2len(regparse))) {
|
||||||
int i = utf_ptr2len(regparse);
|
int i = utf_ptr2len(regparse);
|
||||||
|
|
||||||
c = utf_ptr2char(regparse + i);
|
c = utf_ptr2char(regparse + i);
|
||||||
@@ -11814,7 +11824,11 @@ static int nfa_max_width(nfa_state_T *startstate, int depth)
|
|||||||
// Matches some character, including composing chars.
|
// Matches some character, including composing chars.
|
||||||
len += MB_MAXBYTES;
|
len += MB_MAXBYTES;
|
||||||
if (state->c != NFA_ANY) {
|
if (state->c != NFA_ANY) {
|
||||||
// Skip over the characters.
|
// Skip over the compiled collection.
|
||||||
|
// malformed NFAs must not crash width estimation.
|
||||||
|
if (state->out1 == NULL || state->out1->out == NULL) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
state = state->out1->out;
|
state = state->out1->out;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -642,4 +642,22 @@ func Test_replace_multibyte_match_in_multi_lines()
|
|||||||
set ignorecase&vim re&vim
|
set ignorecase&vim re&vim
|
||||||
endfun
|
endfun
|
||||||
|
|
||||||
|
func Test_regex_collection_range_with_composing_crash()
|
||||||
|
" Regression test: composing char in collection range caused NFA crash/E874
|
||||||
|
new
|
||||||
|
call setline(1, ['00', '0ֻ', '01'])
|
||||||
|
let patterns = [ '0[0-0ֻ]\@<!','0[0ֻ]\@<!']
|
||||||
|
|
||||||
|
for pat in patterns
|
||||||
|
" Should compile and execute without crash or error
|
||||||
|
for re in range(3)
|
||||||
|
let regex = '\%#=' .. re .. pat
|
||||||
|
call search(regex)
|
||||||
|
call assert_fails($"/{regex}\<cr>", 'E486:')
|
||||||
|
endfor
|
||||||
|
endfor
|
||||||
|
|
||||||
|
bwipe!
|
||||||
|
endfunc
|
||||||
|
|
||||||
" vim: shiftwidth=2 sts=2 expandtab
|
" vim: shiftwidth=2 sts=2 expandtab
|
||||||
|
|||||||
Reference in New Issue
Block a user