vim-patch:8.1.0098: segfault when pattern with \z() is very slow (#9283)

Problem:    Segfault when pattern with \z() is very slow.
Solution:   Check for NULL regprog.  Add "nfa_fail" to test_override() to be
            able to test this.  Fix that 'hlsearch' resets called_emsg.
bcf9442307

closes #8788
This commit is contained in:
Justin M. Keyes
2018-11-29 01:51:26 +01:00
committed by GitHub
parent 98eaf60a98
commit 5a752c97d5
6 changed files with 63 additions and 32 deletions

View File

@@ -485,7 +485,7 @@ static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%(");
static char_u e_unmatchedp[] = N_("E54: Unmatched %s("); static char_u e_unmatchedp[] = N_("E54: Unmatched %s(");
static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)"); static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)");
static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here"); static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here");
static char_u e_z1_not_allowed[] = N_("E67: \\z1 et al. not allowed here"); static char_u e_z1_not_allowed[] = N_("E67: \\z1 - \\z9 not allowed here");
static char_u e_missing_sb[] = N_("E69: Missing ] after %s%%["); static char_u e_missing_sb[] = N_("E69: Missing ] after %s%%[");
static char_u e_empty_sb[] = N_("E70: Empty %s%%[]"); static char_u e_empty_sb[] = N_("E70: Empty %s%%[]");
#define NOT_MULTI 0 #define NOT_MULTI 0
@@ -1952,7 +1952,7 @@ static char_u *regatom(int *flagp)
{ {
c = no_Magic(getchr()); c = no_Magic(getchr());
switch (c) { switch (c) {
case '(': if (reg_do_extmatch != REX_SET) case '(': if ((reg_do_extmatch & REX_SET) == 0)
EMSG_RET_NULL(_(e_z_not_allowed)); EMSG_RET_NULL(_(e_z_not_allowed));
if (one_exactly) if (one_exactly)
EMSG_ONE_RET_NULL; EMSG_ONE_RET_NULL;
@@ -1971,7 +1971,7 @@ static char_u *regatom(int *flagp)
case '6': case '6':
case '7': case '7':
case '8': case '8':
case '9': if (reg_do_extmatch != REX_USE) case '9': if ((reg_do_extmatch & REX_USE) == 0)
EMSG_RET_NULL(_(e_z1_not_allowed)); EMSG_RET_NULL(_(e_z1_not_allowed));
ret = regnode(ZREF + c - '0'); ret = regnode(ZREF + c - '0');
re_has_z = REX_USE; re_has_z = REX_USE;
@@ -7257,15 +7257,13 @@ int vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
return vim_regexec_both(rmp, line, col, true); return vim_regexec_both(rmp, line, col, true);
} }
/* /// Match a regexp against multiple lines.
* Match a regexp against multiple lines. /// "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
* "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). /// Note: "rmp->regprog" may be freed and changed, even set to NULL.
* Note: "rmp->regprog" may be freed and changed. /// Uses curbuf for line count and 'iskeyword'.
* Uses curbuf for line count and 'iskeyword'. ///
* /// Return zero if there is no match. Return number of lines contained in the
* Return zero if there is no match. Return number of lines contained in the /// match otherwise.
* match otherwise.
*/
long vim_regexec_multi( long vim_regexec_multi(
regmmatch_T *rmp, regmmatch_T *rmp,
win_T *win, /* window in which to search or NULL */ win_T *win, /* window in which to search or NULL */
@@ -7297,7 +7295,12 @@ long vim_regexec_multi(
p_re = BACKTRACKING_ENGINE; p_re = BACKTRACKING_ENGINE;
vim_regfree(rmp->regprog); vim_regfree(rmp->regprog);
report_re_switch(pat); report_re_switch(pat);
// checking for \z misuse was already done when compiling for NFA,
// allow all here
reg_do_extmatch = REX_ALL;
rmp->regprog = vim_regcomp(pat, re_flags); rmp->regprog = vim_regcomp(pat, re_flags);
reg_do_extmatch = 0;
if (rmp->regprog != NULL) { if (rmp->regprog != NULL) {
result = rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, result = rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col,
tm); tm);

View File

@@ -5,19 +5,20 @@
#include "nvim/buffer_defs.h" #include "nvim/buffer_defs.h"
#include "nvim/regexp_defs.h" #include "nvim/regexp_defs.h"
/* Second argument for vim_regcomp(). */ // Second argument for vim_regcomp().
#define RE_MAGIC 1 /* 'magic' option */ #define RE_MAGIC 1 ///< 'magic' option
#define RE_STRING 2 /* match in string instead of buffer text */ #define RE_STRING 2 ///< match in string instead of buffer text
#define RE_STRICT 4 /* don't allow [abc] without ] */ #define RE_STRICT 4 ///< don't allow [abc] without ]
#define RE_AUTO 8 /* automatic engine selection */ #define RE_AUTO 8 ///< automatic engine selection
/* values for reg_do_extmatch */ // values for reg_do_extmatch
#define REX_SET 1 /* to allow \z\(...\), */ #define REX_SET 1 ///< to allow \z\(...\),
#define REX_USE 2 /* to allow \z\1 et al. */ #define REX_USE 2 ///< to allow \z\1 et al.
#define REX_ALL (REX_SET | REX_USE)
/* regexp.c */ // regexp.c
#ifdef INCLUDE_GENERATED_DECLARATIONS #ifdef INCLUDE_GENERATED_DECLARATIONS
# include "regexp.h.generated.h" # include "regexp.h.generated.h"
#endif #endif
#endif /* NVIM_REGEXP_H */ #endif // NVIM_REGEXP_H

View File

@@ -1367,20 +1367,23 @@ static int nfa_regatom(void)
case '7': case '7':
case '8': case '8':
case '9': case '9':
/* \z1...\z9 */ // \z1...\z9
if (reg_do_extmatch != REX_USE) if ((reg_do_extmatch & REX_USE) == 0) {
EMSG_RET_FAIL(_(e_z1_not_allowed)); EMSG_RET_FAIL(_(e_z1_not_allowed));
}
EMIT(NFA_ZREF1 + (no_Magic(c) - '1')); EMIT(NFA_ZREF1 + (no_Magic(c) - '1'));
/* No need to set nfa_has_backref, the sub-matches don't /* No need to set nfa_has_backref, the sub-matches don't
* change when \z1 .. \z9 matches or not. */ * change when \z1 .. \z9 matches or not. */
re_has_z = REX_USE; re_has_z = REX_USE;
break; break;
case '(': case '(':
/* \z( */ // \z(
if (reg_do_extmatch != REX_SET) if (reg_do_extmatch != REX_SET) {
EMSG_RET_FAIL(_(e_z_not_allowed)); EMSG_RET_FAIL(_(e_z_not_allowed));
if (nfa_reg(REG_ZPAREN) == FAIL) }
return FAIL; /* cascaded error */ if (nfa_reg(REG_ZPAREN) == FAIL) {
return FAIL; // cascaded error
}
re_has_z = REX_SET; re_has_z = REX_SET;
break; break;
default: default:
@@ -5052,10 +5055,11 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
/* swap lists */ /* swap lists */
thislist = &list[flag]; thislist = &list[flag];
nextlist = &list[flag ^= 1]; nextlist = &list[flag ^= 1];
nextlist->n = 0; /* clear nextlist */ nextlist->n = 0; // clear nextlist
nextlist->has_pim = FALSE; nextlist->has_pim = false;
++nfa_listid; nfa_listid++;
if (prog->re_engine == AUTOMATIC_ENGINE && nfa_listid >= NFA_MAX_STATES) { if (prog->re_engine == AUTOMATIC_ENGINE
&& (nfa_listid >= NFA_MAX_STATES)) {
// Too many states, retry with old engine. // Too many states, retry with old engine.
nfa_match = NFA_TOO_EXPENSIVE; nfa_match = NFA_TOO_EXPENSIVE;
goto theend; goto theend;

View File

@@ -5603,6 +5603,7 @@ next_search_hl (
linenr_T l; linenr_T l;
colnr_T matchcol; colnr_T matchcol;
long nmatched = 0; long nmatched = 0;
int save_called_emsg = called_emsg;
if (shl->lnum != 0) { if (shl->lnum != 0) {
/* Check for three situations: /* Check for three situations:
@@ -5695,6 +5696,9 @@ next_search_hl (
shl->lnum += shl->rm.startpos[0].lnum; shl->lnum += shl->rm.startpos[0].lnum;
break; /* useful match found */ break; /* useful match found */
} }
// Restore called_emsg for assert_fails().
called_emsg = save_called_emsg;
} }
} }

View File

@@ -2894,6 +2894,13 @@ static int syn_regexec(regmmatch_T *rmp, linenr_T lnum, colnr_T col, syn_time_T
pt = profile_start(); pt = profile_start();
} }
if (rmp->regprog == NULL) {
// This can happen if a previous call to vim_regexec_multi() tried to
// use the NFA engine, which resulted in NFA_TOO_EXPENSIVE, and
// compiling the pattern with the other engine fails.
return false;
}
rmp->rmm_maxcol = syn_buf->b_p_smc; rmp->rmm_maxcol = syn_buf->b_p_smc;
r = vim_regexec_multi(rmp, syn_win, syn_buf, lnum, col, NULL); r = vim_regexec_multi(rmp, syn_win, syn_buf, lnum, col, NULL);

View File

@@ -482,3 +482,15 @@ fun Test_synstack_synIDtrans()
syn clear syn clear
bw! bw!
endfunc endfunc
" Using \z() in a region with NFA failing should not crash.
func Test_syn_wrong_z_one()
" Open a scratch window and fill it with two lines; the second line contains
" both "foo" and "bar" so the region start pattern below can match.
new
call setline(1, ['just some text', 'with foo and bar to match with'])
" The start pattern captures text with \z(...\) and the end pattern refers
" back to that capture with \z1.
syn region FooBar start="foo\z(.*\)bar" end="\z1"
" NOTE(review): the test_override() calls are left commented out, presumably
" because test_override() is not available here -- confirm.  With them
" disabled the "nfa_fail" condition is not forced, so the test only checks
" that the redraws below complete without crashing.
" call test_override("nfa_fail", 1)
" Redraw twice, as in the original test.
redraw!
redraw!
" call test_override("ALL", 0)
" Discard the scratch buffer created by :new.
bwipe!
endfunc