mirror of
https://github.com/neovim/neovim.git
synced 2025-09-30 15:08:35 +00:00
vim-patch:7.4.497 #2295
Problem: With some regexp patterns the NFA engine uses many states and becomes very slow. To the user it looks like Vim freezes. Solution: When the number of states reaches a limit fall back to the old engine. (Christian Brabandt) https://github.com/vim/vim/releases/tag/v7-4-497 Helped-by: David Bürgin <676c7473@gmail.com> Helped-by: Justin M. Keyes <justinkz@gmail.com> Helped-by: Scott Prager <splinterofchaos@gmail.com>
This commit is contained in:

committed by
Justin M. Keyes

parent
a69e464f70
commit
d3bb177f1e
@@ -6914,13 +6914,8 @@ static regengine_T bt_regengine =
|
||||
bt_regcomp,
|
||||
bt_regfree,
|
||||
bt_regexec_nl,
|
||||
bt_regexec_multi
|
||||
#ifdef REGEXP_DEBUG
|
||||
,(char_u *)""
|
||||
#endif
|
||||
#ifdef DEBUG
|
||||
,NULL
|
||||
#endif
|
||||
bt_regexec_multi,
|
||||
(char_u *)""
|
||||
};
|
||||
|
||||
|
||||
@@ -6934,21 +6929,14 @@ static regengine_T nfa_regengine =
|
||||
nfa_regcomp,
|
||||
nfa_regfree,
|
||||
nfa_regexec_nl,
|
||||
nfa_regexec_multi
|
||||
#ifdef REGEXP_DEBUG
|
||||
,(char_u *)""
|
||||
#endif
|
||||
#ifdef DEBUG
|
||||
, NULL
|
||||
#endif
|
||||
nfa_regexec_multi,
|
||||
(char_u *)""
|
||||
};
|
||||
|
||||
/* Which regexp engine to use? Needed for vim_regcomp().
|
||||
* Must match with 'regexpengine'. */
|
||||
static int regexp_engine = 0;
|
||||
#define AUTOMATIC_ENGINE 0
|
||||
#define BACKTRACKING_ENGINE 1
|
||||
#define NFA_ENGINE 2
|
||||
|
||||
#ifdef REGEXP_DEBUG
|
||||
static char_u regname[][30] = {
|
||||
"AUTOMATIC Regexp Engine",
|
||||
@@ -6990,10 +6978,8 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags)
|
||||
regexp_engine = AUTOMATIC_ENGINE;
|
||||
}
|
||||
}
|
||||
#ifdef REGEXP_DEBUG
|
||||
bt_regengine.expr = expr;
|
||||
nfa_regengine.expr = expr;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* First try the NFA engine, unless backtracking was requested.
|
||||
@@ -7003,11 +6989,12 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags)
|
||||
else
|
||||
prog = bt_regengine.regcomp(expr, re_flags);
|
||||
|
||||
if (prog == NULL) { /* error compiling regexp with initial engine */
|
||||
// Check for error compiling regexp with initial engine.
|
||||
if (prog == NULL) {
|
||||
#ifdef BT_REGEXP_DEBUG_LOG
|
||||
if (regexp_engine != BACKTRACKING_ENGINE) { /* debugging log for NFA */
|
||||
FILE *f;
|
||||
f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
|
||||
// Debugging log for NFA.
|
||||
if (regexp_engine != BACKTRACKING_ENGINE) {
|
||||
FILE *f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
|
||||
if (f) {
|
||||
fprintf(f, "Syntax error in \"%s\"\n", expr);
|
||||
fclose(f);
|
||||
@@ -7016,12 +7003,22 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags)
|
||||
BT_REGEXP_DEBUG_LOG_NAME);
|
||||
}
|
||||
#endif
|
||||
/*
|
||||
* If the NFA engine failed, the backtracking engine won't work either.
|
||||
*
|
||||
if (regexp_engine == AUTOMATIC_ENGINE)
|
||||
prog = bt_regengine.regcomp(expr, re_flags);
|
||||
*/
|
||||
// If the NFA engine failed, try the backtracking engine.
|
||||
// Disabled for now, both engines fail on the same patterns.
|
||||
// Re-enable when regcomp() fails when the pattern would work better
|
||||
// with the other engine.
|
||||
//
|
||||
// if (regexp_engine == AUTOMATIC_ENGINE) {
|
||||
// prog = bt_regengine.regcomp(expr, re_flags);
|
||||
// regexp_engine = BACKTRACKING_ENGINE;
|
||||
// }
|
||||
}
|
||||
|
||||
if (prog != NULL) {
|
||||
// Store the info needed to call regcomp() again when the engine turns out
|
||||
// to be very slow when executing it.
|
||||
prog->re_engine = regexp_engine;
|
||||
prog->re_flags = re_flags;
|
||||
}
|
||||
|
||||
return prog;
|
||||
@@ -7036,29 +7033,62 @@ void vim_regfree(regprog_T *prog)
|
||||
prog->engine->regfree(prog);
|
||||
}
|
||||
|
||||
/*
|
||||
* Match a regexp against a string.
|
||||
* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
|
||||
* Uses curbuf for line count and 'iskeyword'.
|
||||
*
|
||||
* Return TRUE if there is a match, FALSE if not.
|
||||
*/
|
||||
int
|
||||
vim_regexec (
|
||||
regmatch_T *rmp,
|
||||
char_u *line, /* string to match against */
|
||||
colnr_T col /* column to start looking for match */
|
||||
)
|
||||
static void report_re_switch(char_u *pat)
|
||||
{
|
||||
return rmp->regprog->engine->regexec_nl(rmp, line, col, false);
|
||||
if (p_verbose > 0) {
|
||||
verbose_enter();
|
||||
MSG_PUTS(_("Switching to backtracking RE engine for pattern: "));
|
||||
MSG_PUTS(pat);
|
||||
verbose_leave();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Like vim_regexec(), but consider a "\n" in "line" to be a line break.
|
||||
*/
|
||||
/// Matches a regexp against a string.
|
||||
/// "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
|
||||
/// Uses curbuf for line count and 'iskeyword'.
|
||||
/// When "nl" is true consider a "\n" in "line" to be a line break.
|
||||
///
|
||||
/// @param rmp
|
||||
/// @param line the string to match against
|
||||
/// @param col the column to start looking for match
|
||||
/// @param nl
|
||||
///
|
||||
/// @return TRUE if there is a match, FALSE if not.
|
||||
static int vim_regexec_both(regmatch_T *rmp, char_u *line, colnr_T col, bool nl)
|
||||
{
|
||||
int result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
|
||||
|
||||
// NFA engine aborted because it's very slow, use backtracking engine instead.
|
||||
if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
|
||||
&& result == NFA_TOO_EXPENSIVE) {
|
||||
int save_p_re = p_re;
|
||||
int re_flags = rmp->regprog->re_flags;
|
||||
char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
|
||||
|
||||
p_re = BACKTRACKING_ENGINE;
|
||||
vim_regfree(rmp->regprog);
|
||||
report_re_switch(pat);
|
||||
rmp->regprog = vim_regcomp(pat, re_flags);
|
||||
if (rmp->regprog != NULL) {
|
||||
result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
|
||||
}
|
||||
|
||||
free(pat);
|
||||
p_re = save_p_re;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
int vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
|
||||
{
|
||||
return vim_regexec_both(rmp, line, col, false);
|
||||
}
|
||||
|
||||
// Like vim_regexec(), but consider a "\n" in "line" to be a line break.
|
||||
int vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
|
||||
{
|
||||
return rmp->regprog->engine->regexec_nl(rmp, line, col, true);
|
||||
return vim_regexec_both(rmp, line, col, true);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -7078,5 +7108,28 @@ long vim_regexec_multi(
|
||||
proftime_T *tm /* timeout limit or NULL */
|
||||
)
|
||||
{
|
||||
return rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm);
|
||||
int result = rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col,
|
||||
tm);
|
||||
|
||||
// NFA engine aborted because it's very slow, use backtracking engine instead.
|
||||
if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
|
||||
&& result == NFA_TOO_EXPENSIVE) {
|
||||
int save_p_re = p_re;
|
||||
int re_flags = rmp->regprog->re_flags;
|
||||
char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
|
||||
|
||||
p_re = BACKTRACKING_ENGINE;
|
||||
vim_regfree(rmp->regprog);
|
||||
report_re_switch(pat);
|
||||
rmp->regprog = vim_regcomp(pat, re_flags);
|
||||
if (rmp->regprog != NULL) {
|
||||
result = rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col,
|
||||
tm);
|
||||
}
|
||||
|
||||
free(pat);
|
||||
p_re = save_p_re;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
@@ -30,6 +30,16 @@
|
||||
*/
|
||||
#define NFA_MAX_BRACES 20
|
||||
|
||||
// In the NFA engine: how many states are allowed.
|
||||
#define NFA_MAX_STATES 100000
|
||||
#define NFA_TOO_EXPENSIVE -1
|
||||
|
||||
// Which regexp engine to use? Needed for vim_regcomp().
|
||||
// Must match with 'regexpengine'.
|
||||
#define AUTOMATIC_ENGINE 0
|
||||
#define BACKTRACKING_ENGINE 1
|
||||
#define NFA_ENGINE 2
|
||||
|
||||
typedef struct regengine regengine_T;
|
||||
|
||||
/*
|
||||
@@ -38,8 +48,10 @@ typedef struct regengine regengine_T;
|
||||
* structures are used. See code below.
|
||||
*/
|
||||
typedef struct regprog {
|
||||
regengine_T *engine;
|
||||
regengine_T *engine;
|
||||
unsigned regflags;
|
||||
unsigned re_engine; ///< Automatic, backtracking or NFA engine.
|
||||
unsigned re_flags; ///< Second argument for vim_regcomp().
|
||||
} regprog_T;
|
||||
|
||||
/*
|
||||
@@ -48,9 +60,11 @@ typedef struct regprog {
|
||||
* See regexp.c for an explanation.
|
||||
*/
|
||||
typedef struct {
|
||||
/* These two members implement regprog_T */
|
||||
regengine_T *engine;
|
||||
// These four members implement regprog_T.
|
||||
regengine_T *engine;
|
||||
unsigned regflags;
|
||||
unsigned re_engine;
|
||||
unsigned re_flags; ///< Second argument for vim_regcomp().
|
||||
|
||||
int regstart;
|
||||
char_u reganch;
|
||||
@@ -78,9 +92,11 @@ struct nfa_state {
|
||||
* Structure used by the NFA matcher.
|
||||
*/
|
||||
typedef struct {
|
||||
/* These two members implement regprog_T */
|
||||
regengine_T *engine;
|
||||
// These four members implement regprog_T.
|
||||
regengine_T *engine;
|
||||
unsigned regflags;
|
||||
unsigned re_engine;
|
||||
unsigned re_flags; ///< Second argument for vim_regcomp().
|
||||
|
||||
nfa_state_T *start; /* points into state[] */
|
||||
|
||||
@@ -91,9 +107,7 @@ typedef struct {
|
||||
int has_zend; /* pattern contains \ze */
|
||||
int has_backref; /* pattern contains \1 .. \9 */
|
||||
int reghasz;
|
||||
#ifdef DEBUG
|
||||
char_u *pattern;
|
||||
#endif
|
||||
int nsubexp; /* number of () */
|
||||
int nstate;
|
||||
nfa_state_T state[1]; /* actually longer.. */
|
||||
@@ -143,9 +157,7 @@ struct regengine {
|
||||
int (*regexec_nl)(regmatch_T*, char_u*, colnr_T, bool);
|
||||
long (*regexec_multi)(regmmatch_T*, win_T*, buf_T*, linenr_T, colnr_T,
|
||||
proftime_T*);
|
||||
#ifdef DEBUG
|
||||
char_u *expr;
|
||||
#endif
|
||||
};
|
||||
|
||||
#endif // NVIM_REGEXP_DEFS_H
|
||||
|
@@ -2347,7 +2347,6 @@ static void nfa_set_code(int c)
|
||||
|
||||
}
|
||||
|
||||
#ifdef REGEXP_DEBUG
|
||||
static FILE *log_fd;
|
||||
|
||||
/*
|
||||
@@ -2468,7 +2467,6 @@ static void nfa_dump(nfa_regprog_T *prog)
|
||||
}
|
||||
}
|
||||
#endif /* REGEXP_DEBUG */
|
||||
#endif /* REGEXP_DEBUG */
|
||||
|
||||
/*
|
||||
* Parse r.e. @expr and convert it into postfix form.
|
||||
@@ -4908,6 +4906,12 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
|
||||
nextlist->n = 0; /* clear nextlist */
|
||||
nextlist->has_pim = FALSE;
|
||||
++nfa_listid;
|
||||
if (prog->re_engine == AUTOMATIC_ENGINE && nfa_listid >= NFA_MAX_STATES) {
|
||||
// Too many states, retry with old engine.
|
||||
nfa_match = NFA_TOO_EXPENSIVE;
|
||||
goto theend;
|
||||
}
|
||||
|
||||
thislist->id = nfa_listid;
|
||||
nextlist->id = nfa_listid + 1;
|
||||
|
||||
@@ -5082,6 +5086,10 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
|
||||
*/
|
||||
result = recursive_regmatch(t->state, NULL, prog,
|
||||
submatch, m, &listids);
|
||||
if (result == NFA_TOO_EXPENSIVE) {
|
||||
nfa_match = result;
|
||||
goto theend;
|
||||
}
|
||||
|
||||
/* for \@! and \@<! it is a match when the result is
|
||||
* FALSE */
|
||||
@@ -5180,6 +5188,10 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
|
||||
/* First try matching the pattern. */
|
||||
result = recursive_regmatch(t->state, NULL, prog,
|
||||
submatch, m, &listids);
|
||||
if (result == NFA_TOO_EXPENSIVE) {
|
||||
nfa_match = result;
|
||||
goto theend;
|
||||
}
|
||||
if (result) {
|
||||
int bytelen;
|
||||
|
||||
@@ -6019,6 +6031,7 @@ nextchar:
|
||||
log_fd = NULL;
|
||||
#endif
|
||||
|
||||
theend:
|
||||
/* Free memory */
|
||||
free(list[0].t);
|
||||
free(list[1].t);
|
||||
@@ -6068,8 +6081,12 @@ static long nfa_regtry(nfa_regprog_T *prog, colnr_T col)
|
||||
clear_sub(&subs.synt);
|
||||
clear_sub(&m.synt);
|
||||
|
||||
if (nfa_regmatch(prog, start, &subs, &m) == FALSE)
|
||||
int result = nfa_regmatch(prog, start, &subs, &m);
|
||||
if (result == FALSE) {
|
||||
return 0;
|
||||
} else if (result == NFA_TOO_EXPENSIVE) {
|
||||
return result;
|
||||
}
|
||||
|
||||
cleanup_subexpr();
|
||||
if (REG_MULTI) {
|
||||
@@ -6186,9 +6203,7 @@ nfa_regexec_both (
|
||||
nfa_nsubexpr = prog->nsubexp;
|
||||
nfa_listid = 1;
|
||||
nfa_alt_listid = 2;
|
||||
#ifdef REGEXP_DEBUG
|
||||
nfa_regengine.expr = prog->pattern;
|
||||
#endif
|
||||
|
||||
if (prog->reganch && col > 0)
|
||||
return 0L;
|
||||
@@ -6228,9 +6243,7 @@ nfa_regexec_both (
|
||||
|
||||
retval = nfa_regtry(prog, col);
|
||||
|
||||
#ifdef REGEXP_DEBUG
|
||||
nfa_regengine.expr = NULL;
|
||||
#endif
|
||||
|
||||
theend:
|
||||
return retval;
|
||||
@@ -6248,9 +6261,7 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags)
|
||||
if (expr == NULL)
|
||||
return NULL;
|
||||
|
||||
#ifdef REGEXP_DEBUG
|
||||
nfa_regengine.expr = expr;
|
||||
#endif
|
||||
|
||||
init_class_tab();
|
||||
|
||||
@@ -6325,10 +6336,8 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags)
|
||||
#endif
|
||||
/* Remember whether this pattern has any \z specials in it. */
|
||||
prog->reghasz = re_has_z;
|
||||
#ifdef REGEXP_DEBUG
|
||||
prog->pattern = vim_strsave(expr);
|
||||
nfa_regengine.expr = NULL;
|
||||
#endif
|
||||
|
||||
out:
|
||||
free(post_start);
|
||||
@@ -6342,9 +6351,7 @@ fail:
|
||||
#ifdef REGEXP_DEBUG
|
||||
nfa_postfix_dump(expr, FAIL);
|
||||
#endif
|
||||
#ifdef REGEXP_DEBUG
|
||||
nfa_regengine.expr = NULL;
|
||||
#endif
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -6355,9 +6362,7 @@ static void nfa_regfree(regprog_T *prog)
|
||||
{
|
||||
if (prog != NULL) {
|
||||
free(((nfa_regprog_T *)prog)->match_text);
|
||||
#ifdef REGEXP_DEBUG
|
||||
free(((nfa_regprog_T *)prog)->pattern);
|
||||
#endif
|
||||
free(prog);
|
||||
}
|
||||
}
|
||||
|
@@ -243,7 +243,7 @@ static int included_patches[] = {
|
||||
500,
|
||||
499,
|
||||
//498 NA
|
||||
//497,
|
||||
497,
|
||||
//496 NA
|
||||
//495 NA
|
||||
494,
|
||||
|
Reference in New Issue
Block a user