vim-patch:7.4.497 #2295

Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

https://github.com/vim/vim/releases/tag/v7-4-497

Helped-by: David Bürgin <676c7473@gmail.com>
Helped-by: Justin M. Keyes <justinkz@gmail.com>
Helped-by: Scott Prager <splinterofchaos@gmail.com>
This commit is contained in:
David Bürgin
2015-03-28 22:28:37 +01:00
committed by Justin M. Keyes
parent a69e464f70
commit d3bb177f1e
10 changed files with 239 additions and 73 deletions

View File

@@ -6914,13 +6914,8 @@ static regengine_T bt_regengine =
bt_regcomp,
bt_regfree,
bt_regexec_nl,
bt_regexec_multi
#ifdef REGEXP_DEBUG
,(char_u *)""
#endif
#ifdef DEBUG
,NULL
#endif
bt_regexec_multi,
(char_u *)""
};
@@ -6934,21 +6929,14 @@ static regengine_T nfa_regengine =
nfa_regcomp,
nfa_regfree,
nfa_regexec_nl,
nfa_regexec_multi
#ifdef REGEXP_DEBUG
,(char_u *)""
#endif
#ifdef DEBUG
, NULL
#endif
nfa_regexec_multi,
(char_u *)""
};
/* Which regexp engine to use? Needed for vim_regcomp().
* Must match with 'regexpengine'. */
static int regexp_engine = 0;
#define AUTOMATIC_ENGINE 0
#define BACKTRACKING_ENGINE 1
#define NFA_ENGINE 2
#ifdef REGEXP_DEBUG
static char_u regname[][30] = {
"AUTOMATIC Regexp Engine",
@@ -6990,10 +6978,8 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags)
regexp_engine = AUTOMATIC_ENGINE;
}
}
#ifdef REGEXP_DEBUG
bt_regengine.expr = expr;
nfa_regengine.expr = expr;
#endif
/*
* First try the NFA engine, unless backtracking was requested.
@@ -7003,11 +6989,12 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags)
else
prog = bt_regengine.regcomp(expr, re_flags);
if (prog == NULL) { /* error compiling regexp with initial engine */
// Check for error compiling regexp with initial engine.
if (prog == NULL) {
#ifdef BT_REGEXP_DEBUG_LOG
if (regexp_engine != BACKTRACKING_ENGINE) { /* debugging log for NFA */
FILE *f;
f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
// Debugging log for NFA.
if (regexp_engine != BACKTRACKING_ENGINE) {
FILE *f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
if (f) {
fprintf(f, "Syntax error in \"%s\"\n", expr);
fclose(f);
@@ -7016,12 +7003,22 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags)
BT_REGEXP_DEBUG_LOG_NAME);
}
#endif
/*
* If the NFA engine failed, the backtracking engine won't work either.
*
if (regexp_engine == AUTOMATIC_ENGINE)
prog = bt_regengine.regcomp(expr, re_flags);
*/
// If the NFA engine failed, try the backtracking engine.
// Disabled for now, both engines fail on the same patterns.
// Re-enable when regcomp() fails when the pattern would work better
// with the other engine.
//
// if (regexp_engine == AUTOMATIC_ENGINE) {
// prog = bt_regengine.regcomp(expr, re_flags);
// regexp_engine = BACKTRACKING_ENGINE;
// }
}
if (prog != NULL) {
// Store the info needed to call regcomp() again when the engine turns out
// to be very slow when executing it.
prog->re_engine = regexp_engine;
prog->re_flags = re_flags;
}
return prog;
@@ -7036,29 +7033,62 @@ void vim_regfree(regprog_T *prog)
prog->engine->regfree(prog);
}
/*
* Match a regexp against a string.
* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
* Uses curbuf for line count and 'iskeyword'.
*
* Return TRUE if there is a match, FALSE if not.
*/
int
vim_regexec (
regmatch_T *rmp,
char_u *line, /* string to match against */
colnr_T col /* column to start looking for match */
)
static void report_re_switch(char_u *pat)
{
return rmp->regprog->engine->regexec_nl(rmp, line, col, false);
if (p_verbose > 0) {
verbose_enter();
MSG_PUTS(_("Switching to backtracking RE engine for pattern: "));
MSG_PUTS(pat);
verbose_leave();
}
}
/*
* Like vim_regexec(), but consider a "\n" in "line" to be a line break.
*/
/// Matches a regexp against a string.
/// "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
/// Uses curbuf for line count and 'iskeyword'.
/// When "nl" is true consider a "\n" in "line" to be a line break.
///
/// @param rmp
/// @param line the string to match against
/// @param col the column to start looking for match
/// @param nl
///
/// @return TRUE if there is a match, FALSE if not.
static int vim_regexec_both(regmatch_T *rmp, char_u *line, colnr_T col, bool nl)
{
int result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
// NFA engine aborted because it's very slow, use backtracking engine instead.
if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
&& result == NFA_TOO_EXPENSIVE) {
int save_p_re = p_re;
int re_flags = rmp->regprog->re_flags;
char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
p_re = BACKTRACKING_ENGINE;
vim_regfree(rmp->regprog);
report_re_switch(pat);
rmp->regprog = vim_regcomp(pat, re_flags);
if (rmp->regprog != NULL) {
result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
}
free(pat);
p_re = save_p_re;
}
return result;
}
int vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
{
return vim_regexec_both(rmp, line, col, false);
}
// Like vim_regexec(), but consider a "\n" in "line" to be a line break.
int vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
{
return rmp->regprog->engine->regexec_nl(rmp, line, col, true);
return vim_regexec_both(rmp, line, col, true);
}
/*
@@ -7078,5 +7108,28 @@ long vim_regexec_multi(
proftime_T *tm /* timeout limit or NULL */
)
{
return rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm);
int result = rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col,
tm);
// NFA engine aborted because it's very slow, use backtracking engine instead.
if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
&& result == NFA_TOO_EXPENSIVE) {
int save_p_re = p_re;
int re_flags = rmp->regprog->re_flags;
char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
p_re = BACKTRACKING_ENGINE;
vim_regfree(rmp->regprog);
report_re_switch(pat);
rmp->regprog = vim_regcomp(pat, re_flags);
if (rmp->regprog != NULL) {
result = rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col,
tm);
}
free(pat);
p_re = save_p_re;
}
return result;
}

View File

@@ -30,6 +30,16 @@
*/
#define NFA_MAX_BRACES 20
// In the NFA engine: how many states are allowed.
#define NFA_MAX_STATES 100000
#define NFA_TOO_EXPENSIVE -1
// Which regexp engine to use? Needed for vim_regcomp().
// Must match with 'regexpengine'.
#define AUTOMATIC_ENGINE 0
#define BACKTRACKING_ENGINE 1
#define NFA_ENGINE 2
typedef struct regengine regengine_T;
/*
@@ -38,8 +48,10 @@ typedef struct regengine regengine_T;
* structures are used. See code below.
*/
typedef struct regprog {
regengine_T *engine;
regengine_T *engine;
unsigned regflags;
unsigned re_engine; ///< Automatic, backtracking or NFA engine.
unsigned re_flags; ///< Second argument for vim_regcomp().
} regprog_T;
/*
@@ -48,9 +60,11 @@ typedef struct regprog {
* See regexp.c for an explanation.
*/
typedef struct {
/* These two members implement regprog_T */
regengine_T *engine;
// These four members implement regprog_T.
regengine_T *engine;
unsigned regflags;
unsigned re_engine;
unsigned re_flags; ///< Second argument for vim_regcomp().
int regstart;
char_u reganch;
@@ -78,9 +92,11 @@ struct nfa_state {
* Structure used by the NFA matcher.
*/
typedef struct {
/* These two members implement regprog_T */
regengine_T *engine;
// These four members implement regprog_T.
regengine_T *engine;
unsigned regflags;
unsigned re_engine;
unsigned re_flags; ///< Second argument for vim_regcomp().
nfa_state_T *start; /* points into state[] */
@@ -91,9 +107,7 @@ typedef struct {
int has_zend; /* pattern contains \ze */
int has_backref; /* pattern contains \1 .. \9 */
int reghasz;
#ifdef DEBUG
char_u *pattern;
#endif
int nsubexp; /* number of () */
int nstate;
nfa_state_T state[1]; /* actually longer.. */
@@ -143,9 +157,7 @@ struct regengine {
int (*regexec_nl)(regmatch_T*, char_u*, colnr_T, bool);
long (*regexec_multi)(regmmatch_T*, win_T*, buf_T*, linenr_T, colnr_T,
proftime_T*);
#ifdef DEBUG
char_u *expr;
#endif
};
#endif // NVIM_REGEXP_DEFS_H

View File

@@ -2347,7 +2347,6 @@ static void nfa_set_code(int c)
}
#ifdef REGEXP_DEBUG
static FILE *log_fd;
/*
@@ -2468,7 +2467,6 @@ static void nfa_dump(nfa_regprog_T *prog)
}
}
#endif /* REGEXP_DEBUG */
#endif /* REGEXP_DEBUG */
/*
* Parse r.e. @expr and convert it into postfix form.
@@ -4908,6 +4906,12 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
nextlist->n = 0; /* clear nextlist */
nextlist->has_pim = FALSE;
++nfa_listid;
if (prog->re_engine == AUTOMATIC_ENGINE && nfa_listid >= NFA_MAX_STATES) {
// Too many states, retry with old engine.
nfa_match = NFA_TOO_EXPENSIVE;
goto theend;
}
thislist->id = nfa_listid;
nextlist->id = nfa_listid + 1;
@@ -5082,6 +5086,10 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
*/
result = recursive_regmatch(t->state, NULL, prog,
submatch, m, &listids);
if (result == NFA_TOO_EXPENSIVE) {
nfa_match = result;
goto theend;
}
/* for \@! and \@<! it is a match when the result is
* FALSE */
@@ -5180,6 +5188,10 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
/* First try matching the pattern. */
result = recursive_regmatch(t->state, NULL, prog,
submatch, m, &listids);
if (result == NFA_TOO_EXPENSIVE) {
nfa_match = result;
goto theend;
}
if (result) {
int bytelen;
@@ -6019,6 +6031,7 @@ nextchar:
log_fd = NULL;
#endif
theend:
/* Free memory */
free(list[0].t);
free(list[1].t);
@@ -6068,8 +6081,12 @@ static long nfa_regtry(nfa_regprog_T *prog, colnr_T col)
clear_sub(&subs.synt);
clear_sub(&m.synt);
if (nfa_regmatch(prog, start, &subs, &m) == FALSE)
int result = nfa_regmatch(prog, start, &subs, &m);
if (result == FALSE) {
return 0;
} else if (result == NFA_TOO_EXPENSIVE) {
return result;
}
cleanup_subexpr();
if (REG_MULTI) {
@@ -6186,9 +6203,7 @@ nfa_regexec_both (
nfa_nsubexpr = prog->nsubexp;
nfa_listid = 1;
nfa_alt_listid = 2;
#ifdef REGEXP_DEBUG
nfa_regengine.expr = prog->pattern;
#endif
if (prog->reganch && col > 0)
return 0L;
@@ -6228,9 +6243,7 @@ nfa_regexec_both (
retval = nfa_regtry(prog, col);
#ifdef REGEXP_DEBUG
nfa_regengine.expr = NULL;
#endif
theend:
return retval;
@@ -6248,9 +6261,7 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags)
if (expr == NULL)
return NULL;
#ifdef REGEXP_DEBUG
nfa_regengine.expr = expr;
#endif
init_class_tab();
@@ -6325,10 +6336,8 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags)
#endif
/* Remember whether this pattern has any \z specials in it. */
prog->reghasz = re_has_z;
#ifdef REGEXP_DEBUG
prog->pattern = vim_strsave(expr);
nfa_regengine.expr = NULL;
#endif
out:
free(post_start);
@@ -6342,9 +6351,7 @@ fail:
#ifdef REGEXP_DEBUG
nfa_postfix_dump(expr, FAIL);
#endif
#ifdef REGEXP_DEBUG
nfa_regengine.expr = NULL;
#endif
goto out;
}
@@ -6355,9 +6362,7 @@ static void nfa_regfree(regprog_T *prog)
{
if (prog != NULL) {
free(((nfa_regprog_T *)prog)->match_text);
#ifdef REGEXP_DEBUG
free(((nfa_regprog_T *)prog)->pattern);
#endif
free(prog);
}
}

View File

@@ -243,7 +243,7 @@ static int included_patches[] = {
500,
499,
//498 NA
//497,
497,
//496 NA
//495 NA
494,