vim-patch:8.0.0020

Problem:    The regexp engines are not reentrant.
Solution:   Add regexec_T and save/restore the state when needed.

6100d02aab
This commit is contained in:
Jurica Bradaric
2017-07-25 14:18:08 +02:00
committed by James McCoy
parent dc3c06e73d
commit fe0bcc0800
6 changed files with 674 additions and 558 deletions

File diff suppressed because it is too large. (Load Diff)

View File

@@ -4882,7 +4882,7 @@ static long find_match_text(colnr_T startcol, int regstart, char_u *match_text)
int c2_len = PTR2LEN(s2);
int c2 = PTR2CHAR(s2);
if ((c1 != c2 && (!ireg_ic || mb_tolower(c1) != mb_tolower(c2)))
if ((c1 != c2 && (!rex.reg_ic || mb_tolower(c1) != mb_tolower(c2)))
|| c1_len != c2_len) {
match = false;
break;
@@ -4895,13 +4895,13 @@ static long find_match_text(colnr_T startcol, int regstart, char_u *match_text)
&& !(enc_utf8 && utf_iscomposing(PTR2CHAR(s2)))) {
cleanup_subexpr();
if (REG_MULTI) {
reg_startpos[0].lnum = reglnum;
reg_startpos[0].col = col;
reg_endpos[0].lnum = reglnum;
reg_endpos[0].col = s2 - regline;
rex.reg_startpos[0].lnum = reglnum;
rex.reg_startpos[0].col = col;
rex.reg_endpos[0].lnum = reglnum;
rex.reg_endpos[0].col = s2 - regline;
} else {
reg_startp[0] = regline + col;
reg_endp[0] = s2;
rex.reg_startp[0] = regline + col;
rex.reg_endp[0] = s2;
}
return 1L;
}
@@ -5116,8 +5116,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
case NFA_MATCH:
{
// If the match ends before a composing character and
// ireg_icombine is not set, that is not really a match.
if (enc_utf8 && !ireg_icombine && utf_iscomposing(curc)) {
// rex.reg_icombine is not set, that is not really a match.
if (enc_utf8 && !rex.reg_icombine && utf_iscomposing(curc)) {
break;
}
nfa_match = true;
@@ -5400,15 +5400,15 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
int this_class;
// Get class of current and previous char (if it exists).
this_class = mb_get_class_tab(reginput, reg_buf->b_chartab);
this_class = mb_get_class_tab(reginput, rex.reg_buf->b_chartab);
if (this_class <= 1) {
result = false;
} else if (reg_prev_class() == this_class) {
result = false;
}
} else if (!vim_iswordc_buf(curc, reg_buf)
} else if (!vim_iswordc_buf(curc, rex.reg_buf)
|| (reginput > regline
&& vim_iswordc_buf(reginput[-1], reg_buf))) {
&& vim_iswordc_buf(reginput[-1], rex.reg_buf))) {
result = false;
}
if (result) {
@@ -5425,15 +5425,15 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
int this_class, prev_class;
// Get class of current and previous char (if it exists).
this_class = mb_get_class_tab(reginput, reg_buf->b_chartab);
this_class = mb_get_class_tab(reginput, rex.reg_buf->b_chartab);
prev_class = reg_prev_class();
if (this_class == prev_class
|| prev_class == 0 || prev_class == 1) {
result = false;
}
} else if (!vim_iswordc_buf(reginput[-1], reg_buf)
} else if (!vim_iswordc_buf(reginput[-1], rex.reg_buf)
|| (reginput[0] != NUL
&& vim_iswordc_buf(curc, reg_buf))) {
&& vim_iswordc_buf(curc, rex.reg_buf))) {
result = false;
}
if (result) {
@@ -5444,14 +5444,14 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
case NFA_BOF:
if (reglnum == 0 && reginput == regline
&& (!REG_MULTI || reg_firstlnum == 1)) {
&& (!REG_MULTI || rex.reg_firstlnum == 1)) {
add_here = true;
add_state = t->state->out;
}
break;
case NFA_EOF:
if (reglnum == reg_maxline && curc == NUL) {
if (reglnum == rex.reg_maxline && curc == NUL) {
add_here = true;
add_state = t->state->out;
}
@@ -5475,7 +5475,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
// (no preceding character).
len += mb_char2len(mc);
}
if (ireg_icombine && len == 0) {
if (rex.reg_icombine && len == 0) {
// If \Z was present, then ignore composing characters.
// When ignoring the base character this always matches.
if (sta->c != curc) {
@@ -5526,14 +5526,14 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
}
case NFA_NEWL:
if (curc == NUL && !reg_line_lbr && REG_MULTI
&& reglnum <= reg_maxline) {
if (curc == NUL && !rex.reg_line_lbr && REG_MULTI
&& reglnum <= rex.reg_maxline) {
go_to_nextline = true;
// Pass -1 for the offset, which means taking the position
// at the start of the next line.
add_state = t->state->out;
add_off = -1;
} else if (curc == '\n' && reg_line_lbr) {
} else if (curc == '\n' && rex.reg_line_lbr) {
// match \n as if it is an ordinary character
add_state = t->state->out;
add_off = 1;
@@ -5574,7 +5574,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
result = result_if_matched;
break;
}
if (ireg_ic) {
if (rex.reg_ic) {
int curc_low = mb_tolower(curc);
int done = false;
@@ -5591,7 +5591,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
}
} else if (state->c < 0 ? check_char_class(state->c, curc)
: (curc == state->c
|| (ireg_ic && mb_tolower(curc)
|| (rex.reg_ic && mb_tolower(curc)
== mb_tolower(state->c)))) {
result = result_if_matched;
break;
@@ -5639,13 +5639,13 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
break;
case NFA_KWORD: // \k
result = vim_iswordp_buf(reginput, reg_buf);
result = vim_iswordp_buf(reginput, rex.reg_buf);
ADD_STATE_IF_MATCH(t->state);
break;
case NFA_SKWORD: // \K
result = !ascii_isdigit(curc)
&& vim_iswordp_buf(reginput, reg_buf);
&& vim_iswordp_buf(reginput, rex.reg_buf);
ADD_STATE_IF_MATCH(t->state);
break;
@@ -5760,24 +5760,24 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
break;
case NFA_LOWER_IC: // [a-z]
result = ri_lower(curc) || (ireg_ic && ri_upper(curc));
result = ri_lower(curc) || (rex.reg_ic && ri_upper(curc));
ADD_STATE_IF_MATCH(t->state);
break;
case NFA_NLOWER_IC: // [^a-z]
result = curc != NUL
&& !(ri_lower(curc) || (ireg_ic && ri_upper(curc)));
&& !(ri_lower(curc) || (rex.reg_ic && ri_upper(curc)));
ADD_STATE_IF_MATCH(t->state);
break;
case NFA_UPPER_IC: // [A-Z]
result = ri_upper(curc) || (ireg_ic && ri_lower(curc));
result = ri_upper(curc) || (rex.reg_ic && ri_lower(curc));
ADD_STATE_IF_MATCH(t->state);
break;
case NFA_NUPPER_IC: // [^A-Z]
result = curc != NUL
&& !(ri_upper(curc) || (ireg_ic && ri_lower(curc)));
&& !(ri_upper(curc) || (rex.reg_ic && ri_lower(curc)));
ADD_STATE_IF_MATCH(t->state);
break;
@@ -5851,13 +5851,15 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
case NFA_LNUM_GT:
case NFA_LNUM_LT:
assert(t->state->val >= 0
&& !((reg_firstlnum > 0 && reglnum > LONG_MAX - reg_firstlnum)
|| (reg_firstlnum <0 && reglnum < LONG_MIN + reg_firstlnum))
&& reglnum + reg_firstlnum >= 0);
&& !((rex.reg_firstlnum > 0
&& reglnum > LONG_MAX - rex.reg_firstlnum)
|| (rex.reg_firstlnum < 0
&& reglnum < LONG_MIN + rex.reg_firstlnum))
&& reglnum + rex.reg_firstlnum >= 0);
result = (REG_MULTI
&& nfa_re_num_cmp((uintmax_t)t->state->val,
t->state->c - NFA_LNUM,
(uintmax_t)(reglnum + reg_firstlnum)));
(uintmax_t)(reglnum + rex.reg_firstlnum)));
if (result) {
add_here = true;
add_state = t->state->out;
@@ -5893,7 +5895,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
}
result = false;
win_T *wp = reg_win == NULL ? curwin : reg_win;
win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win;
if (op == 1 && col - 1 > t->state->val && col > 100) {
long ts = wp->w_buffer->b_p_ts;
@@ -5920,18 +5922,18 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
case NFA_MARK_GT:
case NFA_MARK_LT:
{
pos_T *pos = getmark_buf(reg_buf, t->state->val, FALSE);
pos_T *pos = getmark_buf(rex.reg_buf, t->state->val, false);
// Compare the mark position to the match position.
result = (pos != NULL // mark doesn't exist
&& pos->lnum > 0 // mark isn't set in reg_buf
&& (pos->lnum == reglnum + reg_firstlnum
&& (pos->lnum == reglnum + rex.reg_firstlnum
? (pos->col == (colnr_T)(reginput - regline)
? t->state->c == NFA_MARK
: (pos->col < (colnr_T)(reginput - regline)
? t->state->c == NFA_MARK_GT
: t->state->c == NFA_MARK_LT))
: (pos->lnum < reglnum + reg_firstlnum
: (pos->lnum < reglnum + rex.reg_firstlnum
? t->state->c == NFA_MARK_GT
: t->state->c == NFA_MARK_LT)));
if (result) {
@@ -5942,10 +5944,10 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
}
case NFA_CURSOR:
result = (reg_win != NULL
&& (reglnum + reg_firstlnum == reg_win->w_cursor.lnum)
result = (rex.reg_win != NULL
&& (reglnum + rex.reg_firstlnum == rex.reg_win->w_cursor.lnum)
&& ((colnr_T)(reginput - regline)
== reg_win->w_cursor.col));
== rex.reg_win->w_cursor.col));
if (result) {
add_here = true;
add_state = t->state->out;
@@ -5995,13 +5997,13 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
#endif
result = (c == curc);
if (!result && ireg_ic) {
if (!result && rex.reg_ic) {
result = mb_tolower(c) == mb_tolower(curc);
}
// If ireg_icombine is not set only skip over the character
// If rex.reg_icombine is not set only skip over the character
// itself. When it is set skip over composing characters.
if (result && enc_utf8 && !ireg_icombine) {
if (result && enc_utf8 && !rex.reg_icombine) {
clen = utf_ptr2len(reginput);
}
@@ -6109,8 +6111,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
&& ((toplevel
&& reglnum == 0
&& clen != 0
&& (ireg_maxcol == 0
|| (colnr_T)(reginput - regline) < ireg_maxcol))
&& (rex.reg_maxcol == 0
|| (colnr_T)(reginput - regline) < rex.reg_maxcol))
|| (nfa_endp != NULL
&& (REG_MULTI
? (reglnum < nfa_endp->se_u.pos.lnum
@@ -6145,7 +6147,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
// Checking if the required start character matches is
// cheaper than adding a state that won't match.
c = PTR2CHAR(reginput + clen);
if (c != prog->regstart && (!ireg_ic || mb_tolower(c)
if (c != prog->regstart && (!rex.reg_ic || mb_tolower(c)
!= mb_tolower(prog->regstart))) {
#ifdef REGEXP_DEBUG
fprintf(log_fd,
@@ -6271,34 +6273,37 @@ static long nfa_regtry(nfa_regprog_T *prog, colnr_T col, proftime_T *tm)
cleanup_subexpr();
if (REG_MULTI) {
for (i = 0; i < subs.norm.in_use; i++) {
reg_startpos[i].lnum = subs.norm.list.multi[i].start_lnum;
reg_startpos[i].col = subs.norm.list.multi[i].start_col;
rex.reg_startpos[i].lnum = subs.norm.list.multi[i].start_lnum;
rex.reg_startpos[i].col = subs.norm.list.multi[i].start_col;
reg_endpos[i].lnum = subs.norm.list.multi[i].end_lnum;
reg_endpos[i].col = subs.norm.list.multi[i].end_col;
rex.reg_endpos[i].lnum = subs.norm.list.multi[i].end_lnum;
rex.reg_endpos[i].col = subs.norm.list.multi[i].end_col;
}
if (reg_startpos[0].lnum < 0) {
reg_startpos[0].lnum = 0;
reg_startpos[0].col = col;
if (rex.reg_startpos[0].lnum < 0) {
rex.reg_startpos[0].lnum = 0;
rex.reg_startpos[0].col = col;
}
if (rex.reg_endpos[0].lnum < 0) {
// pattern has a \ze but it didn't match, use current end
rex.reg_endpos[0].lnum = reglnum;
rex.reg_endpos[0].col = (int)(reginput - regline);
} else {
// Use line number of "\ze".
reglnum = rex.reg_endpos[0].lnum;
}
if (reg_endpos[0].lnum < 0) {
/* pattern has a \ze but it didn't match, use current end */
reg_endpos[0].lnum = reglnum;
reg_endpos[0].col = (int)(reginput - regline);
} else
/* Use line number of "\ze". */
reglnum = reg_endpos[0].lnum;
} else {
for (i = 0; i < subs.norm.in_use; i++) {
reg_startp[i] = subs.norm.list.line[i].start;
reg_endp[i] = subs.norm.list.line[i].end;
rex.reg_startp[i] = subs.norm.list.line[i].start;
rex.reg_endp[i] = subs.norm.list.line[i].end;
}
if (reg_startp[0] == NULL)
reg_startp[0] = regline + col;
if (reg_endp[0] == NULL)
reg_endp[0] = reginput;
if (rex.reg_startp[0] == NULL) {
rex.reg_startp[0] = regline + col;
}
if (rex.reg_endp[0] == NULL) {
rex.reg_endp[0] = reginput;
}
}
/* Package any found \z(...\) matches for export. Default is none. */
@@ -6352,14 +6357,14 @@ static long nfa_regexec_both(char_u *line, colnr_T startcol, proftime_T *tm)
colnr_T col = startcol;
if (REG_MULTI) {
prog = (nfa_regprog_T *)reg_mmatch->regprog;
line = reg_getline((linenr_T)0); /* relative to the cursor */
reg_startpos = reg_mmatch->startpos;
reg_endpos = reg_mmatch->endpos;
prog = (nfa_regprog_T *)rex.reg_mmatch->regprog;
line = reg_getline((linenr_T)0); // relative to the cursor
rex.reg_startpos = rex.reg_mmatch->startpos;
rex.reg_endpos = rex.reg_mmatch->endpos;
} else {
prog = (nfa_regprog_T *)reg_match->regprog;
reg_startp = reg_match->startp;
reg_endp = reg_match->endp;
prog = (nfa_regprog_T *)rex.reg_match->regprog;
rex.reg_startp = rex.reg_match->startp;
rex.reg_endp = rex.reg_match->endp;
}
/* Be paranoid... */
@@ -6368,15 +6373,17 @@ static long nfa_regexec_both(char_u *line, colnr_T startcol, proftime_T *tm)
goto theend;
}
/* If pattern contains "\c" or "\C": overrule value of ireg_ic */
if (prog->regflags & RF_ICASE)
ireg_ic = TRUE;
else if (prog->regflags & RF_NOICASE)
ireg_ic = FALSE;
// If pattern contains "\c" or "\C": overrule value of rex.reg_ic
if (prog->regflags & RF_ICASE) {
rex.reg_ic = true;
} else if (prog->regflags & RF_NOICASE) {
rex.reg_ic = false;
}
/* If pattern contains "\Z" overrule value of ireg_icombine */
if (prog->regflags & RF_ICOMBINE)
ireg_icombine = TRUE;
// If pattern contains "\Z" overrule value of rex.reg_icombine
if (prog->regflags & RF_ICOMBINE) {
rex.reg_icombine = true;
}
regline = line;
reglnum = 0; /* relative to line */
@@ -6405,17 +6412,17 @@ static long nfa_regexec_both(char_u *line, colnr_T startcol, proftime_T *tm)
if (skip_to_start(prog->regstart, &col) == FAIL)
return 0L;
/* If match_text is set it contains the full text that must match.
* Nothing else to try. Doesn't handle combining chars well. */
if (prog->match_text != NULL
&& !ireg_icombine
)
// If match_text is set it contains the full text that must match.
// Nothing else to try. Doesn't handle combining chars well.
if (prog->match_text != NULL && !rex.reg_icombine) {
return find_match_text(col, prog->regstart, prog->match_text);
}
}
/* If the start column is past the maximum column: no need to try. */
if (ireg_maxcol > 0 && col >= ireg_maxcol)
// If the start column is past the maximum column: no need to try.
if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol) {
goto theend;
}
nstate = prog->nstate;
for (i = 0; i < nstate; ++i) {
@@ -6567,15 +6574,15 @@ nfa_regexec_nl (
bool line_lbr
)
{
reg_match = rmp;
reg_mmatch = NULL;
reg_maxline = 0;
reg_line_lbr = line_lbr;
reg_buf = curbuf;
reg_win = NULL;
ireg_ic = rmp->rm_ic;
ireg_icombine = FALSE;
ireg_maxcol = 0;
rex.reg_match = rmp;
rex.reg_mmatch = NULL;
rex.reg_maxline = 0;
rex.reg_line_lbr = line_lbr;
rex.reg_buf = curbuf;
rex.reg_win = NULL;
rex.reg_ic = rmp->rm_ic;
rex.reg_icombine = false;
rex.reg_maxcol = 0;
return nfa_regexec_both(line, col, NULL);
}
@@ -6616,16 +6623,16 @@ nfa_regexec_nl (
static long nfa_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf,
linenr_T lnum, colnr_T col, proftime_T *tm)
{
reg_match = NULL;
reg_mmatch = rmp;
reg_buf = buf;
reg_win = win;
reg_firstlnum = lnum;
reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
reg_line_lbr = FALSE;
ireg_ic = rmp->rmm_ic;
ireg_icombine = FALSE;
ireg_maxcol = rmp->rmm_maxcol;
rex.reg_match = NULL;
rex.reg_mmatch = rmp;
rex.reg_buf = buf;
rex.reg_win = win;
rex.reg_firstlnum = lnum;
rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum;
rex.reg_line_lbr = false;
rex.reg_ic = rmp->rmm_ic;
rex.reg_icombine = false;
rex.reg_maxcol = rmp->rmm_maxcol;
return nfa_regexec_both(NULL, col, tm);
}

View File

@@ -384,9 +384,10 @@ func Test_substitute_expr()
\ {-> submatch(2) . submatch(3) . submatch(1)}, ''))
func Recurse()
return substitute('yyy', 'y*', {-> g:val}, '')
return substitute('yyy', 'y\(.\)y', {-> submatch(1)}, '')
endfunc
call assert_equal('--', substitute('xxx', 'x*', {-> '-' . Recurse() . '-'}, ''))
" recursive call works
call assert_equal('-y-x-', substitute('xxx', 'x\(.\)x', {-> '-' . Recurse() . '-' . submatch(1) . '-'}, ''))
endfunc
func Test_invalid_submatch()

View File

@@ -709,7 +709,7 @@ static const int included_patches[] = {
23,
// 22 NA
// 21,
// 20,
20,
19,
// 18,
17,