mirror of
https://github.com/neovim/neovim.git
synced 2025-09-28 22:18:33 +00:00
vim-patch:9.1.0297: Patch 9.1.0296 causes too many issues (#28263)
Problem: Patch 9.1.0296 causes too many issues
(Tony Mechelynck, chdiza, CI)
Solution: Back out the change for now
Revert "patch 9.1.0296: regexp: engines do not handle case-folding well"
This reverts commit 7a27c108e0509f3255ebdcb6558e896c223e4d23 because it causes
issues with syntax highlighting and breaks the FreeBSD and macOS CI. It
needs more work.
fixes: vim/vim#14487
c97f4d61cd
Co-authored-by: Christian Brabandt <cb@256bit.org>
This commit is contained in:
@@ -204,7 +204,6 @@ information.
|
|||||||
mb_ptr2char utf_ptr2char
|
mb_ptr2char utf_ptr2char
|
||||||
mb_head_off utf_head_off
|
mb_head_off utf_head_off
|
||||||
mb_tail_off utf_cp_bounds
|
mb_tail_off utf_cp_bounds
|
||||||
mb_strnicmp2 utf_strnicmp
|
|
||||||
mb_lefthalve grid_lefthalve
|
mb_lefthalve grid_lefthalve
|
||||||
mb_fix_col grid_fix_col
|
mb_fix_col grid_fix_col
|
||||||
utf_off2cells grid_off2cells
|
utf_off2cells grid_off2cells
|
||||||
|
@@ -1387,7 +1387,7 @@ bool mb_isalpha(int a)
|
|||||||
return mb_islower(a) || mb_isupper(a);
|
return mb_islower(a) || mb_isupper(a);
|
||||||
}
|
}
|
||||||
|
|
||||||
int utf_strnicmp(const char *s1, const char *s2, size_t n1, size_t n2)
|
static int utf_strnicmp(const char *s1, const char *s2, size_t n1, size_t n2)
|
||||||
{
|
{
|
||||||
int c1, c2;
|
int c1, c2;
|
||||||
char buffer[6];
|
char buffer[6];
|
||||||
|
@@ -1627,9 +1627,7 @@ static void mb_decompose(int c, int *c1, int *c2, int *c3)
|
|||||||
|
|
||||||
/// Compare two strings, ignore case if rex.reg_ic set.
|
/// Compare two strings, ignore case if rex.reg_ic set.
|
||||||
/// Return 0 if strings match, non-zero otherwise.
|
/// Return 0 if strings match, non-zero otherwise.
|
||||||
/// Correct the length "*n" when composing characters are ignored
|
/// Correct the length "*n" when composing characters are ignored.
|
||||||
/// or for utf8 when both utf codepoints are considered equal because of
|
|
||||||
/// case-folding but have different length (e.g. 's' and 'ſ')
|
|
||||||
static int cstrncmp(char *s1, char *s2, int *n)
|
static int cstrncmp(char *s1, char *s2, int *n)
|
||||||
{
|
{
|
||||||
int result;
|
int result;
|
||||||
@@ -1637,11 +1635,8 @@ static int cstrncmp(char *s1, char *s2, int *n)
|
|||||||
if (!rex.reg_ic) {
|
if (!rex.reg_ic) {
|
||||||
result = strncmp(s1, s2, (size_t)(*n));
|
result = strncmp(s1, s2, (size_t)(*n));
|
||||||
} else {
|
} else {
|
||||||
int l2 = utfc_ptr2len(s2);
|
assert(*n >= 0);
|
||||||
result = utf_strnicmp(s1, s2, (size_t)(*n), (size_t)l2);
|
result = mb_strnicmp(s1, s2, (size_t)(*n));
|
||||||
if (result == 0 && l2 < *n) {
|
|
||||||
*n = l2;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// if it failed and it's utf8 and we want to combineignore:
|
// if it failed and it's utf8 and we want to combineignore:
|
||||||
@@ -6495,9 +6490,11 @@ static bool regmatch(uint8_t *scan, const proftime_T *tm, int *timed_out)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (cstrncmp((char *)opnd, (char *)rex.input, &len) != 0) {
|
for (i = 0; i < len; i++) {
|
||||||
status = RA_NOMATCH;
|
if (opnd[i] != rex.input[i]) {
|
||||||
break;
|
status = RA_NOMATCH;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
rex.input += len;
|
rex.input += len;
|
||||||
@@ -13849,25 +13846,19 @@ static int skip_to_start(int c, colnr_T *colp)
|
|||||||
static int find_match_text(colnr_T *startcol, int regstart, uint8_t *match_text)
|
static int find_match_text(colnr_T *startcol, int regstart, uint8_t *match_text)
|
||||||
{
|
{
|
||||||
colnr_T col = *startcol;
|
colnr_T col = *startcol;
|
||||||
const int regstart_len = utf_char2len(regstart);
|
const int regstart_len = utf_ptr2len((char *)rex.line + col);
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
bool match = true;
|
bool match = true;
|
||||||
uint8_t *s1 = match_text;
|
uint8_t *s1 = match_text;
|
||||||
// skip regstart
|
uint8_t *s2 = rex.line + col + regstart_len; // skip regstart
|
||||||
uint8_t *s2 = rex.line + col + regstart_len;
|
|
||||||
if (regstart_len > 1
|
|
||||||
&& utf_char2len(utf_ptr2char((char *)rex.line + col)) != regstart_len) {
|
|
||||||
// because of case-folding of the previously matched text, we may need
|
|
||||||
// to skip fewer bytes than utf_char2len(regstart)
|
|
||||||
s2 = rex.line + col + utf_char2len(utf_fold(regstart));
|
|
||||||
}
|
|
||||||
while (*s1) {
|
while (*s1) {
|
||||||
int c1_len = utf_ptr2len((char *)s1);
|
int c1_len = utf_ptr2len((char *)s1);
|
||||||
int c1 = utf_ptr2char((char *)s1);
|
int c1 = utf_ptr2char((char *)s1);
|
||||||
int c2_len = utf_ptr2len((char *)s2);
|
int c2_len = utf_ptr2len((char *)s2);
|
||||||
int c2 = utf_ptr2char((char *)s2);
|
int c2 = utf_ptr2char((char *)s2);
|
||||||
if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2))) {
|
if ((c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2)))
|
||||||
|
|| c1_len != c2_len) {
|
||||||
match = false;
|
match = false;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -15531,7 +15522,7 @@ static int nfa_regexec_both(uint8_t *line, colnr_T startcol, proftime_T *tm, int
|
|||||||
|
|
||||||
// If match_text is set it contains the full text that must match.
|
// If match_text is set it contains the full text that must match.
|
||||||
// Nothing else to try. Doesn't handle combining chars well.
|
// Nothing else to try. Doesn't handle combining chars well.
|
||||||
if (prog->match_text != NULL && *prog->match_text != NUL && !rex.reg_icombine) {
|
if (prog->match_text != NULL && !rex.reg_icombine) {
|
||||||
retval = find_match_text(&col, prog->regstart, prog->match_text);
|
retval = find_match_text(&col, prog->regstart, prog->match_text);
|
||||||
if (REG_MULTI) {
|
if (REG_MULTI) {
|
||||||
rex.reg_mmatch->rmm_matchcol = col;
|
rex.reg_mmatch->rmm_matchcol = col;
|
||||||
|
@@ -611,32 +611,4 @@ func Test_combining_chars_in_collection()
|
|||||||
bw!
|
bw!
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
func Test_search_multibyte_match_ascii()
|
|
||||||
new
|
|
||||||
" Match single 'ſ' and 's'
|
|
||||||
call setline(1, 'das abc heraus abc ſich abc ſind')
|
|
||||||
for i in range(0, 2)
|
|
||||||
exe "set re="..i
|
|
||||||
let ic_match = matchbufline('%', '\c\%u17f', 1, '$')->mapnew({idx, val -> val.text})
|
|
||||||
let noic_match = matchbufline('%', '\C\%u17f', 1, '$')->mapnew({idx, val -> val.text})
|
|
||||||
call assert_equal(['s', 's', 'ſ','ſ'], ic_match, "Ignorecase Regex-engine: " .. &re)
|
|
||||||
call assert_equal(['ſ','ſ'], noic_match, "No-Ignorecase Regex-engine: " .. &re)
|
|
||||||
endfor
|
|
||||||
" Match several 'ſſ' and 'ss'
|
|
||||||
call setline(1, 'das abc herauss abc ſſich abc ſind')
|
|
||||||
for i in range(0, 2)
|
|
||||||
exe "set re="..i
|
|
||||||
let ic_match = matchbufline('%', '\c\%u17f\%u17f', 1, '$')->mapnew({idx, val -> val.text})
|
|
||||||
let noic_match = matchbufline('%', '\C\%u17f\%u17f', 1, '$')->mapnew({idx, val -> val.text})
|
|
||||||
let ic_match2 = matchbufline('%', '\c\%u17f\+', 1, '$')->mapnew({idx, val -> val.text})
|
|
||||||
let noic_match2 = matchbufline('%', '\C\%u17f\+', 1, '$')->mapnew({idx, val -> val.text})
|
|
||||||
|
|
||||||
call assert_equal(['ss', 'ſſ'], ic_match, "Ignorecase Regex-engine: " .. &re)
|
|
||||||
call assert_equal(['ſſ'], noic_match, "No-Ignorecase Regex-engine: " .. &re)
|
|
||||||
call assert_equal(['s', 'ss', 'ſſ', 'ſ'], ic_match2, "Ignorecase Regex-engine: " .. &re)
|
|
||||||
call assert_equal(['ſſ','ſ'], noic_match2, "No-Ignorecase Regex-engine: " .. &re)
|
|
||||||
endfor
|
|
||||||
bw!
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
" vim: shiftwidth=2 sts=2 expandtab
|
" vim: shiftwidth=2 sts=2 expandtab
|
||||||
|
Reference in New Issue
Block a user