vim-patch:9.1.1645: fuzzy.c can be further improved (#35371)

Problem:  fuzzy.c can be further improved
Solution: Fix memory leak and refactor it (glepnir).

Optimize performance and memory allocation:

- Fix a memory leak in fuzzy_match_in_list.
- Use a single memory allocation for both matrices in match_positions.
- Improve has_match performance and add NULL pointer checks.

closes: vim/vim#18012

Upstream Vim commit: vim/vim@59799f3afa

Co-authored-by: glepnir <glephunter@gmail.com>
This commit is contained in:
zeertzjq
2025-08-18 11:03:08 +08:00
committed by GitHub
parent e4d3812c8b
commit 37119ad0d2

View File

@@ -212,6 +212,7 @@ static void fuzzy_match_in_list(list_T *const l, char *const str, const bool mat
char *itemstr = NULL; char *itemstr = NULL;
bool itemstr_allocate = false; bool itemstr_allocate = false;
typval_T rettv; typval_T rettv;
rettv.v_type = VAR_UNKNOWN; rettv.v_type = VAR_UNKNOWN;
const typval_T *const tv = TV_LIST_ITEM_TV(li); const typval_T *const tv = TV_LIST_ITEM_TV(li);
if (tv->v_type == VAR_STRING) { // list of strings if (tv->v_type == VAR_STRING) { // list of strings
@@ -243,28 +244,33 @@ static void fuzzy_match_in_list(list_T *const l, char *const str, const bool mat
int score; int score;
if (itemstr != NULL if (itemstr != NULL
&& fuzzy_match(itemstr, str, matchseq, &score, matches, FUZZY_MATCH_MAX_LEN)) { && fuzzy_match(itemstr, str, matchseq, &score, matches, FUZZY_MATCH_MAX_LEN)) {
items[match_count].idx = (int)match_count; char *itemstr_copy = itemstr_allocate ? xstrdup(itemstr) : itemstr;
items[match_count].item = li; list_T *match_positions = NULL;
items[match_count].score = score;
items[match_count].pat = str;
items[match_count].startpos = (int)matches[0];
items[match_count].itemstr = itemstr_allocate ? xstrdup(itemstr) : itemstr;
items[match_count].itemstr_allocated = itemstr_allocate;
// Copy the list of matching positions in itemstr to a list, if // Copy the list of matching positions in itemstr to a list, if
// "retmatchpos" is set. // "retmatchpos" is set.
if (retmatchpos) { if (retmatchpos) {
items[match_count].lmatchpos = tv_list_alloc(kListLenMayKnow); match_positions = tv_list_alloc(kListLenMayKnow);
// Fill position information
int j = 0; int j = 0;
const char *p = str; const char *p = str;
while (*p != NUL && j < FUZZY_MATCH_MAX_LEN) { while (*p != NUL && j < FUZZY_MATCH_MAX_LEN) {
if (!ascii_iswhite(utf_ptr2char(p)) || matchseq) { if (!ascii_iswhite(utf_ptr2char(p)) || matchseq) {
tv_list_append_number(items[match_count].lmatchpos, matches[j]); tv_list_append_number(match_positions, matches[j]);
j++; j++;
} }
MB_PTR_ADV(p); MB_PTR_ADV(p);
} }
} }
items[match_count].idx = match_count;
items[match_count].item = li;
items[match_count].score = score;
items[match_count].pat = str;
items[match_count].startpos = (int)matches[0];
items[match_count].itemstr = itemstr_copy;
items[match_count].itemstr_allocated = itemstr_allocate;
items[match_count].lmatchpos = match_positions;
match_count++; match_count++;
} }
tv_clear(&rettv); tv_clear(&rettv);
@@ -724,31 +730,37 @@ theend:
#define SCORE_MATCH_CAPITAL 0.7 #define SCORE_MATCH_CAPITAL 0.7
#define SCORE_MATCH_DOT 0.6 #define SCORE_MATCH_DOT 0.6
static int has_match(const char *needle, const char *haystack) static int has_match(const char *const needle, const char *const haystack)
{ {
while (*needle != NUL) { if (!needle || !haystack || !*needle) {
const int n_char = utf_ptr2char(needle); return FAIL;
const char *p = haystack; }
bool matched = false;
while (*p != NUL) { const char *n_ptr = needle;
const int h_char = utf_ptr2char(p); const char *h_ptr = haystack;
while (*n_ptr) {
const int n_char = utf_ptr2char(n_ptr);
bool found = false;
while (*h_ptr) {
const int h_char = utf_ptr2char(h_ptr);
if (n_char == h_char || mb_toupper(n_char) == h_char) { if (n_char == h_char || mb_toupper(n_char) == h_char) {
matched = true; found = true;
h_ptr += utfc_ptr2len(h_ptr);
break; break;
} }
p += utfc_ptr2len(p); h_ptr += utfc_ptr2len(h_ptr);
} }
if (!matched) { if (!found) {
return 0; return FAIL;
} }
needle += utfc_ptr2len(needle); n_ptr += utfc_ptr2len(n_ptr);
haystack = p + utfc_ptr2len(p);
} }
return 1;
return OK;
} }
struct match_struct { struct match_struct {
@@ -851,7 +863,7 @@ static inline void match_row(const match_struct *match, int row, score_t *curr_D
static score_t match_positions(const char *const needle, const char *const haystack, static score_t match_positions(const char *const needle, const char *const haystack,
uint32_t *const positions) uint32_t *const positions)
{ {
if (!*needle) { if (!needle || !haystack || !*needle) {
return (score_t)SCORE_MIN; return (score_t)SCORE_MIN;
} }
@@ -878,10 +890,19 @@ static score_t match_positions(const char *const needle, const char *const hayst
return (score_t)SCORE_MAX; return (score_t)SCORE_MAX;
} }
// ensure n * MATCH_MAX_LEN * 2 won't overflow
if ((size_t)n > (SIZE_MAX / sizeof(score_t)) / MATCH_MAX_LEN / 2) {
return (score_t)SCORE_MIN;
}
// Allocate for both D and M matrices in one contiguous block
score_t *block = (score_t *)xmalloc(sizeof(score_t) * MATCH_MAX_LEN * (size_t)n * 2);
// D[][] Stores the best score for this position ending with a match. // D[][] Stores the best score for this position ending with a match.
// M[][] Stores the best possible score at this position. // M[][] Stores the best possible score at this position.
score_t(*D)[MATCH_MAX_LEN] = xmalloc(sizeof(score_t) * MATCH_MAX_LEN * (size_t)n); score_t(*D)[MATCH_MAX_LEN] = (score_t(*)[MATCH_MAX_LEN])(block);
score_t(*M)[MATCH_MAX_LEN] = xmalloc(sizeof(score_t) * MATCH_MAX_LEN * (size_t)n); score_t(*M)[MATCH_MAX_LEN] = (score_t(*)[MATCH_MAX_LEN])(block
+ MATCH_MAX_LEN * (size_t)n);
match_row(&match, 0, D[0], M[0], D[0], M[0]); match_row(&match, 0, D[0], M[0], D[0], M[0]);
for (int i = 1; i < n; i++) { for (int i = 1; i < n; i++) {
@@ -915,8 +936,6 @@ static score_t match_positions(const char *const needle, const char *const hayst
score_t result = M[n - 1][m - 1]; score_t result = M[n - 1][m - 1];
xfree(M); xfree(block);
xfree(D);
return result; return result;
} }