mirror of
https://github.com/neovim/neovim.git
synced 2025-10-06 18:06:30 +00:00

Problem: fuzzy.c can be further improved
Solution: Fix memory leak and refactor it (glepnir).
Optimize performance and memory allocation:
- Fix memory leak in fuzzy_match_in_list.
- using single memory allocation in match_positions
- Improve has_match performance and add null pointer checks
closes: vim/vim#18012
59799f3afa
Co-authored-by: glepnir <glephunter@gmail.com>
942 lines
28 KiB
C
942 lines
28 KiB
C
// fuzzy.c: fuzzy matching algorithm and related functions
|
|
//
|
|
// Portions of this file are adapted from fzy (https://github.com/jhawthorn/fzy)
|
|
// Original code:
|
|
// Copyright (c) 2014 John Hawthorn
|
|
// Licensed under the MIT License.
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files (the "Software"), to deal
|
|
// in the Software without restriction, including without limitation the rights
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
// furnished to do so, subject to the following conditions:
|
|
//
|
|
// The above copyright notice and this permission notice shall be included in
|
|
// all copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
// THE SOFTWARE.
|
|
|
|
#include <assert.h>
|
|
#include <limits.h>
|
|
#include <math.h>
|
|
#include <stdbool.h>
|
|
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "nvim/ascii_defs.h"
|
|
#include "nvim/charset.h"
|
|
#include "nvim/errors.h"
|
|
#include "nvim/eval.h"
|
|
#include "nvim/eval/typval.h"
|
|
#include "nvim/fuzzy.h"
|
|
#include "nvim/garray.h"
|
|
#include "nvim/garray_defs.h"
|
|
#include "nvim/globals.h"
|
|
#include "nvim/insexpand.h"
|
|
#include "nvim/macros_defs.h"
|
|
#include "nvim/mbyte.h"
|
|
#include "nvim/memline.h"
|
|
#include "nvim/memory.h"
|
|
#include "nvim/message.h"
|
|
|
|
typedef double score_t;
|
|
|
|
#define SCORE_MAX INFINITY
|
|
#define SCORE_MIN (-INFINITY)
|
|
#define SCORE_SCALE 1000
|
|
|
|
typedef struct {
|
|
int idx; ///< used for stable sort
|
|
listitem_T *item;
|
|
int score;
|
|
list_T *lmatchpos;
|
|
char *pat;
|
|
char *itemstr;
|
|
bool itemstr_allocated;
|
|
int startpos;
|
|
} fuzzyItem_T;
|
|
|
|
typedef struct match_struct match_struct;
|
|
|
|
#include "fuzzy.c.generated.h"
|
|
|
|
/// fuzzy_match()
|
|
///
|
|
/// @return true if "pat_arg" matches "str". Also returns the match score in
|
|
/// "outScore" and the matching character positions in "matches".
|
|
bool fuzzy_match(char *const str, const char *const pat_arg, const bool matchseq,
|
|
int *const outScore, uint32_t *const matches, const int maxMatches)
|
|
FUNC_ATTR_NONNULL_ALL
|
|
{
|
|
bool complete = false;
|
|
int numMatches = 0;
|
|
|
|
*outScore = 0;
|
|
|
|
char *const save_pat = xstrdup(pat_arg);
|
|
char *pat = save_pat;
|
|
char *p = pat;
|
|
|
|
// Try matching each word in "pat_arg" in "str"
|
|
while (true) {
|
|
if (matchseq) {
|
|
complete = true;
|
|
} else {
|
|
// Extract one word from the pattern (separated by space)
|
|
p = skipwhite(p);
|
|
if (*p == NUL) {
|
|
break;
|
|
}
|
|
pat = p;
|
|
while (*p != NUL && !ascii_iswhite(utf_ptr2char(p))) {
|
|
MB_PTR_ADV(p);
|
|
}
|
|
if (*p == NUL) { // processed all the words
|
|
complete = true;
|
|
}
|
|
*p = NUL;
|
|
}
|
|
|
|
int score = FUZZY_SCORE_NONE;
|
|
if (has_match(pat, str)) {
|
|
score_t fzy_score = match_positions(pat, str, matches + numMatches);
|
|
score = (fzy_score == (score_t)SCORE_MIN
|
|
? INT_MIN + 1
|
|
: (fzy_score == (score_t)SCORE_MAX
|
|
? INT_MAX
|
|
: (fzy_score < 0
|
|
? (int)ceil(fzy_score * SCORE_SCALE - 0.5)
|
|
: (int)floor(fzy_score * SCORE_SCALE + 0.5))));
|
|
}
|
|
|
|
if (score == FUZZY_SCORE_NONE) {
|
|
numMatches = 0;
|
|
*outScore = FUZZY_SCORE_NONE;
|
|
break;
|
|
}
|
|
|
|
if (score > 0 && *outScore > INT_MAX - score) {
|
|
*outScore = INT_MAX;
|
|
} else if (score < 0 && *outScore < INT_MIN + 1 - score) {
|
|
*outScore = INT_MIN + 1;
|
|
} else {
|
|
*outScore += score;
|
|
}
|
|
|
|
numMatches += mb_charlen(pat);
|
|
|
|
if (complete || numMatches >= maxMatches) {
|
|
break;
|
|
}
|
|
|
|
// try matching the next word
|
|
p++;
|
|
}
|
|
|
|
xfree(save_pat);
|
|
return numMatches != 0;
|
|
}
|
|
|
|
/// Sort the fuzzy matches in the descending order of the match score.
|
|
/// For items with same score, retain the order using the index (stable sort)
|
|
static int fuzzy_match_item_compare(const void *const s1, const void *const s2)
|
|
FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE
|
|
{
|
|
const int v1 = ((const fuzzyItem_T *)s1)->score;
|
|
const int v2 = ((const fuzzyItem_T *)s2)->score;
|
|
|
|
if (v1 == v2) {
|
|
const char *const pat = ((const fuzzyItem_T *)s1)->pat;
|
|
const size_t patlen = strlen(pat);
|
|
int startpos = ((const fuzzyItem_T *)s1)->startpos;
|
|
const bool exact_match1 = startpos >= 0
|
|
&& strncmp(pat, ((fuzzyItem_T *)s1)->itemstr + startpos, patlen) == 0;
|
|
startpos = ((const fuzzyItem_T *)s2)->startpos;
|
|
const bool exact_match2 = startpos >= 0
|
|
&& strncmp(pat, ((fuzzyItem_T *)s2)->itemstr + startpos, patlen) == 0;
|
|
|
|
if (exact_match1 == exact_match2) {
|
|
const int idx1 = ((const fuzzyItem_T *)s1)->idx;
|
|
const int idx2 = ((const fuzzyItem_T *)s2)->idx;
|
|
return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1;
|
|
} else if (exact_match2) {
|
|
return 1;
|
|
}
|
|
return -1;
|
|
} else {
|
|
return v1 > v2 ? -1 : 1;
|
|
}
|
|
}
|
|
|
|
/// Fuzzy search the string "str" in a list of "items" and return the matching
|
|
/// strings in "fmatchlist".
|
|
/// If "matchseq" is true, then for multi-word search strings, match all the
|
|
/// words in sequence.
|
|
/// If "items" is a list of strings, then search for "str" in the list.
|
|
/// If "items" is a list of dicts, then either use "key" to lookup the string
|
|
/// for each item or use "item_cb" Funcref function to get the string.
|
|
/// If "retmatchpos" is true, then return a list of positions where "str"
|
|
/// matches for each item.
|
|
static void fuzzy_match_in_list(list_T *const l, char *const str, const bool matchseq,
|
|
const char *const key, Callback *const item_cb,
|
|
const bool retmatchpos, list_T *const fmatchlist,
|
|
const int max_matches)
|
|
FUNC_ATTR_NONNULL_ARG(2, 5, 7)
|
|
{
|
|
int len = tv_list_len(l);
|
|
if (len == 0) {
|
|
return;
|
|
}
|
|
if (max_matches > 0 && len > max_matches) {
|
|
len = max_matches;
|
|
}
|
|
|
|
fuzzyItem_T *const items = xcalloc((size_t)len, sizeof(fuzzyItem_T));
|
|
int match_count = 0;
|
|
uint32_t matches[FUZZY_MATCH_MAX_LEN];
|
|
|
|
// For all the string items in items, get the fuzzy matching score
|
|
TV_LIST_ITER(l, li, {
|
|
if (max_matches > 0 && match_count >= max_matches) {
|
|
break;
|
|
}
|
|
|
|
char *itemstr = NULL;
|
|
bool itemstr_allocate = false;
|
|
typval_T rettv;
|
|
|
|
rettv.v_type = VAR_UNKNOWN;
|
|
const typval_T *const tv = TV_LIST_ITEM_TV(li);
|
|
if (tv->v_type == VAR_STRING) { // list of strings
|
|
itemstr = tv->vval.v_string;
|
|
} else if (tv->v_type == VAR_DICT
|
|
&& (key != NULL || item_cb->type != kCallbackNone)) {
|
|
// For a dict, either use the specified key to lookup the string or
|
|
// use the specified callback function to get the string.
|
|
if (key != NULL) {
|
|
itemstr = tv_dict_get_string(tv->vval.v_dict, key, false);
|
|
} else {
|
|
typval_T argv[2];
|
|
|
|
// Invoke the supplied callback (if any) to get the dict item
|
|
tv->vval.v_dict->dv_refcount++;
|
|
argv[0].v_type = VAR_DICT;
|
|
argv[0].vval.v_dict = tv->vval.v_dict;
|
|
argv[1].v_type = VAR_UNKNOWN;
|
|
if (callback_call(item_cb, 1, argv, &rettv)) {
|
|
if (rettv.v_type == VAR_STRING) {
|
|
itemstr = rettv.vval.v_string;
|
|
itemstr_allocate = true;
|
|
}
|
|
}
|
|
tv_dict_unref(tv->vval.v_dict);
|
|
}
|
|
}
|
|
|
|
int score;
|
|
if (itemstr != NULL
|
|
&& fuzzy_match(itemstr, str, matchseq, &score, matches, FUZZY_MATCH_MAX_LEN)) {
|
|
char *itemstr_copy = itemstr_allocate ? xstrdup(itemstr) : itemstr;
|
|
list_T *match_positions = NULL;
|
|
|
|
// Copy the list of matching positions in itemstr to a list, if
|
|
// "retmatchpos" is set.
|
|
if (retmatchpos) {
|
|
match_positions = tv_list_alloc(kListLenMayKnow);
|
|
// Fill position information
|
|
int j = 0;
|
|
const char *p = str;
|
|
while (*p != NUL && j < FUZZY_MATCH_MAX_LEN) {
|
|
if (!ascii_iswhite(utf_ptr2char(p)) || matchseq) {
|
|
tv_list_append_number(match_positions, matches[j]);
|
|
j++;
|
|
}
|
|
MB_PTR_ADV(p);
|
|
}
|
|
}
|
|
items[match_count].idx = match_count;
|
|
items[match_count].item = li;
|
|
items[match_count].score = score;
|
|
items[match_count].pat = str;
|
|
items[match_count].startpos = (int)matches[0];
|
|
items[match_count].itemstr = itemstr_copy;
|
|
items[match_count].itemstr_allocated = itemstr_allocate;
|
|
items[match_count].lmatchpos = match_positions;
|
|
|
|
match_count++;
|
|
}
|
|
tv_clear(&rettv);
|
|
});
|
|
|
|
if (match_count > 0) {
|
|
// Sort the list by the descending order of the match score
|
|
qsort(items, (size_t)match_count, sizeof(fuzzyItem_T), fuzzy_match_item_compare);
|
|
|
|
// For matchfuzzy(), return a list of matched strings.
|
|
// ['str1', 'str2', 'str3']
|
|
// For matchfuzzypos(), return a list with three items.
|
|
// The first item is a list of matched strings. The second item
|
|
// is a list of lists where each list item is a list of matched
|
|
// character positions. The third item is a list of matching scores.
|
|
// [['str1', 'str2', 'str3'], [[1, 3], [1, 3], [1, 3]]]
|
|
list_T *retlist;
|
|
if (retmatchpos) {
|
|
const listitem_T *const li = tv_list_find(fmatchlist, 0);
|
|
assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL);
|
|
retlist = TV_LIST_ITEM_TV(li)->vval.v_list;
|
|
} else {
|
|
retlist = fmatchlist;
|
|
}
|
|
|
|
// Copy the matching strings to the return list
|
|
for (int i = 0; i < match_count; i++) {
|
|
tv_list_append_tv(retlist, TV_LIST_ITEM_TV(items[i].item));
|
|
}
|
|
|
|
// next copy the list of matching positions
|
|
if (retmatchpos) {
|
|
const listitem_T *li = tv_list_find(fmatchlist, -2);
|
|
assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL);
|
|
retlist = TV_LIST_ITEM_TV(li)->vval.v_list;
|
|
|
|
for (int i = 0; i < match_count; i++) {
|
|
assert(items[i].lmatchpos != NULL);
|
|
tv_list_append_list(retlist, items[i].lmatchpos);
|
|
items[i].lmatchpos = NULL;
|
|
}
|
|
|
|
// copy the matching scores
|
|
li = tv_list_find(fmatchlist, -1);
|
|
assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL);
|
|
retlist = TV_LIST_ITEM_TV(li)->vval.v_list;
|
|
for (int i = 0; i < match_count; i++) {
|
|
tv_list_append_number(retlist, items[i].score);
|
|
}
|
|
}
|
|
}
|
|
|
|
for (int i = 0; i < match_count; i++) {
|
|
if (items[i].itemstr_allocated) {
|
|
xfree(items[i].itemstr);
|
|
}
|
|
assert(items[i].lmatchpos == NULL);
|
|
}
|
|
xfree(items);
|
|
}
|
|
|
|
/// Do fuzzy matching. Returns the list of matched strings in "rettv".
|
|
/// If "retmatchpos" is true, also returns the matching character positions.
|
|
static void do_fuzzymatch(const typval_T *const argvars, typval_T *const rettv,
|
|
const bool retmatchpos)
|
|
FUNC_ATTR_NONNULL_ALL
|
|
{
|
|
// validate and get the arguments
|
|
if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL) {
|
|
semsg(_(e_listarg), retmatchpos ? "matchfuzzypos()" : "matchfuzzy()");
|
|
return;
|
|
}
|
|
if (argvars[1].v_type != VAR_STRING || argvars[1].vval.v_string == NULL) {
|
|
semsg(_(e_invarg2), tv_get_string(&argvars[1]));
|
|
return;
|
|
}
|
|
|
|
Callback cb = CALLBACK_NONE;
|
|
const char *key = NULL;
|
|
bool matchseq = false;
|
|
int max_matches = 0;
|
|
if (argvars[2].v_type != VAR_UNKNOWN) {
|
|
if (tv_check_for_nonnull_dict_arg(argvars, 2) == FAIL) {
|
|
return;
|
|
}
|
|
|
|
// To search a dict, either a callback function or a key can be
|
|
// specified.
|
|
dict_T *const d = argvars[2].vval.v_dict;
|
|
const dictitem_T *di;
|
|
if ((di = tv_dict_find(d, "key", -1)) != NULL) {
|
|
if (di->di_tv.v_type != VAR_STRING || di->di_tv.vval.v_string == NULL
|
|
|| *di->di_tv.vval.v_string == NUL) {
|
|
semsg(_(e_invargNval), "key", tv_get_string(&di->di_tv));
|
|
return;
|
|
}
|
|
key = tv_get_string(&di->di_tv);
|
|
} else if (!tv_dict_get_callback(d, "text_cb", -1, &cb)) {
|
|
semsg(_(e_invargval), "text_cb");
|
|
return;
|
|
}
|
|
|
|
if ((di = tv_dict_find(d, "limit", -1)) != NULL) {
|
|
if (di->di_tv.v_type != VAR_NUMBER) {
|
|
semsg(_(e_invargval), "limit");
|
|
return;
|
|
}
|
|
max_matches = (int)tv_get_number_chk(&di->di_tv, NULL);
|
|
}
|
|
|
|
if (tv_dict_has_key(d, "matchseq")) {
|
|
matchseq = true;
|
|
}
|
|
}
|
|
|
|
// get the fuzzy matches
|
|
tv_list_alloc_ret(rettv, retmatchpos ? 3 : kListLenUnknown);
|
|
if (retmatchpos) {
|
|
// For matchfuzzypos(), a list with three items are returned. First
|
|
// item is a list of matching strings, the second item is a list of
|
|
// lists with matching positions within each string and the third item
|
|
// is the list of scores of the matches.
|
|
tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown));
|
|
tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown));
|
|
tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown));
|
|
}
|
|
|
|
fuzzy_match_in_list(argvars[0].vval.v_list, (char *)tv_get_string(&argvars[1]),
|
|
matchseq, key, &cb, retmatchpos, rettv->vval.v_list, max_matches);
|
|
|
|
callback_free(&cb);
|
|
}
|
|
|
|
/// "matchfuzzy()" function
|
|
void f_matchfuzzy(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
|
|
{
|
|
do_fuzzymatch(argvars, rettv, false);
|
|
}
|
|
|
|
/// "matchfuzzypos()" function
|
|
void f_matchfuzzypos(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
|
|
{
|
|
do_fuzzymatch(argvars, rettv, true);
|
|
}
|
|
|
|
/// Same as fuzzy_match_item_compare() except for use with a string match
|
|
static int fuzzy_match_str_compare(const void *const s1, const void *const s2)
|
|
FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE
|
|
{
|
|
const int v1 = ((fuzmatch_str_T *)s1)->score;
|
|
const int v2 = ((fuzmatch_str_T *)s2)->score;
|
|
const int idx1 = ((fuzmatch_str_T *)s1)->idx;
|
|
const int idx2 = ((fuzmatch_str_T *)s2)->idx;
|
|
|
|
if (v1 == v2) {
|
|
return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1;
|
|
} else {
|
|
return v1 > v2 ? -1 : 1;
|
|
}
|
|
}
|
|
|
|
/// Sort fuzzy matches by score
|
|
static void fuzzy_match_str_sort(fuzmatch_str_T *const fm, const int sz)
|
|
FUNC_ATTR_NONNULL_ALL
|
|
{
|
|
// Sort the list by the descending order of the match score
|
|
qsort(fm, (size_t)sz, sizeof(fuzmatch_str_T), fuzzy_match_str_compare);
|
|
}
|
|
|
|
/// Same as fuzzy_match_item_compare() except for use with a function name
|
|
/// string match. <SNR> functions should be sorted to the end.
|
|
static int fuzzy_match_func_compare(const void *const s1, const void *const s2)
|
|
FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE
|
|
{
|
|
const int v1 = ((fuzmatch_str_T *)s1)->score;
|
|
const int v2 = ((fuzmatch_str_T *)s2)->score;
|
|
const int idx1 = ((fuzmatch_str_T *)s1)->idx;
|
|
const int idx2 = ((fuzmatch_str_T *)s2)->idx;
|
|
const char *const str1 = ((fuzmatch_str_T *)s1)->str;
|
|
const char *const str2 = ((fuzmatch_str_T *)s2)->str;
|
|
|
|
if (*str1 != '<' && *str2 == '<') {
|
|
return -1;
|
|
}
|
|
if (*str1 == '<' && *str2 != '<') {
|
|
return 1;
|
|
}
|
|
if (v1 == v2) {
|
|
return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1;
|
|
}
|
|
return v1 > v2 ? -1 : 1;
|
|
}
|
|
|
|
/// Sort fuzzy matches of function names by score.
|
|
/// <SNR> functions should be sorted to the end.
|
|
static void fuzzy_match_func_sort(fuzmatch_str_T *const fm, const int sz)
|
|
FUNC_ATTR_NONNULL_ALL
|
|
{
|
|
// Sort the list by the descending order of the match score
|
|
qsort(fm, (size_t)sz, sizeof(fuzmatch_str_T), fuzzy_match_func_compare);
|
|
}
|
|
|
|
/// Fuzzy match "pat" in "str".
|
|
/// @returns 0 if there is no match. Otherwise, returns the match score.
|
|
int fuzzy_match_str(char *const str, const char *const pat)
|
|
FUNC_ATTR_WARN_UNUSED_RESULT
|
|
{
|
|
if (str == NULL || pat == NULL) {
|
|
return 0;
|
|
}
|
|
|
|
int score = FUZZY_SCORE_NONE;
|
|
uint32_t matchpos[FUZZY_MATCH_MAX_LEN];
|
|
fuzzy_match(str, pat, true, &score, matchpos, ARRAY_SIZE(matchpos));
|
|
|
|
return score;
|
|
}
|
|
|
|
/// Fuzzy match the position of string "pat" in string "str".
|
|
/// @returns a dynamic array of matching positions. If there is no match, returns NULL.
|
|
garray_T *fuzzy_match_str_with_pos(char *const str, const char *const pat)
|
|
{
|
|
if (str == NULL || pat == NULL) {
|
|
return NULL;
|
|
}
|
|
|
|
garray_T *match_positions = xmalloc(sizeof(garray_T));
|
|
ga_init(match_positions, sizeof(uint32_t), 10);
|
|
|
|
int score = FUZZY_SCORE_NONE;
|
|
uint32_t matches[FUZZY_MATCH_MAX_LEN];
|
|
if (!fuzzy_match(str, pat, false, &score, matches, FUZZY_MATCH_MAX_LEN)
|
|
|| score == FUZZY_SCORE_NONE) {
|
|
ga_clear(match_positions);
|
|
xfree(match_positions);
|
|
return NULL;
|
|
}
|
|
|
|
int j = 0;
|
|
for (const char *p = pat; *p != NUL; MB_PTR_ADV(p)) {
|
|
if (!ascii_iswhite(utf_ptr2char(p))) {
|
|
GA_APPEND(uint32_t, match_positions, matches[j]);
|
|
j++;
|
|
}
|
|
}
|
|
|
|
return match_positions;
|
|
}
|
|
|
|
/// This function splits the line pointed to by `*ptr` into words and performs
|
|
/// a fuzzy match for the pattern `pat` on each word. It iterates through the
|
|
/// line, moving `*ptr` to the start of each word during the process.
|
|
///
|
|
/// If a match is found:
|
|
/// - `*ptr` points to the start of the matched word.
|
|
/// - `*len` is set to the length of the matched word.
|
|
/// - `*score` contains the match score.
|
|
///
|
|
/// If no match is found, `*ptr` is updated to the end of the line.
|
|
bool fuzzy_match_str_in_line(char **ptr, char *pat, int *len, pos_T *current_pos, int *score)
|
|
{
|
|
char *str = *ptr;
|
|
char *strBegin = str;
|
|
char *end = NULL;
|
|
char *start = NULL;
|
|
bool found = false;
|
|
|
|
if (str == NULL || pat == NULL) {
|
|
return found;
|
|
}
|
|
char *line_end = find_line_end(str);
|
|
|
|
while (str < line_end) {
|
|
// Skip non-word characters
|
|
start = find_word_start(str);
|
|
if (*start == NUL) {
|
|
break;
|
|
}
|
|
end = find_word_end(start);
|
|
|
|
// Extract the word from start to end
|
|
char save_end = *end;
|
|
*end = NUL;
|
|
|
|
// Perform fuzzy match
|
|
*score = fuzzy_match_str(start, pat);
|
|
*end = save_end;
|
|
|
|
if (*score != FUZZY_SCORE_NONE) {
|
|
*len = (int)(end - start);
|
|
found = true;
|
|
*ptr = start;
|
|
if (current_pos) {
|
|
current_pos->col += (int)(end - strBegin);
|
|
}
|
|
break;
|
|
}
|
|
|
|
// Move to the end of the current word for the next iteration
|
|
str = end;
|
|
// Ensure we continue searching after the current word
|
|
while (*str != NUL && !vim_iswordp(str)) {
|
|
MB_PTR_ADV(str);
|
|
}
|
|
}
|
|
|
|
if (!found) {
|
|
*ptr = line_end;
|
|
}
|
|
|
|
return found;
|
|
}
|
|
|
|
/// Search for the next fuzzy match in the specified buffer.
|
|
/// This function attempts to find the next occurrence of the given pattern
|
|
/// in the buffer, starting from the current position. It handles line wrapping
|
|
/// and direction of search.
|
|
///
|
|
/// Return true if a match is found, otherwise false.
|
|
bool search_for_fuzzy_match(buf_T *buf, pos_T *pos, char *pattern, int dir, pos_T *start_pos,
|
|
int *len, char **ptr, int *score)
|
|
{
|
|
pos_T current_pos = *pos;
|
|
pos_T circly_end;
|
|
bool found_new_match = false;
|
|
bool looped_around = false;
|
|
|
|
bool whole_line = ctrl_x_mode_whole_line();
|
|
|
|
if (buf == curbuf) {
|
|
circly_end = *start_pos;
|
|
} else {
|
|
circly_end.lnum = buf->b_ml.ml_line_count;
|
|
circly_end.col = 0;
|
|
circly_end.coladd = 0;
|
|
}
|
|
|
|
if (whole_line && start_pos->lnum != pos->lnum) {
|
|
current_pos.lnum += dir;
|
|
}
|
|
|
|
while (true) {
|
|
// Check if looped around and back to start position
|
|
if (looped_around && equalpos(current_pos, circly_end)) {
|
|
break;
|
|
}
|
|
|
|
// Ensure current_pos is valid
|
|
if (current_pos.lnum >= 1 && current_pos.lnum <= buf->b_ml.ml_line_count) {
|
|
// Get the current line buffer
|
|
*ptr = ml_get_buf(buf, current_pos.lnum);
|
|
if (!whole_line) {
|
|
*ptr += current_pos.col;
|
|
}
|
|
|
|
// If ptr is end of line is reached, move to next line
|
|
// or previous line based on direction
|
|
if (*ptr != NULL && **ptr != NUL) {
|
|
if (!whole_line) {
|
|
// Try to find a fuzzy match in the current line starting
|
|
// from current position
|
|
found_new_match = fuzzy_match_str_in_line(ptr, pattern,
|
|
len, ¤t_pos, score);
|
|
if (found_new_match) {
|
|
*pos = current_pos;
|
|
break;
|
|
} else if (looped_around && current_pos.lnum == circly_end.lnum) {
|
|
break;
|
|
}
|
|
} else {
|
|
if (fuzzy_match_str(*ptr, pattern) != FUZZY_SCORE_NONE) {
|
|
found_new_match = true;
|
|
*pos = current_pos;
|
|
*len = ml_get_buf_len(buf, current_pos.lnum);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Move to the next line or previous line based on direction
|
|
if (dir == FORWARD) {
|
|
if (++current_pos.lnum > buf->b_ml.ml_line_count) {
|
|
if (p_ws) {
|
|
current_pos.lnum = 1;
|
|
looped_around = true;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
} else {
|
|
if (--current_pos.lnum < 1) {
|
|
if (p_ws) {
|
|
current_pos.lnum = buf->b_ml.ml_line_count;
|
|
looped_around = true;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
current_pos.col = 0;
|
|
}
|
|
|
|
return found_new_match;
|
|
}
|
|
|
|
/// Free an array of fuzzy string matches "fuzmatch[count]".
|
|
void fuzmatch_str_free(fuzmatch_str_T *const fuzmatch, int count)
|
|
{
|
|
if (fuzmatch == NULL) {
|
|
return;
|
|
}
|
|
for (int i = 0; i < count; i++) {
|
|
xfree(fuzmatch[count].str);
|
|
}
|
|
xfree(fuzmatch);
|
|
}
|
|
|
|
/// Copy a list of fuzzy matches into a string list after sorting the matches by
|
|
/// the fuzzy score. Frees the memory allocated for "fuzmatch".
|
|
void fuzzymatches_to_strmatches(fuzmatch_str_T *const fuzmatch, char ***const matches,
|
|
const int count, const bool funcsort)
|
|
FUNC_ATTR_NONNULL_ARG(2)
|
|
{
|
|
if (count <= 0) {
|
|
goto theend;
|
|
}
|
|
|
|
*matches = xmalloc((size_t)count * sizeof(char *));
|
|
|
|
// Sort the list by the descending order of the match score
|
|
if (funcsort) {
|
|
fuzzy_match_func_sort(fuzmatch, count);
|
|
} else {
|
|
fuzzy_match_str_sort(fuzmatch, count);
|
|
}
|
|
|
|
for (int i = 0; i < count; i++) {
|
|
(*matches)[i] = fuzmatch[i].str;
|
|
}
|
|
|
|
theend:
|
|
xfree(fuzmatch);
|
|
}
|
|
|
|
/// Fuzzy match algorithm ported from https://github.com/jhawthorn/fzy.
|
|
/// This implementation extends the original by supporting multibyte characters.
|
|
|
|
#define MATCH_MAX_LEN FUZZY_MATCH_MAX_LEN
|
|
|
|
#define SCORE_GAP_LEADING -0.005
|
|
#define SCORE_GAP_TRAILING -0.005
|
|
#define SCORE_GAP_INNER -0.01
|
|
#define SCORE_MATCH_CONSECUTIVE 1.0
|
|
#define SCORE_MATCH_SLASH 0.9
|
|
#define SCORE_MATCH_WORD 0.8
|
|
#define SCORE_MATCH_CAPITAL 0.7
|
|
#define SCORE_MATCH_DOT 0.6
|
|
|
|
static int has_match(const char *const needle, const char *const haystack)
|
|
{
|
|
if (!needle || !haystack || !*needle) {
|
|
return FAIL;
|
|
}
|
|
|
|
const char *n_ptr = needle;
|
|
const char *h_ptr = haystack;
|
|
|
|
while (*n_ptr) {
|
|
const int n_char = utf_ptr2char(n_ptr);
|
|
bool found = false;
|
|
|
|
while (*h_ptr) {
|
|
const int h_char = utf_ptr2char(h_ptr);
|
|
if (n_char == h_char || mb_toupper(n_char) == h_char) {
|
|
found = true;
|
|
h_ptr += utfc_ptr2len(h_ptr);
|
|
break;
|
|
}
|
|
h_ptr += utfc_ptr2len(h_ptr);
|
|
}
|
|
|
|
if (!found) {
|
|
return FAIL;
|
|
}
|
|
|
|
n_ptr += utfc_ptr2len(n_ptr);
|
|
}
|
|
|
|
return OK;
|
|
}
|
|
|
|
struct match_struct {
|
|
int needle_len;
|
|
int haystack_len;
|
|
int lower_needle[MATCH_MAX_LEN]; ///< stores codepoints
|
|
int lower_haystack[MATCH_MAX_LEN]; ///< stores codepoints
|
|
score_t match_bonus[MATCH_MAX_LEN];
|
|
};
|
|
|
|
#define IS_WORD_SEP(c) ((c) == '-' || (c) == '_' || (c) == ' ')
|
|
#define IS_PATH_SEP(c) ((c) == '/')
|
|
#define IS_DOT(c) ((c) == '.')
|
|
|
|
static score_t compute_bonus_codepoint(int last_c, int c)
|
|
{
|
|
if (ASCII_ISALNUM(c) || vim_iswordc(c)) {
|
|
if (IS_PATH_SEP(last_c)) {
|
|
return SCORE_MATCH_SLASH;
|
|
}
|
|
if (IS_WORD_SEP(last_c)) {
|
|
return SCORE_MATCH_WORD;
|
|
}
|
|
if (IS_DOT(last_c)) {
|
|
return SCORE_MATCH_DOT;
|
|
}
|
|
if (mb_isupper(c) && mb_islower(last_c)) {
|
|
return SCORE_MATCH_CAPITAL;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static void setup_match_struct(match_struct *const match, const char *const needle,
|
|
const char *const haystack)
|
|
{
|
|
int i = 0;
|
|
const char *p = needle;
|
|
while (*p != NUL && i < MATCH_MAX_LEN) {
|
|
const int c = utf_ptr2char(p);
|
|
match->lower_needle[i++] = mb_tolower(c);
|
|
MB_PTR_ADV(p);
|
|
}
|
|
match->needle_len = i;
|
|
|
|
i = 0;
|
|
p = haystack;
|
|
int prev_c = '/';
|
|
while (*p != NUL && i < MATCH_MAX_LEN) {
|
|
const int c = utf_ptr2char(p);
|
|
match->lower_haystack[i] = mb_tolower(c);
|
|
match->match_bonus[i] = compute_bonus_codepoint(prev_c, c);
|
|
prev_c = c;
|
|
MB_PTR_ADV(p);
|
|
i++;
|
|
}
|
|
match->haystack_len = i;
|
|
}
|
|
|
|
static inline void match_row(const match_struct *match, int row, score_t *curr_D, score_t *curr_M,
|
|
const score_t *last_D, const score_t *last_M)
|
|
{
|
|
int n = match->needle_len;
|
|
int m = match->haystack_len;
|
|
int i = row;
|
|
|
|
const int *lower_needle = match->lower_needle;
|
|
const int *lower_haystack = match->lower_haystack;
|
|
const score_t *match_bonus = match->match_bonus;
|
|
|
|
score_t prev_score = (score_t)SCORE_MIN;
|
|
score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER;
|
|
|
|
// These will not be used with this value, but not all compilers see it
|
|
score_t prev_M = (score_t)SCORE_MIN, prev_D = (score_t)SCORE_MIN;
|
|
|
|
for (int j = 0; j < m; j++) {
|
|
if (lower_needle[i] == lower_haystack[j]) {
|
|
score_t score = (score_t)SCORE_MIN;
|
|
if (!i) {
|
|
score = (j * SCORE_GAP_LEADING) + match_bonus[j];
|
|
} else if (j) { // i > 0 && j > 0
|
|
score = MAX(prev_M + match_bonus[j],
|
|
// consecutive match, doesn't stack with match_bonus
|
|
prev_D + SCORE_MATCH_CONSECUTIVE);
|
|
}
|
|
prev_D = last_D[j];
|
|
prev_M = last_M[j];
|
|
curr_D[j] = score;
|
|
curr_M[j] = prev_score = MAX(score, prev_score + gap_score);
|
|
} else {
|
|
prev_D = last_D[j];
|
|
prev_M = last_M[j];
|
|
curr_D[j] = (score_t)SCORE_MIN;
|
|
curr_M[j] = prev_score = prev_score + gap_score;
|
|
}
|
|
}
|
|
}
|
|
|
|
static score_t match_positions(const char *const needle, const char *const haystack,
|
|
uint32_t *const positions)
|
|
{
|
|
if (!needle || !haystack || !*needle) {
|
|
return (score_t)SCORE_MIN;
|
|
}
|
|
|
|
match_struct match;
|
|
setup_match_struct(&match, needle, haystack);
|
|
|
|
int n = match.needle_len;
|
|
int m = match.haystack_len;
|
|
|
|
if (m > MATCH_MAX_LEN || n > m) {
|
|
// Unreasonably large candidate: return no score
|
|
// If it is a valid match it will still be returned, it will
|
|
// just be ranked below any reasonably sized candidates
|
|
return (score_t)SCORE_MIN;
|
|
} else if (n == m) {
|
|
// Since this method can only be called with a haystack which
|
|
// matches needle. If the lengths of the strings are equal the
|
|
// strings themselves must also be equal (ignoring case).
|
|
if (positions) {
|
|
for (int i = 0; i < n; i++) {
|
|
positions[i] = (uint32_t)i;
|
|
}
|
|
}
|
|
return (score_t)SCORE_MAX;
|
|
}
|
|
|
|
// ensure n * MATCH_MAX_LEN * 2 won't overflow
|
|
if ((size_t)n > (SIZE_MAX / sizeof(score_t)) / MATCH_MAX_LEN / 2) {
|
|
return (score_t)SCORE_MIN;
|
|
}
|
|
|
|
// Allocate for both D and M matrices in one contiguous block
|
|
score_t *block = (score_t *)xmalloc(sizeof(score_t) * MATCH_MAX_LEN * (size_t)n * 2);
|
|
|
|
// D[][] Stores the best score for this position ending with a match.
|
|
// M[][] Stores the best possible score at this position.
|
|
score_t(*D)[MATCH_MAX_LEN] = (score_t(*)[MATCH_MAX_LEN])(block);
|
|
score_t(*M)[MATCH_MAX_LEN] = (score_t(*)[MATCH_MAX_LEN])(block
|
|
+ MATCH_MAX_LEN * (size_t)n);
|
|
|
|
match_row(&match, 0, D[0], M[0], D[0], M[0]);
|
|
for (int i = 1; i < n; i++) {
|
|
match_row(&match, i, D[i], M[i], D[i - 1], M[i - 1]);
|
|
}
|
|
|
|
// backtrace to find the positions of optimal matching
|
|
if (positions) {
|
|
int match_required = 0;
|
|
for (int i = n - 1, j = m - 1; i >= 0; i--) {
|
|
for (; j >= 0; j--) {
|
|
// There may be multiple paths which result in
|
|
// the optimal weight.
|
|
//
|
|
// For simplicity, we will pick the first one
|
|
// we encounter, the latest in the candidate
|
|
// string.
|
|
if (D[i][j] != (score_t)SCORE_MIN
|
|
&& (match_required || D[i][j] == M[i][j])) {
|
|
// If this score was determined using
|
|
// SCORE_MATCH_CONSECUTIVE, the
|
|
// previous character MUST be a match
|
|
match_required = i && j
|
|
&& M[i][j] == D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE;
|
|
positions[i] = (uint32_t)(j--);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
score_t result = M[n - 1][m - 1];
|
|
|
|
xfree(block);
|
|
return result;
|
|
}
|