mirror of
https://github.com/neovim/neovim.git
synced 2025-09-05 19:08:15 +00:00
vim-patch:9.1.1627: fuzzy matching can be improved
Problem: fuzzy-matching can be improved
Solution: Implement a better fuzzy matching algorithm
(Girish Palya)
Replace fuzzy matching algorithm with improved fzy-based implementation
The
[current](https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/)
fuzzy matching algorithm has several accuracy issues:
* It struggles with CamelCase
* It fails to prioritize matches at the beginning of strings, often
ranking middle matches higher.
After evaluating alternatives (see my comments
[here](https://github.com/vim/vim/issues/17531#issuecomment-3112046897)
and
[here](https://github.com/vim/vim/issues/17531#issuecomment-3121593900)),
I chose to adopt the [fzy](https://github.com/jhawthorn/fzy) algorithm,
which:
* Resolves the aforementioned issues.
* Performs better.
Implementation details
This version is based on the original fzy
[algorithm](https://github.com/jhawthorn/fzy/blob/master/src/match.c),
with one key enhancement: **multibyte character support**.
* The original implementation supports only ASCII.
* This patch replaces ascii lookup tables with function calls, making it
compatible with multibyte character sets.
* Core logic (`match_row()` and `match_positions()`) remains faithful to
the original, but now operates on codepoints rather than single-byte
characters.
Performance
Tested against a dataset of **90,000 Linux kernel filenames**. Results
(in milliseconds) show a **\~2x performance improvement** over the
current fuzzy matching algorithm.
```
Search String Current Algo FZY Algo
-------------------------------------------------
init 131.759 66.916
main 83.688 40.861
sig 98.348 39.699
index 109.222 30.738
ab 72.222 44.357
cd 83.036 54.739
a 58.94 62.242
b 43.612 43.442
c 64.39 67.442
k 40.585 36.371
z 34.708 22.781
w 38.033 30.109
cpa 82.596 38.116
arz 84.251 23.964
zzzz 35.823 22.75
dimag 110.686 29.646
xa 43.188 29.199
nha 73.953 31.001
nedax 94.775 29.568
dbue 79.846 25.902
fp 46.826 31.641
tr 90.951 55.883
kw 38.875 23.194
rp 101.575 55.775
kkkkkkkkkkkkkkkkkkkkkkkkkkkkk 48.519 30.921
```
```vim
vim9script
var haystack = readfile('/Users/gp/linux.files')
var needles = ['init', 'main', 'sig', 'index', 'ab', 'cd', 'a', 'b',
'c', 'k',
'z', 'w', 'cpa', 'arz', 'zzzz', 'dimag', 'xa', 'nha', 'nedax',
'dbue',
'fp', 'tr', 'kw', 'rp', 'kkkkkkkkkkkkkkkkkkkkkkkkkkkkk']
for needle in needles
var start = reltime()
var tmp = matchfuzzy(haystack, needle)
echom $'{needle}' (start->reltime()->reltimefloat() * 1000)
endfor
```
Additional changes
* Removed the "camelcase" option from both matchfuzzy() and
matchfuzzypos(), as it's now obsolete with the improved algorithm.
related: neovim/neovim#34101
fixes vim/vim#17531
closes: vim/vim#17900
7e0df5eee9
Co-authored-by: Girish Palya <girishji@gmail.com>
This commit is contained in:
@@ -327,6 +327,8 @@ These existing features changed their behavior.
|
||||
• |$VIM| and |$VIMRUNTIME| no longer check for Vim version-specific runtime
|
||||
directory `vim{number}` (e.g. `vim82`).
|
||||
• 'scrollback' maximum value increased from 100000 to 1000000
|
||||
• |matchfuzzy()| and |matchfuzzypos()| use an improved fuzzy matching algorithm
|
||||
(same as fzy).
|
||||
|
||||
==============================================================================
|
||||
REMOVED FEATURES *news-removed*
|
||||
|
@@ -1489,6 +1489,9 @@ characters in the search string. If the search string has multiple words, then
|
||||
each word is matched separately. So the words in the search string can be
|
||||
present in any order in a string.
|
||||
|
||||
Vim uses the same improved algorithm as the fzy project:
|
||||
https://github.com/jhawthorn/fzy
|
||||
|
||||
Fuzzy matching assigns a score for each matched string based on the following
|
||||
criteria:
|
||||
- The number of sequentially matching characters.
|
||||
|
@@ -6520,9 +6520,6 @@ matchfuzzy({list}, {str} [, {dict}]) *matchfuzzy()*
|
||||
given sequence.
|
||||
limit Maximum number of matches in {list} to be
|
||||
returned. Zero means no limit.
|
||||
camelcase Use enhanced camel case scoring making results
|
||||
better suited for completion related to
|
||||
programming languages. Defaults to v:true.
|
||||
|
||||
If {list} is a list of dictionaries, then the optional {dict}
|
||||
argument supports the following additional items:
|
||||
|
3
runtime/lua/vim/_meta/vimfn.lua
generated
3
runtime/lua/vim/_meta/vimfn.lua
generated
@@ -5918,9 +5918,6 @@ function vim.fn.matchend(expr, pat, start, count) end
|
||||
--- given sequence.
|
||||
--- limit Maximum number of matches in {list} to be
|
||||
--- returned. Zero means no limit.
|
||||
--- camelcase Use enhanced camel case scoring making results
|
||||
--- better suited for completion related to
|
||||
--- programming languages. Defaults to v:true.
|
||||
---
|
||||
--- If {list} is a list of dictionaries, then the optional {dict}
|
||||
--- argument supports the following additional items:
|
||||
|
@@ -13,6 +13,7 @@ local hashpipe = assert(io.open(funcsfname, 'wb'))
|
||||
|
||||
hashpipe:write([[
|
||||
#include "nvim/arglist.h"
|
||||
#include "nvim/channel.h"
|
||||
#include "nvim/cmdexpand.h"
|
||||
#include "nvim/cmdhist.h"
|
||||
#include "nvim/diff.h"
|
||||
@@ -28,7 +29,10 @@ hashpipe:write([[
|
||||
#include "nvim/ex_docmd.h"
|
||||
#include "nvim/ex_getln.h"
|
||||
#include "nvim/fold.h"
|
||||
#include "nvim/fuzzy.h"
|
||||
#include "nvim/getchar.h"
|
||||
#include "nvim/indent.h"
|
||||
#include "nvim/indent_c.h"
|
||||
#include "nvim/insexpand.h"
|
||||
#include "nvim/mapping.h"
|
||||
#include "nvim/match.h"
|
||||
|
@@ -58,6 +58,7 @@
|
||||
#include "nvim/file_search.h"
|
||||
#include "nvim/fileio.h"
|
||||
#include "nvim/fold.h"
|
||||
#include "nvim/fuzzy.h"
|
||||
#include "nvim/garray.h"
|
||||
#include "nvim/garray_defs.h"
|
||||
#include "nvim/getchar.h"
|
||||
@@ -100,7 +101,6 @@
|
||||
#include "nvim/regexp_defs.h"
|
||||
#include "nvim/runtime.h"
|
||||
#include "nvim/runtime_defs.h"
|
||||
#include "nvim/search.h"
|
||||
#include "nvim/spell.h"
|
||||
#include "nvim/state_defs.h"
|
||||
#include "nvim/statusline.h"
|
||||
@@ -2477,12 +2477,12 @@ int ExpandBufnames(char *pat, int *num_file, char ***file, int options)
|
||||
} else {
|
||||
p = NULL;
|
||||
// first try matching with the short file name
|
||||
if ((score = fuzzy_match_str(buf->b_sfname, pat)) != 0) {
|
||||
if ((score = fuzzy_match_str(buf->b_sfname, pat)) != FUZZY_SCORE_NONE) {
|
||||
p = buf->b_sfname;
|
||||
}
|
||||
if (p == NULL) {
|
||||
// next try matching with the full path file name
|
||||
if ((score = fuzzy_match_str(buf->b_ffname, pat)) != 0) {
|
||||
if ((score = fuzzy_match_str(buf->b_ffname, pat)) != FUZZY_SCORE_NONE) {
|
||||
p = buf->b_ffname;
|
||||
}
|
||||
}
|
||||
|
@@ -29,6 +29,7 @@
|
||||
#include "nvim/ex_cmds_defs.h"
|
||||
#include "nvim/ex_docmd.h"
|
||||
#include "nvim/ex_getln.h"
|
||||
#include "nvim/fuzzy.h"
|
||||
#include "nvim/garray.h"
|
||||
#include "nvim/garray_defs.h"
|
||||
#include "nvim/getchar.h"
|
||||
@@ -3096,7 +3097,7 @@ void ExpandGeneric(const char *const pat, expand_T *xp, regmatch_T *regmatch, ch
|
||||
match = vim_regexec(regmatch, str, 0);
|
||||
} else {
|
||||
score = fuzzy_match_str(str, pat);
|
||||
match = (score != 0);
|
||||
match = (score != FUZZY_SCORE_NONE);
|
||||
}
|
||||
} else {
|
||||
match = true;
|
||||
@@ -3408,7 +3409,7 @@ static int ExpandUserDefined(const char *const pat, expand_T *xp, regmatch_T *re
|
||||
match = vim_regexec(regmatch, s, 0);
|
||||
} else {
|
||||
score = fuzzy_match_str(s, pat);
|
||||
match = (score != 0);
|
||||
match = (score != FUZZY_SCORE_NONE);
|
||||
}
|
||||
} else {
|
||||
match = true; // match everything
|
||||
|
@@ -7272,9 +7272,6 @@ M.funcs = {
|
||||
given sequence.
|
||||
limit Maximum number of matches in {list} to be
|
||||
returned. Zero means no limit.
|
||||
camelcase Use enhanced camel case scoring making results
|
||||
better suited for completion related to
|
||||
programming languages. Defaults to v:true.
|
||||
|
||||
If {list} is a list of dictionaries, then the optional {dict}
|
||||
argument supports the following additional items:
|
||||
|
920
src/nvim/fuzzy.c
Normal file
920
src/nvim/fuzzy.c
Normal file
@@ -0,0 +1,920 @@
|
||||
// fuzzy.c: fuzzy matching algorithm and related functions
|
||||
//
|
||||
// Portions of this file are adapted from fzy (https://github.com/jhawthorn/fzy)
|
||||
// Original code:
|
||||
// Copyright (c) 2014 John Hawthorn
|
||||
// Licensed under the MIT License.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
#include <math.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "nvim/ascii_defs.h"
|
||||
#include "nvim/charset.h"
|
||||
#include "nvim/errors.h"
|
||||
#include "nvim/eval.h"
|
||||
#include "nvim/eval/typval.h"
|
||||
#include "nvim/fuzzy.h"
|
||||
#include "nvim/garray.h"
|
||||
#include "nvim/garray_defs.h"
|
||||
#include "nvim/globals.h"
|
||||
#include "nvim/insexpand.h"
|
||||
#include "nvim/macros_defs.h"
|
||||
#include "nvim/mbyte.h"
|
||||
#include "nvim/memline.h"
|
||||
#include "nvim/memory.h"
|
||||
#include "nvim/message.h"
|
||||
|
||||
typedef double score_t;
|
||||
|
||||
#define SCORE_MAX INFINITY
|
||||
#define SCORE_MIN -INFINITY
|
||||
#define SCORE_SCALE 1000
|
||||
|
||||
typedef struct {
|
||||
int idx; ///< used for stable sort
|
||||
listitem_T *item;
|
||||
int score;
|
||||
list_T *lmatchpos;
|
||||
char *pat;
|
||||
char *itemstr;
|
||||
bool itemstr_allocated;
|
||||
int startpos;
|
||||
} fuzzyItem_T;
|
||||
|
||||
typedef struct match_struct match_struct;
|
||||
|
||||
#ifdef INCLUDE_GENERATED_DECLARATIONS
|
||||
# include "fuzzy.c.generated.h"
|
||||
#endif
|
||||
|
||||
/// fuzzy_match()
|
||||
///
|
||||
/// @return true if "pat_arg" matches "str". Also returns the match score in
|
||||
/// "outScore" and the matching character positions in "matches".
|
||||
bool fuzzy_match(char *const str, const char *const pat_arg, const bool matchseq,
|
||||
int *const outScore, uint32_t *const matches, const int maxMatches)
|
||||
FUNC_ATTR_NONNULL_ALL
|
||||
{
|
||||
bool complete = false;
|
||||
int numMatches = 0;
|
||||
|
||||
*outScore = 0;
|
||||
|
||||
char *const save_pat = xstrdup(pat_arg);
|
||||
char *pat = save_pat;
|
||||
char *p = pat;
|
||||
|
||||
// Try matching each word in "pat_arg" in "str"
|
||||
while (true) {
|
||||
if (matchseq) {
|
||||
complete = true;
|
||||
} else {
|
||||
// Extract one word from the pattern (separated by space)
|
||||
p = skipwhite(p);
|
||||
if (*p == NUL) {
|
||||
break;
|
||||
}
|
||||
pat = p;
|
||||
while (*p != NUL && !ascii_iswhite(utf_ptr2char(p))) {
|
||||
MB_PTR_ADV(p);
|
||||
}
|
||||
if (*p == NUL) { // processed all the words
|
||||
complete = true;
|
||||
}
|
||||
*p = NUL;
|
||||
}
|
||||
|
||||
int score = FUZZY_SCORE_NONE;
|
||||
if (has_match(pat, str)) {
|
||||
score_t fzy_score = match_positions(pat, str, matches + numMatches);
|
||||
score = (fzy_score == (score_t)SCORE_MIN
|
||||
? INT_MIN + 1
|
||||
: (fzy_score == (score_t)SCORE_MAX
|
||||
? INT_MAX
|
||||
: (fzy_score < 0
|
||||
? (int)ceil(fzy_score * SCORE_SCALE - 0.5)
|
||||
: (int)floor(fzy_score * SCORE_SCALE + 0.5))));
|
||||
}
|
||||
|
||||
if (score == FUZZY_SCORE_NONE) {
|
||||
numMatches = 0;
|
||||
*outScore = FUZZY_SCORE_NONE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (score > 0 && *outScore > INT_MAX - score) {
|
||||
*outScore = INT_MAX;
|
||||
} else if (score < 0 && *outScore < INT_MIN + 1 - score) {
|
||||
*outScore = INT_MIN + 1;
|
||||
} else {
|
||||
*outScore += score;
|
||||
}
|
||||
|
||||
numMatches += mb_charlen(pat);
|
||||
|
||||
if (complete || numMatches >= maxMatches) {
|
||||
break;
|
||||
}
|
||||
|
||||
// try matching the next word
|
||||
p++;
|
||||
}
|
||||
|
||||
xfree(save_pat);
|
||||
return numMatches != 0;
|
||||
}
|
||||
|
||||
/// Sort the fuzzy matches in the descending order of the match score.
|
||||
/// For items with same score, retain the order using the index (stable sort)
|
||||
static int fuzzy_match_item_compare(const void *const s1, const void *const s2)
|
||||
FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE
|
||||
{
|
||||
const int v1 = ((const fuzzyItem_T *)s1)->score;
|
||||
const int v2 = ((const fuzzyItem_T *)s2)->score;
|
||||
|
||||
if (v1 == v2) {
|
||||
const char *const pat = ((const fuzzyItem_T *)s1)->pat;
|
||||
const size_t patlen = strlen(pat);
|
||||
int startpos = ((const fuzzyItem_T *)s1)->startpos;
|
||||
const bool exact_match1 = startpos >= 0
|
||||
&& strncmp(pat, ((fuzzyItem_T *)s1)->itemstr + startpos, patlen) == 0;
|
||||
startpos = ((const fuzzyItem_T *)s2)->startpos;
|
||||
const bool exact_match2 = startpos >= 0
|
||||
&& strncmp(pat, ((fuzzyItem_T *)s2)->itemstr + startpos, patlen) == 0;
|
||||
|
||||
if (exact_match1 == exact_match2) {
|
||||
const int idx1 = ((const fuzzyItem_T *)s1)->idx;
|
||||
const int idx2 = ((const fuzzyItem_T *)s2)->idx;
|
||||
return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1;
|
||||
} else if (exact_match2) {
|
||||
return 1;
|
||||
}
|
||||
return -1;
|
||||
} else {
|
||||
return v1 > v2 ? -1 : 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// Fuzzy search the string "str" in a list of "items" and return the matching
|
||||
/// strings in "fmatchlist".
|
||||
/// If "matchseq" is true, then for multi-word search strings, match all the
|
||||
/// words in sequence.
|
||||
/// If "items" is a list of strings, then search for "str" in the list.
|
||||
/// If "items" is a list of dicts, then either use "key" to lookup the string
|
||||
/// for each item or use "item_cb" Funcref function to get the string.
|
||||
/// If "retmatchpos" is true, then return a list of positions where "str"
|
||||
/// matches for each item.
|
||||
static void fuzzy_match_in_list(list_T *const l, char *const str, const bool matchseq,
|
||||
const char *const key, Callback *const item_cb,
|
||||
const bool retmatchpos, list_T *const fmatchlist,
|
||||
const int max_matches)
|
||||
FUNC_ATTR_NONNULL_ARG(2, 5, 7)
|
||||
{
|
||||
int len = tv_list_len(l);
|
||||
if (len == 0) {
|
||||
return;
|
||||
}
|
||||
if (max_matches > 0 && len > max_matches) {
|
||||
len = max_matches;
|
||||
}
|
||||
|
||||
fuzzyItem_T *const items = xcalloc((size_t)len, sizeof(fuzzyItem_T));
|
||||
int match_count = 0;
|
||||
uint32_t matches[FUZZY_MATCH_MAX_LEN];
|
||||
|
||||
// For all the string items in items, get the fuzzy matching score
|
||||
TV_LIST_ITER(l, li, {
|
||||
if (max_matches > 0 && match_count >= max_matches) {
|
||||
break;
|
||||
}
|
||||
|
||||
char *itemstr = NULL;
|
||||
bool itemstr_allocate = false;
|
||||
typval_T rettv;
|
||||
rettv.v_type = VAR_UNKNOWN;
|
||||
const typval_T *const tv = TV_LIST_ITEM_TV(li);
|
||||
if (tv->v_type == VAR_STRING) { // list of strings
|
||||
itemstr = tv->vval.v_string;
|
||||
} else if (tv->v_type == VAR_DICT
|
||||
&& (key != NULL || item_cb->type != kCallbackNone)) {
|
||||
// For a dict, either use the specified key to lookup the string or
|
||||
// use the specified callback function to get the string.
|
||||
if (key != NULL) {
|
||||
itemstr = tv_dict_get_string(tv->vval.v_dict, key, false);
|
||||
} else {
|
||||
typval_T argv[2];
|
||||
|
||||
// Invoke the supplied callback (if any) to get the dict item
|
||||
tv->vval.v_dict->dv_refcount++;
|
||||
argv[0].v_type = VAR_DICT;
|
||||
argv[0].vval.v_dict = tv->vval.v_dict;
|
||||
argv[1].v_type = VAR_UNKNOWN;
|
||||
if (callback_call(item_cb, 1, argv, &rettv)) {
|
||||
if (rettv.v_type == VAR_STRING) {
|
||||
itemstr = rettv.vval.v_string;
|
||||
itemstr_allocate = true;
|
||||
}
|
||||
}
|
||||
tv_dict_unref(tv->vval.v_dict);
|
||||
}
|
||||
}
|
||||
|
||||
int score;
|
||||
if (itemstr != NULL
|
||||
&& fuzzy_match(itemstr, str, matchseq, &score, matches, FUZZY_MATCH_MAX_LEN)) {
|
||||
items[match_count].idx = (int)match_count;
|
||||
items[match_count].item = li;
|
||||
items[match_count].score = score;
|
||||
items[match_count].pat = str;
|
||||
items[match_count].startpos = (int)matches[0];
|
||||
items[match_count].itemstr = itemstr_allocate ? xstrdup(itemstr) : itemstr;
|
||||
items[match_count].itemstr_allocated = itemstr_allocate;
|
||||
|
||||
// Copy the list of matching positions in itemstr to a list.
|
||||
{
|
||||
items[match_count].lmatchpos = tv_list_alloc(kListLenMayKnow);
|
||||
int j = 0;
|
||||
const char *p = str;
|
||||
while (*p != NUL && j < FUZZY_MATCH_MAX_LEN) {
|
||||
if (!ascii_iswhite(utf_ptr2char(p)) || matchseq) {
|
||||
tv_list_append_number(items[match_count].lmatchpos, matches[j]);
|
||||
j++;
|
||||
}
|
||||
MB_PTR_ADV(p);
|
||||
}
|
||||
}
|
||||
match_count++;
|
||||
}
|
||||
tv_clear(&rettv);
|
||||
});
|
||||
|
||||
if (match_count > 0) {
|
||||
// Sort the list by the descending order of the match score
|
||||
qsort(items, (size_t)match_count, sizeof(fuzzyItem_T), fuzzy_match_item_compare);
|
||||
|
||||
// For matchfuzzy(), return a list of matched strings.
|
||||
// ['str1', 'str2', 'str3']
|
||||
// For matchfuzzypos(), return a list with three items.
|
||||
// The first item is a list of matched strings. The second item
|
||||
// is a list of lists where each list item is a list of matched
|
||||
// character positions. The third item is a list of matching scores.
|
||||
// [['str1', 'str2', 'str3'], [[1, 3], [1, 3], [1, 3]]]
|
||||
list_T *retlist;
|
||||
if (retmatchpos) {
|
||||
const listitem_T *const li = tv_list_find(fmatchlist, 0);
|
||||
assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL);
|
||||
retlist = TV_LIST_ITEM_TV(li)->vval.v_list;
|
||||
} else {
|
||||
retlist = fmatchlist;
|
||||
}
|
||||
|
||||
// Copy the matching strings to the return list
|
||||
for (int i = 0; i < match_count; i++) {
|
||||
tv_list_append_tv(retlist, TV_LIST_ITEM_TV(items[i].item));
|
||||
}
|
||||
|
||||
// next copy the list of matching positions
|
||||
if (retmatchpos) {
|
||||
const listitem_T *li = tv_list_find(fmatchlist, -2);
|
||||
assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL);
|
||||
retlist = TV_LIST_ITEM_TV(li)->vval.v_list;
|
||||
|
||||
for (int i = 0; i < match_count; i++) {
|
||||
assert(items[i].lmatchpos != NULL);
|
||||
tv_list_append_list(retlist, items[i].lmatchpos);
|
||||
}
|
||||
|
||||
// copy the matching scores
|
||||
li = tv_list_find(fmatchlist, -1);
|
||||
assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL);
|
||||
retlist = TV_LIST_ITEM_TV(li)->vval.v_list;
|
||||
for (int i = 0; i < match_count; i++) {
|
||||
tv_list_append_number(retlist, items[i].score);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < match_count; i++) {
|
||||
if (items[i].itemstr_allocated) {
|
||||
xfree(items[i].itemstr);
|
||||
}
|
||||
}
|
||||
xfree(items);
|
||||
}
|
||||
|
||||
/// Do fuzzy matching. Returns the list of matched strings in "rettv".
|
||||
/// If "retmatchpos" is true, also returns the matching character positions.
|
||||
static void do_fuzzymatch(const typval_T *const argvars, typval_T *const rettv,
|
||||
const bool retmatchpos)
|
||||
FUNC_ATTR_NONNULL_ALL
|
||||
{
|
||||
// validate and get the arguments
|
||||
if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL) {
|
||||
semsg(_(e_listarg), retmatchpos ? "matchfuzzypos()" : "matchfuzzy()");
|
||||
return;
|
||||
}
|
||||
if (argvars[1].v_type != VAR_STRING || argvars[1].vval.v_string == NULL) {
|
||||
semsg(_(e_invarg2), tv_get_string(&argvars[1]));
|
||||
return;
|
||||
}
|
||||
|
||||
Callback cb = CALLBACK_NONE;
|
||||
const char *key = NULL;
|
||||
bool matchseq = false;
|
||||
int max_matches = 0;
|
||||
if (argvars[2].v_type != VAR_UNKNOWN) {
|
||||
if (tv_check_for_nonnull_dict_arg(argvars, 2) == FAIL) {
|
||||
return;
|
||||
}
|
||||
|
||||
// To search a dict, either a callback function or a key can be
|
||||
// specified.
|
||||
dict_T *const d = argvars[2].vval.v_dict;
|
||||
const dictitem_T *di;
|
||||
if ((di = tv_dict_find(d, "key", -1)) != NULL) {
|
||||
if (di->di_tv.v_type != VAR_STRING || di->di_tv.vval.v_string == NULL
|
||||
|| *di->di_tv.vval.v_string == NUL) {
|
||||
semsg(_(e_invargNval), "key", tv_get_string(&di->di_tv));
|
||||
return;
|
||||
}
|
||||
key = tv_get_string(&di->di_tv);
|
||||
} else if (!tv_dict_get_callback(d, "text_cb", -1, &cb)) {
|
||||
semsg(_(e_invargval), "text_cb");
|
||||
return;
|
||||
}
|
||||
|
||||
if ((di = tv_dict_find(d, "limit", -1)) != NULL) {
|
||||
if (di->di_tv.v_type != VAR_NUMBER) {
|
||||
semsg(_(e_invargval), "limit");
|
||||
return;
|
||||
}
|
||||
max_matches = (int)tv_get_number_chk(&di->di_tv, NULL);
|
||||
}
|
||||
|
||||
if (tv_dict_has_key(d, "matchseq")) {
|
||||
matchseq = true;
|
||||
}
|
||||
}
|
||||
|
||||
// get the fuzzy matches
|
||||
tv_list_alloc_ret(rettv, retmatchpos ? 3 : kListLenUnknown);
|
||||
if (retmatchpos) {
|
||||
// For matchfuzzypos(), a list with three items are returned. First
|
||||
// item is a list of matching strings, the second item is a list of
|
||||
// lists with matching positions within each string and the third item
|
||||
// is the list of scores of the matches.
|
||||
tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown));
|
||||
tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown));
|
||||
tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown));
|
||||
}
|
||||
|
||||
fuzzy_match_in_list(argvars[0].vval.v_list, (char *)tv_get_string(&argvars[1]),
|
||||
matchseq, key, &cb, retmatchpos, rettv->vval.v_list, max_matches);
|
||||
|
||||
callback_free(&cb);
|
||||
}
|
||||
|
||||
/// "matchfuzzy()" function
|
||||
void f_matchfuzzy(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
|
||||
{
|
||||
do_fuzzymatch(argvars, rettv, false);
|
||||
}
|
||||
|
||||
/// "matchfuzzypos()" function
|
||||
void f_matchfuzzypos(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
|
||||
{
|
||||
do_fuzzymatch(argvars, rettv, true);
|
||||
}
|
||||
|
||||
/// Same as fuzzy_match_item_compare() except for use with a string match
|
||||
static int fuzzy_match_str_compare(const void *const s1, const void *const s2)
|
||||
FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE
|
||||
{
|
||||
const int v1 = ((fuzmatch_str_T *)s1)->score;
|
||||
const int v2 = ((fuzmatch_str_T *)s2)->score;
|
||||
const int idx1 = ((fuzmatch_str_T *)s1)->idx;
|
||||
const int idx2 = ((fuzmatch_str_T *)s2)->idx;
|
||||
|
||||
if (v1 == v2) {
|
||||
return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1;
|
||||
} else {
|
||||
return v1 > v2 ? -1 : 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// Sort fuzzy matches by score
|
||||
static void fuzzy_match_str_sort(fuzmatch_str_T *const fm, const int sz)
|
||||
FUNC_ATTR_NONNULL_ALL
|
||||
{
|
||||
// Sort the list by the descending order of the match score
|
||||
qsort(fm, (size_t)sz, sizeof(fuzmatch_str_T), fuzzy_match_str_compare);
|
||||
}
|
||||
|
||||
/// Same as fuzzy_match_item_compare() except for use with a function name
|
||||
/// string match. <SNR> functions should be sorted to the end.
|
||||
static int fuzzy_match_func_compare(const void *const s1, const void *const s2)
|
||||
FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE
|
||||
{
|
||||
const int v1 = ((fuzmatch_str_T *)s1)->score;
|
||||
const int v2 = ((fuzmatch_str_T *)s2)->score;
|
||||
const int idx1 = ((fuzmatch_str_T *)s1)->idx;
|
||||
const int idx2 = ((fuzmatch_str_T *)s2)->idx;
|
||||
const char *const str1 = ((fuzmatch_str_T *)s1)->str;
|
||||
const char *const str2 = ((fuzmatch_str_T *)s2)->str;
|
||||
|
||||
if (*str1 != '<' && *str2 == '<') {
|
||||
return -1;
|
||||
}
|
||||
if (*str1 == '<' && *str2 != '<') {
|
||||
return 1;
|
||||
}
|
||||
if (v1 == v2) {
|
||||
return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1;
|
||||
}
|
||||
return v1 > v2 ? -1 : 1;
|
||||
}
|
||||
|
||||
/// Sort fuzzy matches of function names by score.
|
||||
/// <SNR> functions should be sorted to the end.
|
||||
static void fuzzy_match_func_sort(fuzmatch_str_T *const fm, const int sz)
|
||||
FUNC_ATTR_NONNULL_ALL
|
||||
{
|
||||
// Sort the list by the descending order of the match score
|
||||
qsort(fm, (size_t)sz, sizeof(fuzmatch_str_T), fuzzy_match_func_compare);
|
||||
}
|
||||
|
||||
/// Fuzzy match "pat" in "str".
|
||||
/// @returns 0 if there is no match. Otherwise, returns the match score.
|
||||
int fuzzy_match_str(char *const str, const char *const pat)
|
||||
FUNC_ATTR_WARN_UNUSED_RESULT
|
||||
{
|
||||
if (str == NULL || pat == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int score = FUZZY_SCORE_NONE;
|
||||
uint32_t matchpos[FUZZY_MATCH_MAX_LEN];
|
||||
fuzzy_match(str, pat, true, &score, matchpos, ARRAY_SIZE(matchpos));
|
||||
|
||||
return score;
|
||||
}
|
||||
|
||||
/// Fuzzy match the position of string "pat" in string "str".
|
||||
/// @returns a dynamic array of matching positions. If there is no match, returns NULL.
|
||||
garray_T *fuzzy_match_str_with_pos(char *const str, const char *const pat)
|
||||
{
|
||||
if (str == NULL || pat == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
garray_T *match_positions = xmalloc(sizeof(garray_T));
|
||||
ga_init(match_positions, sizeof(uint32_t), 10);
|
||||
|
||||
int score = FUZZY_SCORE_NONE;
|
||||
uint32_t matches[FUZZY_MATCH_MAX_LEN];
|
||||
if (!fuzzy_match(str, pat, false, &score, matches, FUZZY_MATCH_MAX_LEN)
|
||||
|| score == FUZZY_SCORE_NONE) {
|
||||
ga_clear(match_positions);
|
||||
xfree(match_positions);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int j = 0;
|
||||
for (const char *p = pat; *p != NUL; MB_PTR_ADV(p)) {
|
||||
if (!ascii_iswhite(utf_ptr2char(p))) {
|
||||
GA_APPEND(uint32_t, match_positions, matches[j]);
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
return match_positions;
|
||||
}
|
||||
|
||||
/// This function splits the line pointed to by `*ptr` into words and performs
|
||||
/// a fuzzy match for the pattern `pat` on each word. It iterates through the
|
||||
/// line, moving `*ptr` to the start of each word during the process.
|
||||
///
|
||||
/// If a match is found:
|
||||
/// - `*ptr` points to the start of the matched word.
|
||||
/// - `*len` is set to the length of the matched word.
|
||||
/// - `*score` contains the match score.
|
||||
///
|
||||
/// If no match is found, `*ptr` is updated to the end of the line.
|
||||
bool fuzzy_match_str_in_line(char **ptr, char *pat, int *len, pos_T *current_pos, int *score)
|
||||
{
|
||||
char *str = *ptr;
|
||||
char *strBegin = str;
|
||||
char *end = NULL;
|
||||
char *start = NULL;
|
||||
bool found = false;
|
||||
|
||||
if (str == NULL || pat == NULL) {
|
||||
return found;
|
||||
}
|
||||
char *line_end = find_line_end(str);
|
||||
|
||||
while (str < line_end) {
|
||||
// Skip non-word characters
|
||||
start = find_word_start(str);
|
||||
if (*start == NUL) {
|
||||
break;
|
||||
}
|
||||
end = find_word_end(start);
|
||||
|
||||
// Extract the word from start to end
|
||||
char save_end = *end;
|
||||
*end = NUL;
|
||||
|
||||
// Perform fuzzy match
|
||||
*score = fuzzy_match_str(start, pat);
|
||||
*end = save_end;
|
||||
|
||||
if (*score != FUZZY_SCORE_NONE) {
|
||||
*len = (int)(end - start);
|
||||
found = true;
|
||||
*ptr = start;
|
||||
if (current_pos) {
|
||||
current_pos->col += (int)(end - strBegin);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Move to the end of the current word for the next iteration
|
||||
str = end;
|
||||
// Ensure we continue searching after the current word
|
||||
while (*str != NUL && !vim_iswordp(str)) {
|
||||
MB_PTR_ADV(str);
|
||||
}
|
||||
}
|
||||
|
||||
if (!found) {
|
||||
*ptr = line_end;
|
||||
}
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
/// Search for the next fuzzy match in the specified buffer.
|
||||
/// This function attempts to find the next occurrence of the given pattern
|
||||
/// in the buffer, starting from the current position. It handles line wrapping
|
||||
/// and direction of search.
|
||||
///
|
||||
/// Return true if a match is found, otherwise false.
|
||||
bool search_for_fuzzy_match(buf_T *buf, pos_T *pos, char *pattern, int dir, pos_T *start_pos,
|
||||
int *len, char **ptr, int *score)
|
||||
{
|
||||
pos_T current_pos = *pos;
|
||||
pos_T circly_end;
|
||||
bool found_new_match = false;
|
||||
bool looped_around = false;
|
||||
|
||||
bool whole_line = ctrl_x_mode_whole_line();
|
||||
|
||||
if (buf == curbuf) {
|
||||
circly_end = *start_pos;
|
||||
} else {
|
||||
circly_end.lnum = buf->b_ml.ml_line_count;
|
||||
circly_end.col = 0;
|
||||
circly_end.coladd = 0;
|
||||
}
|
||||
|
||||
if (whole_line && start_pos->lnum != pos->lnum) {
|
||||
current_pos.lnum += dir;
|
||||
}
|
||||
|
||||
while (true) {
|
||||
// Check if looped around and back to start position
|
||||
if (looped_around && equalpos(current_pos, circly_end)) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Ensure current_pos is valid
|
||||
if (current_pos.lnum >= 1 && current_pos.lnum <= buf->b_ml.ml_line_count) {
|
||||
// Get the current line buffer
|
||||
*ptr = ml_get_buf(buf, current_pos.lnum);
|
||||
if (!whole_line) {
|
||||
*ptr += current_pos.col;
|
||||
}
|
||||
|
||||
// If ptr is end of line is reached, move to next line
|
||||
// or previous line based on direction
|
||||
if (*ptr != NULL && **ptr != NUL) {
|
||||
if (!whole_line) {
|
||||
// Try to find a fuzzy match in the current line starting
|
||||
// from current position
|
||||
found_new_match = fuzzy_match_str_in_line(ptr, pattern,
|
||||
len, ¤t_pos, score);
|
||||
if (found_new_match) {
|
||||
*pos = current_pos;
|
||||
break;
|
||||
} else if (looped_around && current_pos.lnum == circly_end.lnum) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (fuzzy_match_str(*ptr, pattern) != FUZZY_SCORE_NONE) {
|
||||
found_new_match = true;
|
||||
*pos = current_pos;
|
||||
*len = ml_get_buf_len(buf, current_pos.lnum);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Move to the next line or previous line based on direction
|
||||
if (dir == FORWARD) {
|
||||
if (++current_pos.lnum > buf->b_ml.ml_line_count) {
|
||||
if (p_ws) {
|
||||
current_pos.lnum = 1;
|
||||
looped_around = true;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (--current_pos.lnum < 1) {
|
||||
if (p_ws) {
|
||||
current_pos.lnum = buf->b_ml.ml_line_count;
|
||||
looped_around = true;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
current_pos.col = 0;
|
||||
}
|
||||
|
||||
return found_new_match;
|
||||
}
|
||||
|
||||
/// Free an array of fuzzy string matches "fuzmatch[count]".
|
||||
void fuzmatch_str_free(fuzmatch_str_T *const fuzmatch, int count)
|
||||
{
|
||||
if (fuzmatch == NULL) {
|
||||
return;
|
||||
}
|
||||
for (int i = 0; i < count; i++) {
|
||||
xfree(fuzmatch[count].str);
|
||||
}
|
||||
xfree(fuzmatch);
|
||||
}
|
||||
|
||||
/// Copy a list of fuzzy matches into a string list after sorting the matches by
|
||||
/// the fuzzy score. Frees the memory allocated for "fuzmatch".
|
||||
void fuzzymatches_to_strmatches(fuzmatch_str_T *const fuzmatch, char ***const matches,
|
||||
const int count, const bool funcsort)
|
||||
FUNC_ATTR_NONNULL_ARG(2)
|
||||
{
|
||||
if (count <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
*matches = xmalloc((size_t)count * sizeof(char *));
|
||||
|
||||
// Sort the list by the descending order of the match score
|
||||
if (funcsort) {
|
||||
fuzzy_match_func_sort(fuzmatch, count);
|
||||
} else {
|
||||
fuzzy_match_str_sort(fuzmatch, count);
|
||||
}
|
||||
|
||||
for (int i = 0; i < count; i++) {
|
||||
(*matches)[i] = fuzmatch[i].str;
|
||||
}
|
||||
xfree(fuzmatch);
|
||||
}
|
||||
|
||||
/// Fuzzy match algorithm ported from https://github.com/jhawthorn/fzy.
|
||||
/// This implementation extends the original by supporting multibyte characters.
|
||||
|
||||
#define MATCH_MAX_LEN FUZZY_MATCH_MAX_LEN
|
||||
|
||||
#define SCORE_GAP_LEADING -0.005
|
||||
#define SCORE_GAP_TRAILING -0.005
|
||||
#define SCORE_GAP_INNER -0.01
|
||||
#define SCORE_MATCH_CONSECUTIVE 1.0
|
||||
#define SCORE_MATCH_SLASH 0.9
|
||||
#define SCORE_MATCH_WORD 0.8
|
||||
#define SCORE_MATCH_CAPITAL 0.7
|
||||
#define SCORE_MATCH_DOT 0.6
|
||||
|
||||
static int has_match(const char *needle, const char *haystack)
|
||||
{
|
||||
while (*needle != NUL) {
|
||||
const int n_char = utf_ptr2char(needle);
|
||||
const char *p = haystack;
|
||||
bool matched = false;
|
||||
|
||||
while (*p != NUL) {
|
||||
const int h_char = utf_ptr2char(p);
|
||||
|
||||
if (n_char == h_char || mb_toupper(n_char) == h_char) {
|
||||
matched = true;
|
||||
break;
|
||||
}
|
||||
p += utfc_ptr2len(p);
|
||||
}
|
||||
|
||||
if (!matched) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
needle += utfc_ptr2len(needle);
|
||||
haystack = p + utfc_ptr2len(p);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
struct match_struct {
|
||||
int needle_len;
|
||||
int haystack_len;
|
||||
int lower_needle[MATCH_MAX_LEN]; ///< stores codepoints
|
||||
int lower_haystack[MATCH_MAX_LEN]; ///< stores codepoints
|
||||
score_t match_bonus[MATCH_MAX_LEN];
|
||||
};
|
||||
|
||||
#define IS_WORD_SEP(c) ((c) == '-' || (c) == '_' || (c) == ' ')
|
||||
#define IS_PATH_SEP(c) ((c) == '/')
|
||||
#define IS_DOT(c) ((c) == '.')
|
||||
|
||||
static score_t compute_bonus_codepoint(int last_c, int c)
|
||||
{
|
||||
if (ASCII_ISALNUM(c) || vim_iswordc(c)) {
|
||||
if (IS_PATH_SEP(last_c)) {
|
||||
return SCORE_MATCH_SLASH;
|
||||
}
|
||||
if (IS_WORD_SEP(last_c)) {
|
||||
return SCORE_MATCH_WORD;
|
||||
}
|
||||
if (IS_DOT(last_c)) {
|
||||
return SCORE_MATCH_DOT;
|
||||
}
|
||||
if (mb_isupper(c) && mb_islower(last_c)) {
|
||||
return SCORE_MATCH_CAPITAL;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void setup_match_struct(match_struct *const match, const char *const needle,
|
||||
const char *const haystack)
|
||||
{
|
||||
int i = 0;
|
||||
const char *p = needle;
|
||||
while (*p != NUL && i < MATCH_MAX_LEN) {
|
||||
const int c = utf_ptr2char(p);
|
||||
match->lower_needle[i++] = mb_tolower(c);
|
||||
MB_PTR_ADV(p);
|
||||
}
|
||||
match->needle_len = i;
|
||||
|
||||
i = 0;
|
||||
p = haystack;
|
||||
int prev_c = '/';
|
||||
while (*p != NUL && i < MATCH_MAX_LEN) {
|
||||
const int c = utf_ptr2char(p);
|
||||
match->lower_haystack[i] = mb_tolower(c);
|
||||
match->match_bonus[i] = compute_bonus_codepoint(prev_c, c);
|
||||
prev_c = c;
|
||||
MB_PTR_ADV(p);
|
||||
i++;
|
||||
}
|
||||
match->haystack_len = i;
|
||||
}
|
||||
|
||||
static inline void match_row(const match_struct *match, int row, score_t *curr_D, score_t *curr_M,
|
||||
const score_t *last_D, const score_t *last_M)
|
||||
{
|
||||
int n = match->needle_len;
|
||||
int m = match->haystack_len;
|
||||
int i = row;
|
||||
|
||||
const int *lower_needle = match->lower_needle;
|
||||
const int *lower_haystack = match->lower_haystack;
|
||||
const score_t *match_bonus = match->match_bonus;
|
||||
|
||||
score_t prev_score = (score_t)SCORE_MIN;
|
||||
score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER;
|
||||
|
||||
// These will not be used with this value, but not all compilers see it
|
||||
score_t prev_M = (score_t)SCORE_MIN, prev_D = (score_t)SCORE_MIN;
|
||||
|
||||
for (int j = 0; j < m; j++) {
|
||||
if (lower_needle[i] == lower_haystack[j]) {
|
||||
score_t score = (score_t)SCORE_MIN;
|
||||
if (!i) {
|
||||
score = (j * SCORE_GAP_LEADING) + match_bonus[j];
|
||||
} else if (j) { // i > 0 && j > 0
|
||||
score = MAX(prev_M + match_bonus[j],
|
||||
// consecutive match, doesn't stack with match_bonus
|
||||
prev_D + SCORE_MATCH_CONSECUTIVE);
|
||||
}
|
||||
prev_D = last_D[j];
|
||||
prev_M = last_M[j];
|
||||
curr_D[j] = score;
|
||||
curr_M[j] = prev_score = MAX(score, prev_score + gap_score);
|
||||
} else {
|
||||
prev_D = last_D[j];
|
||||
prev_M = last_M[j];
|
||||
curr_D[j] = (score_t)SCORE_MIN;
|
||||
curr_M[j] = prev_score = prev_score + gap_score;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static score_t match_positions(const char *const needle, const char *const haystack,
|
||||
uint32_t *const positions)
|
||||
{
|
||||
if (!*needle) {
|
||||
return (score_t)SCORE_MIN;
|
||||
}
|
||||
|
||||
match_struct match;
|
||||
setup_match_struct(&match, needle, haystack);
|
||||
|
||||
int n = match.needle_len;
|
||||
int m = match.haystack_len;
|
||||
|
||||
if (m > MATCH_MAX_LEN || n > m) {
|
||||
// Unreasonably large candidate: return no score
|
||||
// If it is a valid match it will still be returned, it will
|
||||
// just be ranked below any reasonably sized candidates
|
||||
return (score_t)SCORE_MIN;
|
||||
} else if (n == m) {
|
||||
// Since this method can only be called with a haystack which
|
||||
// matches needle. If the lengths of the strings are equal the
|
||||
// strings themselves must also be equal (ignoring case).
|
||||
if (positions) {
|
||||
for (int i = 0; i < n; i++) {
|
||||
positions[i] = (uint32_t)i;
|
||||
}
|
||||
}
|
||||
return (score_t)SCORE_MAX;
|
||||
}
|
||||
|
||||
// D[][] Stores the best score for this position ending with a match.
|
||||
// M[][] Stores the best possible score at this position.
|
||||
score_t(*D)[MATCH_MAX_LEN] = xmalloc(sizeof(score_t) * MATCH_MAX_LEN * (size_t)n);
|
||||
score_t(*M)[MATCH_MAX_LEN] = xmalloc(sizeof(score_t) * MATCH_MAX_LEN * (size_t)n);
|
||||
|
||||
match_row(&match, 0, D[0], M[0], D[0], M[0]);
|
||||
for (int i = 1; i < n; i++) {
|
||||
match_row(&match, i, D[i], M[i], D[i - 1], M[i - 1]);
|
||||
}
|
||||
|
||||
// backtrace to find the positions of optimal matching
|
||||
if (positions) {
|
||||
int match_required = 0;
|
||||
for (int i = n - 1, j = m - 1; i >= 0; i--) {
|
||||
for (; j >= 0; j--) {
|
||||
// There may be multiple paths which result in
|
||||
// the optimal weight.
|
||||
//
|
||||
// For simplicity, we will pick the first one
|
||||
// we encounter, the latest in the candidate
|
||||
// string.
|
||||
if (D[i][j] != (score_t)SCORE_MIN
|
||||
&& (match_required || D[i][j] == M[i][j])) {
|
||||
// If this score was determined using
|
||||
// SCORE_MATCH_CONSECUTIVE, the
|
||||
// previous character MUST be a match
|
||||
match_required =
|
||||
i && j
|
||||
&& M[i][j] == D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE;
|
||||
positions[i] = (uint32_t)(j--);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
score_t result = M[n - 1][m - 1];
|
||||
|
||||
xfree(M);
|
||||
xfree(D);
|
||||
|
||||
return result;
|
||||
}
|
25
src/nvim/fuzzy.h
Normal file
25
src/nvim/fuzzy.h
Normal file
@@ -0,0 +1,25 @@
|
||||
#pragma once
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdint.h> // IWYU pragma: keep
|
||||
|
||||
#include "nvim/buffer_defs.h" // IWYU pragma: keep
|
||||
#include "nvim/eval/typval_defs.h" // IWYU pragma: keep
|
||||
#include "nvim/garray_defs.h" // IWYU pragma: keep
|
||||
#include "nvim/pos_defs.h" // IWYU pragma: keep
|
||||
#include "nvim/types_defs.h" // IWYU pragma: keep
|
||||
|
||||
enum { FUZZY_MATCH_MAX_LEN = 1024, }; ///< max characters that can be matched
|
||||
enum { FUZZY_SCORE_NONE = INT_MIN, }; ///< invalid fuzzy score
|
||||
|
||||
/// Fuzzy matched string list item. Used for fuzzy match completion. Items are
|
||||
/// usually sorted by "score". The "idx" member is used for stable-sort.
|
||||
typedef struct {
|
||||
int idx;
|
||||
char *str;
|
||||
int score;
|
||||
} fuzmatch_str_T;
|
||||
|
||||
#ifdef INCLUDE_GENERATED_DECLARATIONS
|
||||
# include "fuzzy.h.generated.h"
|
||||
#endif
|
@@ -34,6 +34,7 @@
|
||||
#include "nvim/extmark.h"
|
||||
#include "nvim/extmark_defs.h"
|
||||
#include "nvim/fileio.h"
|
||||
#include "nvim/fuzzy.h"
|
||||
#include "nvim/garray.h"
|
||||
#include "nvim/garray_defs.h"
|
||||
#include "nvim/getchar.h"
|
||||
@@ -1000,7 +1001,7 @@ static int ins_compl_add(char *const str, int len, char *const fname, char *cons
|
||||
// current match in the list of matches .
|
||||
if (compl_first_match == NULL) {
|
||||
match->cp_next = match->cp_prev = NULL;
|
||||
} else if (cfc_has_mode() && score > 0 && compl_get_longest) {
|
||||
} else if (cfc_has_mode() && score != FUZZY_SCORE_NONE && compl_get_longest) {
|
||||
compl_T *current = compl_first_match->cp_next;
|
||||
compl_T *prev = compl_first_match;
|
||||
inserted = false;
|
||||
@@ -1188,7 +1189,8 @@ static void ins_compl_add_matches(int num_matches, char **matches, int icase)
|
||||
|
||||
for (int i = 0; i < num_matches && add_r != FAIL; i++) {
|
||||
add_r = ins_compl_add(matches[i], -1, NULL, NULL, false, NULL, dir,
|
||||
CP_FAST | (icase ? CP_ICASE : 0), false, NULL, 0);
|
||||
CP_FAST | (icase ? CP_ICASE : 0), false, NULL,
|
||||
FUZZY_SCORE_NONE);
|
||||
if (add_r == OK) {
|
||||
// If dir was BACKWARD then honor it just once.
|
||||
dir = FORWARD;
|
||||
@@ -1356,7 +1358,7 @@ static int cp_compare_nearest(const void *a, const void *b)
|
||||
{
|
||||
int score_a = ((compl_T *)a)->cp_score;
|
||||
int score_b = ((compl_T *)b)->cp_score;
|
||||
if (score_a == 0 || score_b == 0) {
|
||||
if (score_a == FUZZY_SCORE_NONE || score_b == FUZZY_SCORE_NONE) {
|
||||
return 0;
|
||||
}
|
||||
return (score_a > score_b) ? 1 : (score_a < score_b) ? -1 : 0;
|
||||
@@ -1524,7 +1526,7 @@ static int ins_compl_build_pum(void)
|
||||
if (!match_at_original_text(comp)
|
||||
&& (leader->data == NULL
|
||||
|| ins_compl_equal(comp, leader->data, leader->size)
|
||||
|| (fuzzy_filter && comp->cp_score > 0))) {
|
||||
|| (fuzzy_filter && comp->cp_score != FUZZY_SCORE_NONE))) {
|
||||
// Limit number of items from each source if max_items is set.
|
||||
bool match_limit_exceeded = false;
|
||||
int cur_source = comp->cp_cpt_source_idx;
|
||||
@@ -1863,7 +1865,7 @@ static int thesaurus_add_words_in_line(char *fname, char **buf_arg, int dir, con
|
||||
// Add the word. Skip the regexp match.
|
||||
if (wstart != skip_word) {
|
||||
status = ins_compl_add_infercase(wstart, (int)(ptr - wstart), p_ic,
|
||||
fname, dir, false, 0);
|
||||
fname, dir, false, FUZZY_SCORE_NONE);
|
||||
if (status == FAIL) {
|
||||
break;
|
||||
}
|
||||
@@ -1909,7 +1911,8 @@ static void ins_compl_files(int count, char **files, bool thesaurus, int flags,
|
||||
ptr = ctrl_x_mode_line_or_eval() ? find_line_end(ptr) : find_word_end(ptr);
|
||||
int add_r = ins_compl_add_infercase(regmatch->startp[0],
|
||||
(int)(ptr - regmatch->startp[0]),
|
||||
p_ic, files[i], *dir, false, 0);
|
||||
p_ic, files[i], *dir, false,
|
||||
FUZZY_SCORE_NONE);
|
||||
if (thesaurus) {
|
||||
// For a thesaurus, add all the words in the line
|
||||
ptr = buf;
|
||||
@@ -3218,7 +3221,7 @@ static int ins_compl_add_tv(typval_T *const tv, const Direction dir, bool fast)
|
||||
return FAIL;
|
||||
}
|
||||
int status = ins_compl_add((char *)word, -1, NULL, cptext, true,
|
||||
&user_data, dir, flags, dup, user_hl, 0);
|
||||
&user_data, dir, flags, dup, user_hl, FUZZY_SCORE_NONE);
|
||||
if (status != OK) {
|
||||
tv_clear(&user_data);
|
||||
}
|
||||
@@ -3314,7 +3317,7 @@ static void set_completion(colnr_T startcol, list_T *list)
|
||||
}
|
||||
if (ins_compl_add(compl_orig_text.data, (int)compl_orig_text.size,
|
||||
NULL, NULL, false, NULL, 0,
|
||||
flags | CP_FAST, false, NULL, 0) != OK) {
|
||||
flags | CP_FAST, false, NULL, FUZZY_SCORE_NONE) != OK) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -4072,7 +4075,7 @@ static void get_next_filename_completion(void)
|
||||
for (int i = 0; i < num_matches; i++) {
|
||||
char *ptr = matches[i];
|
||||
int score = fuzzy_match_str(ptr, leader);
|
||||
if (score > 0) {
|
||||
if (score != FUZZY_SCORE_NONE) {
|
||||
GA_APPEND(int, &fuzzy_indices, i);
|
||||
compl_fuzzy_scores[i] = score;
|
||||
}
|
||||
@@ -4245,7 +4248,7 @@ static int get_next_default_completion(ins_compl_next_state_T *st, pos_T *start_
|
||||
bool in_fuzzy_collect = (cfc_has_mode() && compl_length > 0)
|
||||
|| ((get_cot_flags() & kOptCotFlagFuzzy) && compl_autocomplete);
|
||||
char *leader = ins_compl_leader();
|
||||
int score = 0;
|
||||
int score = FUZZY_SCORE_NONE;
|
||||
const bool in_curbuf = st->ins_buf == curbuf;
|
||||
|
||||
// If 'infercase' is set, don't use 'smartcase' here
|
||||
@@ -4343,7 +4346,6 @@ static int get_next_default_completion(ins_compl_next_state_T *st, pos_T *start_
|
||||
if (score < 0) {
|
||||
score = -score;
|
||||
}
|
||||
score++;
|
||||
}
|
||||
|
||||
if (ins_compl_add_infercase(ptr, len, p_ic,
|
||||
@@ -4401,7 +4403,8 @@ static void get_register_completion(void)
|
||||
compl_orig_text.size) == 0
|
||||
: strncmp(str, compl_orig_text.data,
|
||||
compl_orig_text.size) == 0)) {
|
||||
if (ins_compl_add_infercase(str, str_len, p_ic, NULL, dir, false, 0) == OK) {
|
||||
if (ins_compl_add_infercase(str, str_len, p_ic, NULL,
|
||||
dir, false, FUZZY_SCORE_NONE) == OK) {
|
||||
dir = FORWARD;
|
||||
}
|
||||
}
|
||||
@@ -4435,7 +4438,7 @@ static void get_register_completion(void)
|
||||
: strncmp(p, compl_orig_text.data,
|
||||
compl_orig_text.size) == 0))) {
|
||||
if (ins_compl_add_infercase(p, len, p_ic, NULL,
|
||||
dir, false, 0) == OK) {
|
||||
dir, false, FUZZY_SCORE_NONE) == OK) {
|
||||
dir = FORWARD;
|
||||
}
|
||||
}
|
||||
@@ -4553,7 +4556,7 @@ static void get_next_bufname_token(void)
|
||||
char *tail = path_tail(b->b_sfname);
|
||||
if (strncmp(tail, compl_orig_text.data, compl_orig_text.size) == 0) {
|
||||
ins_compl_add(tail, (int)strlen(tail), NULL, NULL, false, NULL, 0,
|
||||
p_ic ? CP_ICASE : 0, false, NULL, 0);
|
||||
p_ic ? CP_ICASE : 0, false, NULL, FUZZY_SCORE_NONE);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -4796,7 +4799,8 @@ static int ins_compl_get_exp(pos_T *ini)
|
||||
// For `^P` completion, reset `compl_curr_match` to the head to avoid
|
||||
// mixing matches from different sources.
|
||||
if (!compl_dir_forward()) {
|
||||
while (compl_curr_match->cp_prev) {
|
||||
while (compl_curr_match->cp_prev
|
||||
&& !match_at_original_text(compl_curr_match->cp_prev)) {
|
||||
compl_curr_match = compl_curr_match->cp_prev;
|
||||
}
|
||||
}
|
||||
@@ -5145,7 +5149,7 @@ static int find_next_completion_match(bool allow_get_expansion, int todo, bool a
|
||||
if (!match_at_original_text(compl_shown_match)
|
||||
&& leader->data != NULL
|
||||
&& !ins_compl_equal(compl_shown_match, leader->data, leader->size)
|
||||
&& !(compl_fuzzy_match && compl_shown_match->cp_score > 0)) {
|
||||
&& !(compl_fuzzy_match && compl_shown_match->cp_score != FUZZY_SCORE_NONE)) {
|
||||
todo++;
|
||||
} else {
|
||||
// Remember a matching item.
|
||||
@@ -5919,7 +5923,7 @@ static int ins_compl_start(void)
|
||||
}
|
||||
if (ins_compl_add(compl_orig_text.data, (int)compl_orig_text.size,
|
||||
NULL, NULL, false, NULL, 0,
|
||||
flags, false, NULL, 0) != OK) {
|
||||
flags, false, NULL, FUZZY_SCORE_NONE) != OK) {
|
||||
API_CLEAR_STRING(compl_pattern);
|
||||
API_CLEAR_STRING(compl_orig_text);
|
||||
kv_destroy(compl_orig_extmarks);
|
||||
|
@@ -27,6 +27,7 @@
|
||||
#include "nvim/eval/userfunc.h"
|
||||
#include "nvim/ex_cmds_defs.h"
|
||||
#include "nvim/ex_session.h"
|
||||
#include "nvim/fuzzy.h"
|
||||
#include "nvim/garray.h"
|
||||
#include "nvim/garray_defs.h"
|
||||
#include "nvim/getchar.h"
|
||||
@@ -50,7 +51,6 @@
|
||||
#include "nvim/regexp.h"
|
||||
#include "nvim/regexp_defs.h"
|
||||
#include "nvim/runtime.h"
|
||||
#include "nvim/search.h"
|
||||
#include "nvim/state_defs.h"
|
||||
#include "nvim/strings.h"
|
||||
#include "nvim/types_defs.h"
|
||||
@@ -1340,7 +1340,7 @@ int ExpandMappings(char *pat, regmatch_T *regmatch, int *numMatches, char ***mat
|
||||
match = vim_regexec(regmatch, p, 0);
|
||||
} else {
|
||||
score = fuzzy_match_str(p, pat);
|
||||
match = (score != 0);
|
||||
match = (score != FUZZY_SCORE_NONE);
|
||||
}
|
||||
|
||||
if (!match) {
|
||||
@@ -1386,7 +1386,7 @@ int ExpandMappings(char *pat, regmatch_T *regmatch, int *numMatches, char ***mat
|
||||
match = vim_regexec(regmatch, p, 0);
|
||||
} else {
|
||||
score = fuzzy_match_str(p, pat);
|
||||
match = (score != 0);
|
||||
match = (score != FUZZY_SCORE_NONE);
|
||||
}
|
||||
|
||||
if (!match) {
|
||||
|
@@ -56,6 +56,7 @@
|
||||
#include "nvim/ex_getln.h"
|
||||
#include "nvim/ex_session.h"
|
||||
#include "nvim/fold.h"
|
||||
#include "nvim/fuzzy.h"
|
||||
#include "nvim/garray.h"
|
||||
#include "nvim/garray_defs.h"
|
||||
#include "nvim/gettext_defs.h"
|
||||
@@ -97,7 +98,6 @@
|
||||
#include "nvim/regexp.h"
|
||||
#include "nvim/regexp_defs.h"
|
||||
#include "nvim/runtime.h"
|
||||
#include "nvim/search.h"
|
||||
#include "nvim/spell.h"
|
||||
#include "nvim/spellfile.h"
|
||||
#include "nvim/spellsuggest.h"
|
||||
@@ -5578,7 +5578,7 @@ static bool match_str(char *const str, regmatch_T *const regmatch, char **const
|
||||
}
|
||||
} else {
|
||||
const int score = fuzzy_match_str(str, fuzzystr);
|
||||
if (score != 0) {
|
||||
if (score != FUZZY_SCORE_NONE) {
|
||||
if (!test_only) {
|
||||
fuzmatch[idx].idx = idx;
|
||||
fuzmatch[idx].str = xstrdup(str);
|
||||
|
@@ -21,8 +21,7 @@
|
||||
#include "nvim/eval/typval.h"
|
||||
#include "nvim/ex_cmds.h"
|
||||
#include "nvim/ex_cmds_defs.h"
|
||||
#include "nvim/extmark.h"
|
||||
#include "nvim/extmark_defs.h"
|
||||
#include "nvim/fuzzy.h"
|
||||
#include "nvim/garray.h"
|
||||
#include "nvim/garray_defs.h"
|
||||
#include "nvim/getchar.h"
|
||||
@@ -33,22 +32,18 @@
|
||||
#include "nvim/highlight_defs.h"
|
||||
#include "nvim/insexpand.h"
|
||||
#include "nvim/keycodes.h"
|
||||
#include "nvim/mark.h"
|
||||
#include "nvim/mbyte.h"
|
||||
#include "nvim/memline.h"
|
||||
#include "nvim/memory.h"
|
||||
#include "nvim/memory_defs.h"
|
||||
#include "nvim/menu.h"
|
||||
#include "nvim/message.h"
|
||||
#include "nvim/move.h"
|
||||
#include "nvim/ops.h"
|
||||
#include "nvim/option.h"
|
||||
#include "nvim/option_defs.h"
|
||||
#include "nvim/option_vars.h"
|
||||
#include "nvim/plines.h"
|
||||
#include "nvim/popupmenu.h"
|
||||
#include "nvim/pos_defs.h"
|
||||
#include "nvim/search.h"
|
||||
#include "nvim/state_defs.h"
|
||||
#include "nvim/strings.h"
|
||||
#include "nvim/types_defs.h"
|
||||
|
@@ -35,6 +35,7 @@
|
||||
#include "nvim/extmark.h"
|
||||
#include "nvim/fileio.h"
|
||||
#include "nvim/fold.h"
|
||||
#include "nvim/fuzzy.h"
|
||||
#include "nvim/garray.h"
|
||||
#include "nvim/garray_defs.h"
|
||||
#include "nvim/gettext_defs.h"
|
||||
@@ -5487,7 +5488,7 @@ static bool vgr_match_buflines(qf_list_T *qfl, char *fname, buf_T *buf, char *sp
|
||||
{
|
||||
bool found_match = false;
|
||||
size_t pat_len = strlen(spat);
|
||||
pat_len = MIN(pat_len, MAX_FUZZY_MATCHES);
|
||||
pat_len = MIN(pat_len, FUZZY_MATCH_MAX_LEN);
|
||||
|
||||
for (linenr_T lnum = 1; lnum <= buf->b_ml.ml_line_count && *tomatch > 0; lnum++) {
|
||||
colnr_T col = 0;
|
||||
@@ -5533,12 +5534,12 @@ static bool vgr_match_buflines(qf_list_T *qfl, char *fname, buf_T *buf, char *sp
|
||||
char *const str = ml_get_buf(buf, lnum);
|
||||
const colnr_T linelen = ml_get_buf_len(buf, lnum);
|
||||
int score;
|
||||
uint32_t matches[MAX_FUZZY_MATCHES];
|
||||
uint32_t matches[FUZZY_MATCH_MAX_LEN];
|
||||
const size_t sz = sizeof(matches) / sizeof(matches[0]);
|
||||
|
||||
// Fuzzy string match
|
||||
CLEAR_FIELD(matches);
|
||||
while (fuzzy_match(str + col, spat, false, &score, matches, (int)sz, true) > 0) {
|
||||
while (fuzzy_match(str + col, spat, false, &score, matches, (int)sz) > 0) {
|
||||
// Pass the buffer number so that it gets used even for a
|
||||
// dummy buffer, unless duplicate_name is set, then the
|
||||
// buffer will be wiped out below.
|
||||
|
@@ -29,8 +29,6 @@
|
||||
#include "nvim/file_search.h"
|
||||
#include "nvim/fileio.h"
|
||||
#include "nvim/fold.h"
|
||||
#include "nvim/garray.h"
|
||||
#include "nvim/garray_defs.h"
|
||||
#include "nvim/getchar.h"
|
||||
#include "nvim/gettext_defs.h"
|
||||
#include "nvim/globals.h"
|
||||
@@ -2910,925 +2908,6 @@ the_end:
|
||||
restore_incsearch_state();
|
||||
}
|
||||
|
||||
/// Fuzzy string matching
|
||||
///
|
||||
/// Ported from the lib_fts library authored by Forrest Smith.
|
||||
/// https://github.com/forrestthewoods/lib_fts/tree/master/code
|
||||
///
|
||||
/// The following blog describes the fuzzy matching algorithm:
|
||||
/// https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/
|
||||
///
|
||||
/// Each matching string is assigned a score. The following factors are checked:
|
||||
/// - Matched letter
|
||||
/// - Unmatched letter
|
||||
/// - Consecutively matched letters
|
||||
/// - Proximity to start
|
||||
/// - Letter following a separator (space, underscore)
|
||||
/// - Uppercase letter following lowercase (aka CamelCase)
|
||||
///
|
||||
/// Matched letters are good. Unmatched letters are bad. Matching near the start
|
||||
/// is good. Matching the first letter in the middle of a phrase is good.
|
||||
/// Matching the uppercase letters in camel case entries is good.
|
||||
///
|
||||
/// The score assigned for each factor is explained below.
|
||||
/// File paths are different from file names. File extensions may be ignorable.
|
||||
/// Single words care about consecutive matches but not separators or camel
|
||||
/// case.
|
||||
/// Score starts at 100
|
||||
/// Matched letter: +0 points
|
||||
/// Unmatched letter: -1 point
|
||||
/// Consecutive match bonus: +15 points
|
||||
/// First letter bonus: +15 points
|
||||
/// Separator bonus: +30 points
|
||||
/// Camel case bonus: +30 points
|
||||
/// Unmatched leading letter: -5 points (max: -15)
|
||||
///
|
||||
/// There is some nuance to this. Scores don’t have an intrinsic meaning. The
|
||||
/// score range isn’t 0 to 100. It’s roughly [50, 150]. Longer words have a
|
||||
/// lower minimum score due to unmatched letter penalty. Longer search patterns
|
||||
/// have a higher maximum score due to match bonuses.
|
||||
///
|
||||
/// Separator and camel case bonus is worth a LOT. Consecutive matches are worth
|
||||
/// quite a bit.
|
||||
///
|
||||
/// There is a penalty if you DON’T match the first three letters. Which
|
||||
/// effectively rewards matching near the start. However there’s no difference
|
||||
/// in matching between the middle and end.
|
||||
///
|
||||
/// There is not an explicit bonus for an exact match. Unmatched letters receive
|
||||
/// a penalty. So shorter strings and closer matches are worth more.
|
||||
typedef struct {
|
||||
int idx; ///< used for stable sort
|
||||
listitem_T *item;
|
||||
int score;
|
||||
list_T *lmatchpos;
|
||||
} fuzzyItem_T;
|
||||
|
||||
/// bonus for adjacent matches; this is higher than SEPARATOR_BONUS so that
|
||||
/// matching a whole word is preferred.
|
||||
#define SEQUENTIAL_BONUS 40
|
||||
/// bonus if match occurs after a path separator
|
||||
#define PATH_SEPARATOR_BONUS 30
|
||||
/// bonus if match occurs after a word separator
|
||||
#define WORD_SEPARATOR_BONUS 25
|
||||
/// bonus if match is uppercase and prev is lower
|
||||
#define CAMEL_BONUS 30
|
||||
/// bonus if the first letter is matched
|
||||
#define FIRST_LETTER_BONUS 15
|
||||
/// bonus if exact match
|
||||
#define EXACT_MATCH_BONUS 100
|
||||
/// bonus if case match when no ignorecase
|
||||
#define CASE_MATCH_BONUS 25
|
||||
/// penalty applied for every letter in str before the first match
|
||||
#define LEADING_LETTER_PENALTY (-5)
|
||||
/// maximum penalty for leading letters
|
||||
#define MAX_LEADING_LETTER_PENALTY (-15)
|
||||
/// penalty for every letter that doesn't match
|
||||
#define UNMATCHED_LETTER_PENALTY (-1)
|
||||
/// penalty for gap in matching positions (-2 * k)
|
||||
#define GAP_PENALTY (-2)
|
||||
/// Score for a string that doesn't fuzzy match the pattern
|
||||
#define SCORE_NONE (-9999)
|
||||
|
||||
#define FUZZY_MATCH_RECURSION_LIMIT 10
|
||||
|
||||
/// Compute a score for a fuzzy matched string. The matching character locations
|
||||
/// are in "matches".
|
||||
static int fuzzy_match_compute_score(const char *const fuzpat, const char *const str,
|
||||
const int strSz, const uint32_t *const matches,
|
||||
const int numMatches, bool camelcase)
|
||||
FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE
|
||||
{
|
||||
assert(numMatches > 0); // suppress clang "result of operation is garbage"
|
||||
const char *p = str;
|
||||
uint32_t sidx = 0;
|
||||
bool is_exact_match = true;
|
||||
const char *const orig_fuzpat = fuzpat - numMatches;
|
||||
const char *curpat = orig_fuzpat;
|
||||
int pat_idx = 0;
|
||||
// Track consecutive camel case matches
|
||||
int consecutive_camel = 0;
|
||||
|
||||
// Initialize score
|
||||
int score = 100;
|
||||
|
||||
// Apply leading letter penalty
|
||||
int penalty = LEADING_LETTER_PENALTY * (int)matches[0];
|
||||
if (penalty < MAX_LEADING_LETTER_PENALTY) {
|
||||
penalty = MAX_LEADING_LETTER_PENALTY;
|
||||
}
|
||||
score += penalty;
|
||||
|
||||
// Apply unmatched penalty
|
||||
const int unmatched = strSz - numMatches;
|
||||
score += UNMATCHED_LETTER_PENALTY * unmatched;
|
||||
// In a long string, not all matches may be found due to the recursion limit.
|
||||
// If at least one match is found, reset the score to a non-negative value.
|
||||
if (score < 0 && numMatches > 0) {
|
||||
score = 0;
|
||||
}
|
||||
|
||||
// Apply ordering bonuses
|
||||
for (int i = 0; i < numMatches; i++) {
|
||||
const uint32_t currIdx = matches[i];
|
||||
bool is_camel = false;
|
||||
|
||||
if (i > 0) {
|
||||
const uint32_t prevIdx = matches[i - 1];
|
||||
|
||||
// Sequential
|
||||
if (currIdx == prevIdx + 1) {
|
||||
score += SEQUENTIAL_BONUS;
|
||||
} else {
|
||||
score += GAP_PENALTY * (int)(currIdx - prevIdx);
|
||||
// Reset consecutive camel count on gap
|
||||
consecutive_camel = 0;
|
||||
}
|
||||
}
|
||||
|
||||
int curr;
|
||||
// Check for bonuses based on neighbor character value
|
||||
if (currIdx > 0) {
|
||||
// Camel case
|
||||
int neighbor = ' ';
|
||||
|
||||
while (sidx < currIdx) {
|
||||
neighbor = utf_ptr2char(p);
|
||||
MB_PTR_ADV(p);
|
||||
sidx++;
|
||||
}
|
||||
curr = utf_ptr2char(p);
|
||||
|
||||
// Enhanced camel case scoring
|
||||
if (camelcase && mb_islower(neighbor) && mb_isupper(curr)) {
|
||||
score += CAMEL_BONUS * 2; // Double the camel case bonus
|
||||
is_camel = true;
|
||||
consecutive_camel++;
|
||||
// Additional bonus for consecutive camel
|
||||
if (consecutive_camel > 1) {
|
||||
score += CAMEL_BONUS;
|
||||
}
|
||||
} else {
|
||||
consecutive_camel = 0;
|
||||
}
|
||||
|
||||
// Bonus if the match follows a separator character
|
||||
if (neighbor == '/' || neighbor == '\\') {
|
||||
score += PATH_SEPARATOR_BONUS;
|
||||
} else if (neighbor == ' ' || neighbor == '_') {
|
||||
score += WORD_SEPARATOR_BONUS;
|
||||
}
|
||||
} else {
|
||||
// First letter
|
||||
score += FIRST_LETTER_BONUS;
|
||||
curr = utf_ptr2char(p);
|
||||
}
|
||||
|
||||
// Case matching bonus
|
||||
if (mb_isalpha(curr)) {
|
||||
while (pat_idx < i && *curpat) {
|
||||
MB_PTR_ADV(curpat);
|
||||
pat_idx++;
|
||||
}
|
||||
|
||||
if (curr == utf_ptr2char(curpat)) {
|
||||
score += CASE_MATCH_BONUS;
|
||||
// Extra bonus for exact case match in camel
|
||||
if (is_camel) {
|
||||
score += CASE_MATCH_BONUS / 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check exact match condition
|
||||
if (currIdx != (uint32_t)i) {
|
||||
is_exact_match = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Boost score for exact matches
|
||||
if (is_exact_match && numMatches == strSz) {
|
||||
score += EXACT_MATCH_BONUS;
|
||||
}
|
||||
|
||||
return score;
|
||||
}
|
||||
|
||||
/// Perform a recursive search for fuzzy matching "fuzpat" in "str".
|
||||
/// @return the number of matching characters.
|
||||
static int fuzzy_match_recursive(const char *fuzpat, const char *str, uint32_t strIdx,
|
||||
int *const outScore, const char *const strBegin, const int strLen,
|
||||
const uint32_t *const srcMatches, uint32_t *const matches,
|
||||
const int maxMatches, int nextMatch, int *const recursionCount,
|
||||
bool camelcase)
|
||||
FUNC_ATTR_NONNULL_ARG(1, 2, 4, 5, 8, 11) FUNC_ATTR_WARN_UNUSED_RESULT
|
||||
{
|
||||
// Recursion params
|
||||
bool recursiveMatch = false;
|
||||
uint32_t bestRecursiveMatches[MAX_FUZZY_MATCHES];
|
||||
int bestRecursiveScore = 0;
|
||||
|
||||
// Count recursions
|
||||
(*recursionCount)++;
|
||||
if (*recursionCount >= FUZZY_MATCH_RECURSION_LIMIT) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Detect end of strings
|
||||
if (*fuzpat == NUL || *str == NUL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Loop through fuzpat and str looking for a match
|
||||
bool first_match = true;
|
||||
while (*fuzpat != NUL && *str != NUL) {
|
||||
const int c1 = utf_ptr2char(fuzpat);
|
||||
const int c2 = utf_ptr2char(str);
|
||||
|
||||
// Found match
|
||||
if (mb_tolower(c1) == mb_tolower(c2)) {
|
||||
// Supplied matches buffer was too short
|
||||
if (nextMatch >= maxMatches) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int recursiveScore = 0;
|
||||
uint32_t recursiveMatches[MAX_FUZZY_MATCHES];
|
||||
CLEAR_FIELD(recursiveMatches);
|
||||
|
||||
// "Copy-on-Write" srcMatches into matches
|
||||
if (first_match && srcMatches != NULL) {
|
||||
memcpy(matches, srcMatches, (size_t)nextMatch * sizeof(srcMatches[0]));
|
||||
first_match = false;
|
||||
}
|
||||
|
||||
// Recursive call that "skips" this match
|
||||
const char *const next_char = str + utfc_ptr2len(str);
|
||||
if (fuzzy_match_recursive(fuzpat, next_char, strIdx + 1, &recursiveScore, strBegin, strLen,
|
||||
matches, recursiveMatches,
|
||||
sizeof(recursiveMatches) / sizeof(recursiveMatches[0]), nextMatch,
|
||||
recursionCount, camelcase)) {
|
||||
// Pick best recursive score
|
||||
if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
|
||||
memcpy(bestRecursiveMatches, recursiveMatches,
|
||||
MAX_FUZZY_MATCHES * sizeof(recursiveMatches[0]));
|
||||
bestRecursiveScore = recursiveScore;
|
||||
}
|
||||
recursiveMatch = true;
|
||||
}
|
||||
|
||||
// Advance
|
||||
matches[nextMatch++] = strIdx;
|
||||
MB_PTR_ADV(fuzpat);
|
||||
}
|
||||
MB_PTR_ADV(str);
|
||||
strIdx++;
|
||||
}
|
||||
|
||||
// Determine if full fuzpat was matched
|
||||
const bool matched = *fuzpat == NUL;
|
||||
|
||||
// Calculate score
|
||||
if (matched) {
|
||||
*outScore = fuzzy_match_compute_score(fuzpat, strBegin, strLen, matches, nextMatch, camelcase);
|
||||
}
|
||||
|
||||
// Return best result
|
||||
if (recursiveMatch && (!matched || bestRecursiveScore > *outScore)) {
|
||||
// Recursive score is better than "this"
|
||||
memcpy(matches, bestRecursiveMatches, (size_t)maxMatches * sizeof(matches[0]));
|
||||
*outScore = bestRecursiveScore;
|
||||
return nextMatch;
|
||||
} else if (matched) {
|
||||
return nextMatch; // "this" score is better than recursive
|
||||
}
|
||||
|
||||
return 0; // no match
|
||||
}
|
||||
|
||||
/// fuzzy_match()
|
||||
///
|
||||
/// Performs exhaustive search via recursion to find all possible matches and
|
||||
/// match with highest score.
|
||||
/// Scores values have no intrinsic meaning. Possible score range is not
|
||||
/// normalized and varies with pattern.
|
||||
/// Recursion is limited internally (default=10) to prevent degenerate cases
|
||||
/// (pat_arg="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").
|
||||
/// Patterns are limited to MAX_FUZZY_MATCHES characters.
|
||||
///
|
||||
/// @return true if "pat_arg" matches "str". Also returns the match score in
|
||||
/// "outScore" and the matching character positions in "matches".
|
||||
bool fuzzy_match(char *const str, const char *const pat_arg, const bool matchseq,
|
||||
int *const outScore, uint32_t *const matches, const int maxMatches, bool camelcase)
|
||||
FUNC_ATTR_NONNULL_ALL
|
||||
{
|
||||
const int len = mb_charlen(str);
|
||||
bool complete = false;
|
||||
int numMatches = 0;
|
||||
|
||||
*outScore = 0;
|
||||
|
||||
char *const save_pat = xstrdup(pat_arg);
|
||||
char *pat = save_pat;
|
||||
char *p = pat;
|
||||
|
||||
// Try matching each word in "pat_arg" in "str"
|
||||
while (true) {
|
||||
if (matchseq) {
|
||||
complete = true;
|
||||
} else {
|
||||
// Extract one word from the pattern (separated by space)
|
||||
p = skipwhite(p);
|
||||
if (*p == NUL) {
|
||||
break;
|
||||
}
|
||||
pat = p;
|
||||
while (*p != NUL && !ascii_iswhite(utf_ptr2char(p))) {
|
||||
MB_PTR_ADV(p);
|
||||
}
|
||||
if (*p == NUL) { // processed all the words
|
||||
complete = true;
|
||||
}
|
||||
*p = NUL;
|
||||
}
|
||||
|
||||
int score = 0;
|
||||
int recursionCount = 0;
|
||||
const int matchCount
|
||||
= fuzzy_match_recursive(pat, str, 0, &score, str, len, NULL,
|
||||
matches + numMatches,
|
||||
maxMatches - numMatches, 0, &recursionCount, camelcase);
|
||||
if (matchCount == 0) {
|
||||
numMatches = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
// Accumulate the match score and the number of matches
|
||||
*outScore += score;
|
||||
numMatches += matchCount;
|
||||
|
||||
if (complete) {
|
||||
break;
|
||||
}
|
||||
|
||||
// try matching the next word
|
||||
p++;
|
||||
}
|
||||
|
||||
xfree(save_pat);
|
||||
return numMatches != 0;
|
||||
}
|
||||
|
||||
/// Sort the fuzzy matches in the descending order of the match score.
|
||||
/// For items with same score, retain the order using the index (stable sort)
|
||||
static int fuzzy_match_item_compare(const void *const s1, const void *const s2)
|
||||
FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE
|
||||
{
|
||||
const int v1 = ((const fuzzyItem_T *)s1)->score;
|
||||
const int v2 = ((const fuzzyItem_T *)s2)->score;
|
||||
const int idx1 = ((const fuzzyItem_T *)s1)->idx;
|
||||
const int idx2 = ((const fuzzyItem_T *)s2)->idx;
|
||||
|
||||
if (v1 == v2) {
|
||||
return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1;
|
||||
}
|
||||
return v1 > v2 ? -1 : 1;
|
||||
}
|
||||
|
||||
/// Fuzzy search the string "str" in a list of "items" and return the matching
|
||||
/// strings in "fmatchlist".
|
||||
/// If "matchseq" is true, then for multi-word search strings, match all the
|
||||
/// words in sequence.
|
||||
/// If "items" is a list of strings, then search for "str" in the list.
|
||||
/// If "items" is a list of dicts, then either use "key" to lookup the string
|
||||
/// for each item or use "item_cb" Funcref function to get the string.
|
||||
/// If "retmatchpos" is true, then return a list of positions where "str"
|
||||
/// matches for each item.
|
||||
static void fuzzy_match_in_list(list_T *const l, char *const str, const bool matchseq,
|
||||
const char *const key, Callback *const item_cb,
|
||||
const bool retmatchpos, list_T *const fmatchlist,
|
||||
const int max_matches, bool camelcase)
|
||||
FUNC_ATTR_NONNULL_ARG(2, 5, 7)
|
||||
{
|
||||
int len = tv_list_len(l);
|
||||
if (len == 0) {
|
||||
return;
|
||||
}
|
||||
if (max_matches > 0 && len > max_matches) {
|
||||
len = max_matches;
|
||||
}
|
||||
|
||||
fuzzyItem_T *const items = xcalloc((size_t)len, sizeof(fuzzyItem_T));
|
||||
int match_count = 0;
|
||||
uint32_t matches[MAX_FUZZY_MATCHES];
|
||||
|
||||
// For all the string items in items, get the fuzzy matching score
|
||||
TV_LIST_ITER(l, li, {
|
||||
if (max_matches > 0 && match_count >= max_matches) {
|
||||
break;
|
||||
}
|
||||
|
||||
char *itemstr = NULL;
|
||||
typval_T rettv;
|
||||
rettv.v_type = VAR_UNKNOWN;
|
||||
const typval_T *const tv = TV_LIST_ITEM_TV(li);
|
||||
if (tv->v_type == VAR_STRING) { // list of strings
|
||||
itemstr = tv->vval.v_string;
|
||||
} else if (tv->v_type == VAR_DICT && (key != NULL || item_cb->type != kCallbackNone)) {
|
||||
// For a dict, either use the specified key to lookup the string or
|
||||
// use the specified callback function to get the string.
|
||||
if (key != NULL) {
|
||||
itemstr = tv_dict_get_string(tv->vval.v_dict, key, false);
|
||||
} else {
|
||||
typval_T argv[2];
|
||||
|
||||
// Invoke the supplied callback (if any) to get the dict item
|
||||
tv->vval.v_dict->dv_refcount++;
|
||||
argv[0].v_type = VAR_DICT;
|
||||
argv[0].vval.v_dict = tv->vval.v_dict;
|
||||
argv[1].v_type = VAR_UNKNOWN;
|
||||
if (callback_call(item_cb, 1, argv, &rettv)) {
|
||||
if (rettv.v_type == VAR_STRING) {
|
||||
itemstr = rettv.vval.v_string;
|
||||
}
|
||||
}
|
||||
tv_dict_unref(tv->vval.v_dict);
|
||||
}
|
||||
}
|
||||
|
||||
int score;
|
||||
if (itemstr != NULL && fuzzy_match(itemstr, str, matchseq, &score, matches,
|
||||
MAX_FUZZY_MATCHES, camelcase)) {
|
||||
items[match_count].idx = (int)match_count;
|
||||
items[match_count].item = li;
|
||||
items[match_count].score = score;
|
||||
|
||||
// Copy the list of matching positions in itemstr to a list, if
|
||||
// "retmatchpos" is set.
|
||||
if (retmatchpos) {
|
||||
items[match_count].lmatchpos = tv_list_alloc(kListLenMayKnow);
|
||||
int j = 0;
|
||||
const char *p = str;
|
||||
while (*p != NUL) {
|
||||
if (!ascii_iswhite(utf_ptr2char(p)) || matchseq) {
|
||||
tv_list_append_number(items[match_count].lmatchpos, matches[j]);
|
||||
j++;
|
||||
}
|
||||
MB_PTR_ADV(p);
|
||||
}
|
||||
}
|
||||
match_count++;
|
||||
}
|
||||
tv_clear(&rettv);
|
||||
});
|
||||
|
||||
if (match_count > 0) {
|
||||
// Sort the list by the descending order of the match score
|
||||
qsort(items, (size_t)match_count, sizeof(fuzzyItem_T), fuzzy_match_item_compare);
|
||||
|
||||
// For matchfuzzy(), return a list of matched strings.
|
||||
// ['str1', 'str2', 'str3']
|
||||
// For matchfuzzypos(), return a list with three items.
|
||||
// The first item is a list of matched strings. The second item
|
||||
// is a list of lists where each list item is a list of matched
|
||||
// character positions. The third item is a list of matching scores.
|
||||
// [['str1', 'str2', 'str3'], [[1, 3], [1, 3], [1, 3]]]
|
||||
list_T *retlist;
|
||||
if (retmatchpos) {
|
||||
const listitem_T *const li = tv_list_find(fmatchlist, 0);
|
||||
assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL);
|
||||
retlist = TV_LIST_ITEM_TV(li)->vval.v_list;
|
||||
} else {
|
||||
retlist = fmatchlist;
|
||||
}
|
||||
|
||||
// Copy the matching strings with a valid score to the return list
|
||||
for (int i = 0; i < match_count; i++) {
|
||||
if (items[i].score == SCORE_NONE) {
|
||||
break;
|
||||
}
|
||||
tv_list_append_tv(retlist, TV_LIST_ITEM_TV(items[i].item));
|
||||
}
|
||||
|
||||
// next copy the list of matching positions
|
||||
if (retmatchpos) {
|
||||
const listitem_T *li = tv_list_find(fmatchlist, -2);
|
||||
assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL);
|
||||
retlist = TV_LIST_ITEM_TV(li)->vval.v_list;
|
||||
|
||||
for (int i = 0; i < match_count; i++) {
|
||||
if (items[i].score == SCORE_NONE) {
|
||||
break;
|
||||
}
|
||||
tv_list_append_list(retlist, items[i].lmatchpos);
|
||||
}
|
||||
|
||||
// copy the matching scores
|
||||
li = tv_list_find(fmatchlist, -1);
|
||||
assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL);
|
||||
retlist = TV_LIST_ITEM_TV(li)->vval.v_list;
|
||||
for (int i = 0; i < match_count; i++) {
|
||||
if (items[i].score == SCORE_NONE) {
|
||||
break;
|
||||
}
|
||||
tv_list_append_number(retlist, items[i].score);
|
||||
}
|
||||
}
|
||||
}
|
||||
xfree(items);
|
||||
}
|
||||
|
||||
/// Do fuzzy matching. Returns the list of matched strings in "rettv".
|
||||
/// If "retmatchpos" is true, also returns the matching character positions.
|
||||
static void do_fuzzymatch(const typval_T *const argvars, typval_T *const rettv,
|
||||
const bool retmatchpos)
|
||||
FUNC_ATTR_NONNULL_ALL
|
||||
{
|
||||
// validate and get the arguments
|
||||
if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL) {
|
||||
semsg(_(e_listarg), retmatchpos ? "matchfuzzypos()" : "matchfuzzy()");
|
||||
return;
|
||||
}
|
||||
if (argvars[1].v_type != VAR_STRING || argvars[1].vval.v_string == NULL) {
|
||||
semsg(_(e_invarg2), tv_get_string(&argvars[1]));
|
||||
return;
|
||||
}
|
||||
|
||||
Callback cb = CALLBACK_NONE;
|
||||
const char *key = NULL;
|
||||
bool matchseq = false;
|
||||
int max_matches = 0;
|
||||
bool camelcase = true;
|
||||
if (argvars[2].v_type != VAR_UNKNOWN) {
|
||||
if (tv_check_for_nonnull_dict_arg(argvars, 2) == FAIL) {
|
||||
return;
|
||||
}
|
||||
|
||||
// To search a dict, either a callback function or a key can be
|
||||
// specified.
|
||||
dict_T *const d = argvars[2].vval.v_dict;
|
||||
const dictitem_T *di;
|
||||
if ((di = tv_dict_find(d, "key", -1)) != NULL) {
|
||||
if (di->di_tv.v_type != VAR_STRING || di->di_tv.vval.v_string == NULL
|
||||
|| *di->di_tv.vval.v_string == NUL) {
|
||||
semsg(_(e_invargNval), "key", tv_get_string(&di->di_tv));
|
||||
return;
|
||||
}
|
||||
key = tv_get_string(&di->di_tv);
|
||||
} else if (!tv_dict_get_callback(d, "text_cb", -1, &cb)) {
|
||||
semsg(_(e_invargval), "text_cb");
|
||||
return;
|
||||
}
|
||||
|
||||
if ((di = tv_dict_find(d, "limit", -1)) != NULL) {
|
||||
if (di->di_tv.v_type != VAR_NUMBER) {
|
||||
semsg(_(e_invargval), "limit");
|
||||
return;
|
||||
}
|
||||
max_matches = (int)tv_get_number_chk(&di->di_tv, NULL);
|
||||
}
|
||||
|
||||
if ((di = tv_dict_find(d, "camelcase", -1)) != NULL) {
|
||||
if (di->di_tv.v_type != VAR_BOOL) {
|
||||
semsg(_(e_invargval), "camelcase");
|
||||
return;
|
||||
}
|
||||
camelcase = tv_get_bool_chk(&di->di_tv, NULL);
|
||||
}
|
||||
|
||||
if (tv_dict_find(d, "matchseq", -1) != NULL) {
|
||||
matchseq = true;
|
||||
}
|
||||
}
|
||||
|
||||
// get the fuzzy matches
|
||||
tv_list_alloc_ret(rettv, retmatchpos ? 3 : kListLenUnknown);
|
||||
if (retmatchpos) {
|
||||
// For matchfuzzypos(), a list with three items are returned. First
|
||||
// item is a list of matching strings, the second item is a list of
|
||||
// lists with matching positions within each string and the third item
|
||||
// is the list of scores of the matches.
|
||||
tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown));
|
||||
tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown));
|
||||
tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown));
|
||||
}
|
||||
|
||||
fuzzy_match_in_list(argvars[0].vval.v_list,
|
||||
(char *)tv_get_string(&argvars[1]), matchseq, key,
|
||||
&cb, retmatchpos, rettv->vval.v_list, max_matches, camelcase);
|
||||
callback_free(&cb);
|
||||
}
|
||||
|
||||
/// "matchfuzzy()" function
|
||||
void f_matchfuzzy(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
|
||||
{
|
||||
do_fuzzymatch(argvars, rettv, false);
|
||||
}
|
||||
|
||||
/// "matchfuzzypos()" function
|
||||
void f_matchfuzzypos(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
|
||||
{
|
||||
do_fuzzymatch(argvars, rettv, true);
|
||||
}
|
||||
|
||||
/// Same as fuzzy_match_item_compare() except for use with a string match
|
||||
static int fuzzy_match_str_compare(const void *const s1, const void *const s2)
|
||||
FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE
|
||||
{
|
||||
const int v1 = ((fuzmatch_str_T *)s1)->score;
|
||||
const int v2 = ((fuzmatch_str_T *)s2)->score;
|
||||
const int idx1 = ((fuzmatch_str_T *)s1)->idx;
|
||||
const int idx2 = ((fuzmatch_str_T *)s2)->idx;
|
||||
|
||||
if (v1 == v2) {
|
||||
return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1;
|
||||
} else {
|
||||
return v1 > v2 ? -1 : 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// Sort fuzzy matches by score
|
||||
static void fuzzy_match_str_sort(fuzmatch_str_T *const fm, const int sz)
|
||||
FUNC_ATTR_NONNULL_ALL
|
||||
{
|
||||
// Sort the list by the descending order of the match score
|
||||
qsort(fm, (size_t)sz, sizeof(fuzmatch_str_T), fuzzy_match_str_compare);
|
||||
}
|
||||
|
||||
/// Same as fuzzy_match_item_compare() except for use with a function name
|
||||
/// string match. <SNR> functions should be sorted to the end.
|
||||
static int fuzzy_match_func_compare(const void *const s1, const void *const s2)
|
||||
FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE
|
||||
{
|
||||
const int v1 = ((fuzmatch_str_T *)s1)->score;
|
||||
const int v2 = ((fuzmatch_str_T *)s2)->score;
|
||||
const int idx1 = ((fuzmatch_str_T *)s1)->idx;
|
||||
const int idx2 = ((fuzmatch_str_T *)s2)->idx;
|
||||
const char *const str1 = ((fuzmatch_str_T *)s1)->str;
|
||||
const char *const str2 = ((fuzmatch_str_T *)s2)->str;
|
||||
|
||||
if (*str1 != '<' && *str2 == '<') {
|
||||
return -1;
|
||||
}
|
||||
if (*str1 == '<' && *str2 != '<') {
|
||||
return 1;
|
||||
}
|
||||
if (v1 == v2) {
|
||||
return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1;
|
||||
}
|
||||
return v1 > v2 ? -1 : 1;
|
||||
}
|
||||
|
||||
/// Sort fuzzy matches of function names by score.
|
||||
/// <SNR> functions should be sorted to the end.
|
||||
static void fuzzy_match_func_sort(fuzmatch_str_T *const fm, const int sz)
|
||||
FUNC_ATTR_NONNULL_ALL
|
||||
{
|
||||
// Sort the list by the descending order of the match score
|
||||
qsort(fm, (size_t)sz, sizeof(fuzmatch_str_T), fuzzy_match_func_compare);
|
||||
}
|
||||
|
||||
/// Fuzzy match "pat" in "str".
|
||||
/// @returns 0 if there is no match. Otherwise, returns the match score.
|
||||
int fuzzy_match_str(char *const str, const char *const pat)
|
||||
FUNC_ATTR_WARN_UNUSED_RESULT
|
||||
{
|
||||
if (str == NULL || pat == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int score = 0;
|
||||
uint32_t matchpos[MAX_FUZZY_MATCHES];
|
||||
fuzzy_match(str, pat, true, &score, matchpos, sizeof(matchpos) / sizeof(matchpos[0]), true);
|
||||
|
||||
return score;
|
||||
}
|
||||
|
||||
/// Fuzzy match the position of string "pat" in string "str".
|
||||
/// @returns a dynamic array of matching positions. If there is no match, returns NULL.
|
||||
garray_T *fuzzy_match_str_with_pos(char *const str, const char *const pat)
|
||||
{
|
||||
if (str == NULL || pat == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
garray_T *match_positions = xmalloc(sizeof(garray_T));
|
||||
ga_init(match_positions, sizeof(uint32_t), 10);
|
||||
|
||||
unsigned matches[MAX_FUZZY_MATCHES];
|
||||
int score = 0;
|
||||
if (!fuzzy_match(str, pat, false, &score, matches, MAX_FUZZY_MATCHES, true)
|
||||
|| score == 0) {
|
||||
ga_clear(match_positions);
|
||||
xfree(match_positions);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int j = 0;
|
||||
for (const char *p = pat; *p != NUL; MB_PTR_ADV(p)) {
|
||||
if (!ascii_iswhite(utf_ptr2char(p))) {
|
||||
GA_APPEND(uint32_t, match_positions, matches[j]);
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
return match_positions;
|
||||
}
|
||||
|
||||
/// This function splits the line pointed to by `*ptr` into words and performs
|
||||
/// a fuzzy match for the pattern `pat` on each word. It iterates through the
|
||||
/// line, moving `*ptr` to the start of each word during the process.
|
||||
///
|
||||
/// If a match is found:
|
||||
/// - `*ptr` points to the start of the matched word.
|
||||
/// - `*len` is set to the length of the matched word.
|
||||
/// - `*score` contains the match score.
|
||||
///
|
||||
/// If no match is found, `*ptr` is updated to the end of the line.
|
||||
bool fuzzy_match_str_in_line(char **ptr, char *pat, int *len, pos_T *current_pos, int *score)
|
||||
{
|
||||
char *str = *ptr;
|
||||
char *strBegin = str;
|
||||
char *end = NULL;
|
||||
char *start = NULL;
|
||||
bool found = false;
|
||||
|
||||
if (str == NULL || pat == NULL) {
|
||||
return found;
|
||||
}
|
||||
char *line_end = find_line_end(str);
|
||||
|
||||
while (str < line_end) {
|
||||
// Skip non-word characters
|
||||
start = find_word_start(str);
|
||||
if (*start == NUL) {
|
||||
break;
|
||||
}
|
||||
end = find_word_end(start);
|
||||
|
||||
// Extract the word from start to end
|
||||
char save_end = *end;
|
||||
*end = NUL;
|
||||
|
||||
// Perform fuzzy match
|
||||
*score = fuzzy_match_str(start, pat);
|
||||
*end = save_end;
|
||||
|
||||
if (*score > 0) {
|
||||
*len = (int)(end - start);
|
||||
found = true;
|
||||
*ptr = start;
|
||||
if (current_pos) {
|
||||
current_pos->col += (int)(end - strBegin);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Move to the end of the current word for the next iteration
|
||||
str = end;
|
||||
// Ensure we continue searching after the current word
|
||||
while (*str != NUL && !vim_iswordp(str)) {
|
||||
MB_PTR_ADV(str);
|
||||
}
|
||||
}
|
||||
|
||||
if (!found) {
|
||||
*ptr = line_end;
|
||||
}
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
/// Search for the next fuzzy match in the specified buffer.
|
||||
/// This function attempts to find the next occurrence of the given pattern
|
||||
/// in the buffer, starting from the current position. It handles line wrapping
|
||||
/// and direction of search.
|
||||
///
|
||||
/// Return true if a match is found, otherwise false.
|
||||
bool search_for_fuzzy_match(buf_T *buf, pos_T *pos, char *pattern, int dir, pos_T *start_pos,
|
||||
int *len, char **ptr, int *score)
|
||||
{
|
||||
pos_T current_pos = *pos;
|
||||
pos_T circly_end;
|
||||
bool found_new_match = false;
|
||||
bool looped_around = false;
|
||||
|
||||
bool whole_line = ctrl_x_mode_whole_line();
|
||||
|
||||
if (buf == curbuf) {
|
||||
circly_end = *start_pos;
|
||||
} else {
|
||||
circly_end.lnum = buf->b_ml.ml_line_count;
|
||||
circly_end.col = 0;
|
||||
circly_end.coladd = 0;
|
||||
}
|
||||
|
||||
if (whole_line && start_pos->lnum != pos->lnum) {
|
||||
current_pos.lnum += dir;
|
||||
}
|
||||
|
||||
while (true) {
|
||||
// Check if looped around and back to start position
|
||||
if (looped_around && equalpos(current_pos, circly_end)) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Ensure current_pos is valid
|
||||
if (current_pos.lnum >= 1 && current_pos.lnum <= buf->b_ml.ml_line_count) {
|
||||
// Get the current line buffer
|
||||
*ptr = ml_get_buf(buf, current_pos.lnum);
|
||||
if (!whole_line) {
|
||||
*ptr += current_pos.col;
|
||||
}
|
||||
|
||||
// If ptr is end of line is reached, move to next line
|
||||
// or previous line based on direction
|
||||
if (*ptr != NULL && **ptr != NUL) {
|
||||
if (!whole_line) {
|
||||
// Try to find a fuzzy match in the current line starting
|
||||
// from current position
|
||||
found_new_match = fuzzy_match_str_in_line(ptr, pattern,
|
||||
len, ¤t_pos, score);
|
||||
if (found_new_match) {
|
||||
*pos = current_pos;
|
||||
break;
|
||||
} else if (looped_around && current_pos.lnum == circly_end.lnum) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (fuzzy_match_str(*ptr, pattern) > 0) {
|
||||
found_new_match = true;
|
||||
*pos = current_pos;
|
||||
*len = ml_get_buf_len(buf, current_pos.lnum);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Move to the next line or previous line based on direction
|
||||
if (dir == FORWARD) {
|
||||
if (++current_pos.lnum > buf->b_ml.ml_line_count) {
|
||||
if (p_ws) {
|
||||
current_pos.lnum = 1;
|
||||
looped_around = true;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (--current_pos.lnum < 1) {
|
||||
if (p_ws) {
|
||||
current_pos.lnum = buf->b_ml.ml_line_count;
|
||||
looped_around = true;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
current_pos.col = 0;
|
||||
}
|
||||
|
||||
return found_new_match;
|
||||
}
|
||||
|
||||
/// Copy a list of fuzzy matches into a string list after sorting the matches by
|
||||
/// the fuzzy score. Frees the memory allocated for "fuzmatch".
|
||||
void fuzzymatches_to_strmatches(fuzmatch_str_T *const fuzmatch, char ***const matches,
|
||||
const int count, const bool funcsort)
|
||||
FUNC_ATTR_NONNULL_ARG(2)
|
||||
{
|
||||
if (count <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
*matches = xmalloc((size_t)count * sizeof(char *));
|
||||
|
||||
// Sort the list by the descending order of the match score
|
||||
if (funcsort) {
|
||||
fuzzy_match_func_sort(fuzmatch, count);
|
||||
} else {
|
||||
fuzzy_match_str_sort(fuzmatch, count);
|
||||
}
|
||||
|
||||
for (int i = 0; i < count; i++) {
|
||||
(*matches)[i] = fuzmatch[i].str;
|
||||
}
|
||||
xfree(fuzmatch);
|
||||
}
|
||||
|
||||
/// Free a list of fuzzy string matches.
|
||||
void fuzmatch_str_free(fuzmatch_str_T *const fuzmatch, int count)
|
||||
{
|
||||
if (count <= 0 || fuzmatch == NULL) {
|
||||
return;
|
||||
}
|
||||
while (count--) {
|
||||
xfree(fuzmatch[count].str);
|
||||
}
|
||||
xfree(fuzmatch);
|
||||
}
|
||||
|
||||
/// Get line "lnum" and copy it into "buf[LSIZE]".
|
||||
/// The copy is made because the regexp may make the line invalid when using a
|
||||
/// mark.
|
||||
|
@@ -67,11 +67,6 @@ enum { SEARCH_STAT_DEF_TIMEOUT = 40, };
|
||||
// '[>9999/>9999]': 13 + 1 (NUL)
|
||||
enum { SEARCH_STAT_BUF_LEN = 16, };
|
||||
|
||||
enum {
|
||||
/// Maximum number of characters that can be fuzzy matched
|
||||
MAX_FUZZY_MATCHES = 256,
|
||||
};
|
||||
|
||||
/// Structure containing offset definition for the last search pattern
|
||||
///
|
||||
/// @note Only offset for the last search pattern is used, not for the last
|
||||
@@ -112,14 +107,6 @@ typedef struct {
|
||||
int last_maxcount; // the max count of the last search
|
||||
} searchstat_T;
|
||||
|
||||
/// Fuzzy matched string list item. Used for fuzzy match completion. Items are
|
||||
/// usually sorted by "score". The "idx" member is used for stable-sort.
|
||||
typedef struct {
|
||||
int idx;
|
||||
char *str;
|
||||
int score;
|
||||
} fuzmatch_str_T;
|
||||
|
||||
#ifdef INCLUDE_GENERATED_DECLARATIONS
|
||||
# include "search.h.generated.h"
|
||||
#endif
|
||||
|
@@ -3262,7 +3262,7 @@ endfunc
|
||||
func Test_fuzzy_completion_bufname_fullpath()
|
||||
CheckUnix
|
||||
set wildoptions&
|
||||
call mkdir('Xcmd/Xstate/Xfile.js', 'p')
|
||||
call mkdir('Xcmd/Xstate/Xfile.js', 'pR')
|
||||
edit Xcmd/Xstate/Xfile.js
|
||||
cd Xcmd/Xstate
|
||||
enew
|
||||
@@ -3270,9 +3270,8 @@ func Test_fuzzy_completion_bufname_fullpath()
|
||||
call assert_equal('"b CmdStateFile', @:)
|
||||
set wildoptions=fuzzy
|
||||
call feedkeys(":b CmdStateFile\<Tab>\<C-B>\"\<CR>", 'tx')
|
||||
call assert_match('Xcmd/Xstate/Xfile.js$', @:)
|
||||
call assert_equal('"b CmdStateFile', @:)
|
||||
cd -
|
||||
call delete('Xcmd', 'rf')
|
||||
set wildoptions&
|
||||
endfunc
|
||||
|
||||
@@ -3551,7 +3550,7 @@ func Test_fuzzy_completion_mapname()
|
||||
nmap <Plug>state :
|
||||
nmap <Plug>FendingOff :
|
||||
call feedkeys(":nmap <Plug>fo\<C-A>\<C-B>\"\<CR>", 'tx')
|
||||
call assert_equal("\"nmap <Plug>format <Plug>TestFOrmat <Plug>FendingOff <Plug>goformat <Plug>fendoff", @:)
|
||||
call assert_equal("\"nmap <Plug>format <Plug>TestFOrmat <Plug>FendingOff <Plug>fendoff <Plug>goformat", @:)
|
||||
nunmap <Plug>format
|
||||
nunmap <Plug>goformat
|
||||
nunmap <Plug>TestFOrmat
|
||||
@@ -3673,9 +3672,7 @@ func Test_fuzzy_completion_syntax_group()
|
||||
call assert_equal('"syntax list mpar', @:)
|
||||
set wildoptions=fuzzy
|
||||
call feedkeys(":syntax list mpar\<Tab>\<C-B>\"\<CR>", 'tx')
|
||||
" Fuzzy match prefers NvimParenthesis over MatchParen
|
||||
" call assert_equal('"syntax list MatchParen', @:)
|
||||
call assert_equal('"syntax list NvimParenthesis', @:)
|
||||
call assert_equal('"syntax list MatchParen', @:)
|
||||
set wildoptions&
|
||||
endfunc
|
||||
|
||||
@@ -3726,7 +3723,7 @@ func Test_fuzzy_completion_cmd_sort_results()
|
||||
command T123FendingOff :
|
||||
set wildoptions=fuzzy
|
||||
call feedkeys(":T123fo\<C-A>\<C-B>\"\<CR>", 'tx')
|
||||
call assert_equal('"T123format T123TestFOrmat T123FendingOff T123goformat T123fendoff', @:)
|
||||
call assert_equal('"T123format T123TestFOrmat T123FendingOff T123fendoff T123goformat', @:)
|
||||
delcommand T123format
|
||||
delcommand T123goformat
|
||||
delcommand T123TestFOrmat
|
||||
|
@@ -18,11 +18,11 @@ func Test_matchfuzzy()
|
||||
call assert_equal(['crayon', 'camera'], matchfuzzy(['camera', 'crayon'], 'cra'))
|
||||
call assert_equal(['aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa', 'aba'], matchfuzzy(['aba', 'aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa'], 'aa'))
|
||||
call assert_equal(['one'], matchfuzzy(['one', 'two'], 'one'))
|
||||
call assert_equal(['oneTwo', 'onetwo'], matchfuzzy(['onetwo', 'oneTwo'], 'oneTwo'))
|
||||
call assert_equal(['onetwo', 'one_two'], matchfuzzy(['onetwo', 'one_two'], 'oneTwo'))
|
||||
call assert_equal(['oneTwo'], matchfuzzy(['onetwo', 'oneTwo'], 'oneTwo'))
|
||||
call assert_equal([], matchfuzzy(['onetwo', 'one_two'], 'oneTwo'))
|
||||
call assert_equal(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], matchfuzzy(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], 'aa'))
|
||||
call assert_equal(256, matchfuzzy([repeat('a', 256)], repeat('a', 256))[0]->len())
|
||||
call assert_equal([], matchfuzzy([repeat('a', 300)], repeat('a', 257)))
|
||||
call assert_equal([repeat('a', 300)], matchfuzzy([repeat('a', 300)], repeat('a', 257)))
|
||||
" full match has highest score
|
||||
call assert_equal(['Cursor', 'lCursor'], matchfuzzy(["hello", "lCursor", "Cursor"], "Cursor"))
|
||||
" matches with same score should not be reordered
|
||||
@@ -30,8 +30,7 @@ func Test_matchfuzzy()
|
||||
call assert_equal(l, l->matchfuzzy('abc'))
|
||||
|
||||
" Tests for match preferences
|
||||
" preference for camel case match
|
||||
call assert_equal(['oneTwo', 'onetwo'], ['onetwo', 'oneTwo']->matchfuzzy('onetwo'))
|
||||
call assert_equal(['onetwo', 'oneTwo'], ['onetwo', 'oneTwo']->matchfuzzy('onetwo'))
|
||||
" preference for match after a separator (_ or space)
|
||||
call assert_equal(['onetwo', 'one_two', 'one two'], ['onetwo', 'one_two', 'one two']->matchfuzzy('onetwo'))
|
||||
" preference for leading letter match
|
||||
@@ -47,7 +46,7 @@ func Test_matchfuzzy()
|
||||
" gap penalty
|
||||
call assert_equal(['xxayybxxxx', 'xxayyybxxx', 'xxayyyybxx'], ['xxayyyybxx', 'xxayyybxxx', 'xxayybxxxx']->matchfuzzy('ab'))
|
||||
" path separator vs word separator
|
||||
call assert_equal(['color/setup.vim', 'color\\setup.vim', 'color setup.vim', 'color_setup.vim', 'colorsetup.vim'], matchfuzzy(['colorsetup.vim', 'color setup.vim', 'color/setup.vim', 'color_setup.vim', 'color\\setup.vim'], 'setup.vim'))
|
||||
call assert_equal(['color/setup.vim', 'color setup.vim', 'color_setup.vim', 'colorsetup.vim', 'color\\setup.vim'], matchfuzzy(['colorsetup.vim', 'color setup.vim', 'color/setup.vim', 'color_setup.vim', 'color\\setup.vim'], 'setup.vim'))
|
||||
|
||||
" match multiple words (separated by space)
|
||||
call assert_equal(['foo bar baz'], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzy('baz foo'))
|
||||
@@ -92,15 +91,16 @@ func Test_matchfuzzy()
|
||||
call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : 'name'})", 'E730:')
|
||||
|
||||
" camelcase
|
||||
call assert_equal(['lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'Cursor', 'CurSearch', 'CursorLine'],
|
||||
\ matchfuzzy(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur'))
|
||||
call assert_equal(['lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'Cursor', 'CurSearch', 'CursorLine'],
|
||||
\ matchfuzzy(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur', {"camelcase": v:true}))
|
||||
call assert_equal(['Cursor', 'CurSearch', 'CursorLine', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor'],
|
||||
\ matchfuzzy(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur', {"camelcase": v:false}))
|
||||
\ matchfuzzy(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur'))
|
||||
call assert_equal(['things', 'sThings', 'thisThings'],
|
||||
\ matchfuzzy(['things','sThings', 'thisThings'], 'thin', {'camelcase': v:false}))
|
||||
call assert_fails("let x = matchfuzzy([], 'foo', {'camelcase': []})", 'E475: Invalid value for argument camelcase')
|
||||
\ matchfuzzy(['things','sThings', 'thisThings'], 'thin'))
|
||||
call assert_equal(['MyTestCase', 'mytestcase'], matchfuzzy(['mytestcase', 'MyTestCase'], 'mtc'))
|
||||
call assert_equal(['MyTestCase', 'mytestcase'], matchfuzzy(['MyTestCase', 'mytestcase'], 'mtc'))
|
||||
call assert_equal(['MyTest'], matchfuzzy(['Mytest', 'mytest', 'MyTest'], 'MyT'))
|
||||
call assert_equal(['CamelCaseMatchIngAlg'],
|
||||
\ matchfuzzy(['CamelCaseMatchIngAlg', 'camelCaseMatchingAlg', 'camelcasematchingalg'], 'CamelCase'))
|
||||
call assert_equal(['SomeWord', 'PrefixSomeWord'], matchfuzzy(['PrefixSomeWord', 'SomeWord'], 'somewor'))
|
||||
|
||||
" Test in latin1 encoding
|
||||
let save_enc = &encoding
|
||||
@@ -112,50 +112,57 @@ endfunc
|
||||
|
||||
" Test for the matchfuzzypos() function
|
||||
func Test_matchfuzzypos()
|
||||
call assert_equal([['curl', 'world'], [[2,3], [2,3]], [178, 177]], matchfuzzypos(['world', 'curl'], 'rl'))
|
||||
call assert_equal([['curl', 'world'], [[2,3], [2,3]], [178, 177]], matchfuzzypos(['world', 'one', 'curl'], 'rl'))
|
||||
call assert_equal([['curl', 'world'], [[2,3], [2,3]], [990, 985]], matchfuzzypos(['world', 'curl'], 'rl'))
|
||||
call assert_equal([['curl', 'world'], [[2,3], [2,3]], [990, 985]], matchfuzzypos(['world', 'one', 'curl'], 'rl'))
|
||||
call assert_equal([['hello', 'hello world hello world'],
|
||||
\ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], [500, 382]],
|
||||
\ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], [2147483647, 4810]],
|
||||
\ matchfuzzypos(['hello world hello world', 'hello', 'world'], 'hello'))
|
||||
call assert_equal([['aaaaaaa'], [[0, 1, 2]], [266]], matchfuzzypos(['aaaaaaa'], 'aaa'))
|
||||
call assert_equal([['a b'], [[0, 3]], [269]], matchfuzzypos(['a b'], 'a b'))
|
||||
call assert_equal([['a b'], [[0, 3]], [269]], matchfuzzypos(['a b'], 'a b'))
|
||||
call assert_equal([['a b'], [[0]], [137]], matchfuzzypos(['a b'], ' a '))
|
||||
call assert_equal([['aaaaaaa'], [[0, 1, 2]], [2880]], matchfuzzypos(['aaaaaaa'], 'aaa'))
|
||||
call assert_equal([['a b'], [[0, 3]], [1670]], matchfuzzypos(['a b'], 'a b'))
|
||||
call assert_equal([['a b'], [[0, 3]], [1670]], matchfuzzypos(['a b'], 'a b'))
|
||||
call assert_equal([['a b'], [[0]], [885]], matchfuzzypos(['a b'], ' a '))
|
||||
call assert_equal([[], [], []], matchfuzzypos(['a b'], ' '))
|
||||
call assert_equal([[], [], []], matchfuzzypos(['world', 'curl'], 'ab'))
|
||||
let x = matchfuzzypos([repeat('a', 256)], repeat('a', 256))
|
||||
call assert_equal(range(256), x[1][0])
|
||||
call assert_equal([[], [], []], matchfuzzypos([repeat('a', 300)], repeat('a', 257)))
|
||||
|
||||
" fuzzy matches limited to 1024 chars
|
||||
let matches = matchfuzzypos([repeat('a', 1030)], repeat('a', 1025))
|
||||
call assert_equal([repeat('a', 1030)], matches[0])
|
||||
call assert_equal(1024, len(matches[1][0]))
|
||||
call assert_equal(1023, matches[1][0][1023])
|
||||
|
||||
call assert_equal([[], [], []], matchfuzzypos([], 'abc'))
|
||||
call assert_equal([[], [], []], matchfuzzypos(['abc'], ''))
|
||||
|
||||
" match in a long string
|
||||
call assert_equal([[repeat('x', 300) .. 'abc'], [[300, 301, 302]], [155]],
|
||||
call assert_equal([[repeat('x', 300) .. 'abc'], [[300, 301, 302]], [500]],
|
||||
\ matchfuzzypos([repeat('x', 300) .. 'abc'], 'abc'))
|
||||
|
||||
" preference for camel case match
|
||||
call assert_equal([['xabcxxaBc'], [[6, 7, 8]], [269]], matchfuzzypos(['xabcxxaBc'], 'abc'))
|
||||
call assert_equal([['xabcxxaBc'], [[7, 8]], [1665]], matchfuzzypos(['xabcxxaBc'], 'bc'))
|
||||
" preference for match after a separator (_ or space)
|
||||
call assert_equal([['xabx_ab'], [[5, 6]], [195]], matchfuzzypos(['xabx_ab'], 'ab'))
|
||||
call assert_equal([['xabx_ab'], [[5, 6]], [1775]], matchfuzzypos(['xabx_ab'], 'ab'))
|
||||
" preference for leading letter match
|
||||
call assert_equal([['abcxabc'], [[0, 1]], [200]], matchfuzzypos(['abcxabc'], 'ab'))
|
||||
call assert_equal([['abcxabc'], [[0, 1]], [1875]], matchfuzzypos(['abcxabc'], 'ab'))
|
||||
" preference for sequential match
|
||||
call assert_equal([['aobncedone'], [[7, 8, 9]], [233]], matchfuzzypos(['aobncedone'], 'one'))
|
||||
call assert_equal([['aobncedone'], [[7, 8, 9]], [1965]], matchfuzzypos(['aobncedone'], 'one'))
|
||||
" best recursive match
|
||||
call assert_equal([['xoone'], [[2, 3, 4]], [243]], matchfuzzypos(['xoone'], 'one'))
|
||||
call assert_equal([['xoone'], [[2, 3, 4]], [1990]], matchfuzzypos(['xoone'], 'one'))
|
||||
|
||||
" match multiple words (separated by space)
|
||||
call assert_equal([['foo bar baz'], [[8, 9, 10, 0, 1, 2]], [519]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo'))
|
||||
call assert_equal([['foo bar baz'], [[8, 9, 10, 0, 1, 2]], [5620]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo'))
|
||||
call assert_equal([[], [], []], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo', {'matchseq': 1}))
|
||||
call assert_equal([['foo bar baz'], [[0, 1, 2, 8, 9, 10]], [519]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz'))
|
||||
call assert_equal([['foo bar baz'], [[0, 1, 2, 3, 4, 5, 10]], [476]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz', {'matchseq': 1}))
|
||||
call assert_equal([['foo bar baz'], [[0, 1, 2, 8, 9, 10]], [5620]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz'))
|
||||
call assert_equal([['foo bar baz'], [[0, 1, 2, 3, 4, 9, 10]], [6660]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz', {'matchseq': 1}))
|
||||
call assert_equal([[], [], []], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('one two'))
|
||||
call assert_equal([[], [], []], ['foo bar']->matchfuzzypos(" \t "))
|
||||
call assert_equal([['grace'], [[1, 2, 3, 4, 2, 3, 4, 0, 1, 2, 3, 4]], [1057]], ['grace']->matchfuzzypos('race ace grace'))
|
||||
call assert_equal([['grace'], [[1, 2, 3, 4, 2, 3, 4, 0, 1, 2, 3, 4]], [2147483647]], ['grace']->matchfuzzypos('race ace grace'))
|
||||
|
||||
let l = [{'id' : 5, 'val' : 'crayon'}, {'id' : 6, 'val' : 'camera'}]
|
||||
call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [267]],
|
||||
call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [2885]],
|
||||
\ matchfuzzypos(l, 'cam', {'text_cb' : {v -> v.val}}))
|
||||
call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [267]],
|
||||
call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [2885]],
|
||||
\ matchfuzzypos(l, 'cam', {'key' : 'val'}))
|
||||
call assert_equal([[], [], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> v.val}}))
|
||||
call assert_equal([[], [], []], matchfuzzypos(l, 'day', {'key' : 'val'}))
|
||||
@@ -175,30 +182,17 @@ func Test_matchfuzzypos()
|
||||
" call assert_fails("let x = matchfuzzypos(l, 'foo', {'text_cb' : test_null_function()})", 'E475:')
|
||||
|
||||
" case match
|
||||
call assert_equal([['Match', 'match'], [[0, 1], [0, 1]], [202, 177]], matchfuzzypos(['match', 'Match'], 'Ma'))
|
||||
call assert_equal([['match', 'Match'], [[0, 1], [0, 1]], [202, 177]], matchfuzzypos(['Match', 'match'], 'ma'))
|
||||
" CamelCase has high weight even case match
|
||||
call assert_equal(['MyTestCase', 'mytestcase'], matchfuzzy(['mytestcase', 'MyTestCase'], 'mtc'))
|
||||
call assert_equal(['MyTestCase', 'mytestcase'], matchfuzzy(['MyTestCase', 'mytestcase'], 'mtc'))
|
||||
call assert_equal(['MyTest', 'Mytest', 'mytest', ],matchfuzzy(['Mytest', 'mytest', 'MyTest'], 'MyT'))
|
||||
call assert_equal(['CamelCaseMatchIngAlg', 'camelCaseMatchingAlg', 'camelcasematchingalg'],
|
||||
\ matchfuzzy(['CamelCaseMatchIngAlg', 'camelcasematchingalg', 'camelCaseMatchingAlg'], 'CamelCase'))
|
||||
call assert_equal(['CamelCaseMatchIngAlg', 'camelCaseMatchingAlg', 'camelcasematchingalg'],
|
||||
\ matchfuzzy(['CamelCaseMatchIngAlg', 'camelcasematchingalg', 'camelCaseMatchingAlg'], 'CamelcaseM'))
|
||||
call assert_equal([['Match'], [[0, 1]], [1885]], matchfuzzypos(['match', 'Match'], 'Ma'))
|
||||
call assert_equal([['match', 'Match'], [[0, 1], [0, 1]], [1885, 1885]], matchfuzzypos(['Match', 'match'], 'ma'))
|
||||
|
||||
let l = [{'id' : 5, 'name' : 'foo'}, {'id' : 6, 'name' : []}, {'id' : 7}]
|
||||
call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : 'name'})", 'E730:')
|
||||
|
||||
" camelcase
|
||||
call assert_equal([['lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'Cursor', 'CurSearch', 'CursorLine'], [[1, 2, 3], [2, 3, 4], [2, 3, 4], [6, 7, 8], [0, 1, 2], [0, 1, 2], [0, 1, 2]], [318, 311, 308, 303, 267, 264, 263]],
|
||||
call assert_equal([['Cursor', 'CurSearch', 'CursorLine', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor'], [[0, 1, 2], [0, 1, 2], [0, 1, 2], [1, 2, 3], [2, 3, 4], [2, 3, 4], [6, 7, 8]], [2885, 2870, 2865, 2680, 2670, 2655, 2655]],
|
||||
\ matchfuzzypos(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur'))
|
||||
call assert_equal([['lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'Cursor', 'CurSearch', 'CursorLine'], [[1, 2, 3], [2, 3, 4], [2, 3, 4], [6, 7, 8], [0, 1, 2], [0, 1, 2], [0, 1, 2]], [318, 311, 308, 303, 267, 264, 263]],
|
||||
\ matchfuzzypos(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur', {"camelcase": v:true}))
|
||||
call assert_equal([['Cursor', 'CurSearch', 'CursorLine', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor'], [[0, 1, 2], [0, 1, 2], [0, 1, 2], [1, 2, 3], [2, 3, 4], [2, 3, 4], [6, 7, 8]], [267, 264, 263, 246, 239, 236, 231]],
|
||||
\ matchfuzzypos(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur', {"camelcase": v:false}))
|
||||
call assert_equal([['things', 'sThings', 'thisThings'], [[0, 1, 2, 3], [1, 2, 3, 4], [0, 1, 2, 7]], [333, 287, 279]],
|
||||
\ matchfuzzypos(['things','sThings', 'thisThings'], 'thin', {'camelcase': v:false}))
|
||||
call assert_fails("let x = matchfuzzypos([], 'foo', {'camelcase': []})", 'E475: Invalid value for argument camelcase')
|
||||
call assert_equal([['things', 'sThings', 'thisThings'], [[0, 1, 2, 3], [1, 2, 3, 4], [0, 1, 6, 7]], [3890, 3685, 3670]],
|
||||
\ matchfuzzypos(['things','sThings', 'thisThings'], 'thin'))
|
||||
endfunc
|
||||
|
||||
" Test for matchfuzzy() with multibyte characters
|
||||
@@ -216,9 +210,14 @@ func Test_matchfuzzy_mbyte()
|
||||
call assert_equal(['세 마리의 작은 돼지'], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzy('돼지 마리의'))
|
||||
call assert_equal([], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzy('파란 하늘'))
|
||||
|
||||
" preference for camel case match
|
||||
" camel case match
|
||||
call assert_equal(['oneąwo', 'oneĄwo'],
|
||||
\ ['oneĄwo', 'oneąwo']->matchfuzzy('oneąwo'))
|
||||
call assert_equal(['oneĄwo'],
|
||||
\ ['oneĄwo', 'oneąwo']->matchfuzzy('Ąwo'))
|
||||
" prefer camelcase when searched first character matches upper case
|
||||
call assert_equal(['oneĄwo', 'oneąwo'],
|
||||
\ ['oneąwo', 'oneĄwo']->matchfuzzy('oneąwo'))
|
||||
\ ['oneĄwo', 'oneąwo']->matchfuzzy('ąw'))
|
||||
" preference for complete match then match after separator (_ or space)
|
||||
call assert_equal(['ⅠⅡabㄟㄠ'] + sort(['ⅠⅡa_bㄟㄠ', 'ⅠⅡa bㄟㄠ']),
|
||||
\ ['ⅠⅡabㄟㄠ', 'ⅠⅡa bㄟㄠ', 'ⅠⅡa_bㄟㄠ']->matchfuzzy('ⅠⅡabㄟㄠ'))
|
||||
@@ -242,41 +241,39 @@ endfunc
|
||||
" Test for matchfuzzypos() with multibyte characters
|
||||
func Test_matchfuzzypos_mbyte()
|
||||
CheckFeature multi_lang
|
||||
call assert_equal([['こんにちは世界'], [[0, 1, 2, 3, 4]], [273]],
|
||||
call assert_equal([['こんにちは世界'], [[0, 1, 2, 3, 4]], [4890]],
|
||||
\ matchfuzzypos(['こんにちは世界'], 'こんにちは'))
|
||||
call assert_equal([['ンヹㄇヺヴ'], [[1, 3]], [88]], matchfuzzypos(['ンヹㄇヺヴ'], 'ヹヺ'))
|
||||
call assert_equal([['ンヹㄇヺヴ'], [[1, 3]], [-20]], matchfuzzypos(['ンヹㄇヺヴ'], 'ヹヺ'))
|
||||
" reverse the order of characters
|
||||
call assert_equal([[], [], []], matchfuzzypos(['ンヹㄇヺヴ'], 'ヺヹ'))
|
||||
call assert_equal([['αβΩxxx', 'xαxβxΩx'], [[0, 1, 2], [1, 3, 5]], [252, 143]],
|
||||
call assert_equal([['αβΩxxx', 'xαxβxΩx'], [[0, 1, 2], [1, 3, 5]], [2885, 670]],
|
||||
\ matchfuzzypos(['αβΩxxx', 'xαxβxΩx'], 'αβΩ'))
|
||||
call assert_equal([['ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ', 'πbπ'],
|
||||
\ [[0, 1], [0, 1], [0, 1], [0, 2]], [176, 173, 170, 135]],
|
||||
\ [[0, 1], [0, 1], [0, 1], [0, 2]], [1880, 1865, 1850, 890]],
|
||||
\ matchfuzzypos(['πbπ', 'ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ'], 'ππ'))
|
||||
call assert_equal([['ααααααα'], [[0, 1, 2]], [216]],
|
||||
call assert_equal([['ααααααα'], [[0, 1, 2]], [2880]],
|
||||
\ matchfuzzypos(['ααααααα'], 'ααα'))
|
||||
|
||||
call assert_equal([[], [], []], matchfuzzypos(['ンヹㄇ', 'ŗŝţ'], 'fffifl'))
|
||||
let x = matchfuzzypos([repeat('Ψ', 256)], repeat('Ψ', 256))
|
||||
call assert_equal(range(256), x[1][0])
|
||||
call assert_equal([[], [], []], matchfuzzypos([repeat('✓', 300)], repeat('✓', 257)))
|
||||
call assert_equal([repeat('✓', 300)], matchfuzzypos([repeat('✓', 300)], repeat('✓', 257))[0])
|
||||
|
||||
" match multiple words (separated by space)
|
||||
call assert_equal([['세 마리의 작은 돼지'], [[9, 10, 2, 3, 4]], [328]], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('돼지 마리의'))
|
||||
call assert_equal([['세 마리의 작은 돼지'], [[9, 10, 2, 3, 4]], [4515]], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('돼지 마리의'))
|
||||
call assert_equal([[], [], []], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('파란 하늘'))
|
||||
|
||||
" match in a long string
|
||||
call assert_equal([[repeat('ぶ', 300) .. 'ẼẼẼ'], [[300, 301, 302]], [105]],
|
||||
call assert_equal([[repeat('ぶ', 300) .. 'ẼẼẼ'], [[300, 301, 302]], [500]],
|
||||
\ matchfuzzypos([repeat('ぶ', 300) .. 'ẼẼẼ'], 'ẼẼẼ'))
|
||||
" preference for camel case match
|
||||
call assert_equal([['xѳѵҁxxѳѴҁ'], [[6, 7, 8]], [219]], matchfuzzypos(['xѳѵҁxxѳѴҁ'], 'ѳѵҁ'))
|
||||
" preference for match after a separator (_ or space)
|
||||
call assert_equal([['xちだx_ちだ'], [[5, 6]], [145]], matchfuzzypos(['xちだx_ちだ'], 'ちだ'))
|
||||
call assert_equal([['xちだx_ちだ'], [[5, 6]], [1775]], matchfuzzypos(['xちだx_ちだ'], 'ちだ'))
|
||||
" preference for leading letter match
|
||||
call assert_equal([['ѳѵҁxѳѵҁ'], [[0, 1]], [150]], matchfuzzypos(['ѳѵҁxѳѵҁ'], 'ѳѵ'))
|
||||
call assert_equal([['ѳѵҁxѳѵҁ'], [[0, 1]], [1875]], matchfuzzypos(['ѳѵҁxѳѵҁ'], 'ѳѵ'))
|
||||
" preference for sequential match
|
||||
call assert_equal([['aンbヹcㄇdンヹㄇ'], [[7, 8, 9]], [158]], matchfuzzypos(['aンbヹcㄇdンヹㄇ'], 'ンヹㄇ'))
|
||||
call assert_equal([['aンbヹcㄇdンヹㄇ'], [[7, 8, 9]], [1965]], matchfuzzypos(['aンbヹcㄇdンヹㄇ'], 'ンヹㄇ'))
|
||||
" best recursive match
|
||||
call assert_equal([['xффйд'], [[2, 3, 4]], [168]], matchfuzzypos(['xффйд'], 'фйд'))
|
||||
call assert_equal([['xффйд'], [[2, 3, 4]], [1990]], matchfuzzypos(['xффйд'], 'фйд'))
|
||||
endfunc
|
||||
|
||||
" Test for matchfuzzy() with limit
|
||||
|
Reference in New Issue
Block a user