vim-patch:8.0.0190

Problem:    Detecting duplicate tags uses a slow linear search.
Solution:   Use a much faster hash table solution. (James McCoy, closes vim/vim#1046)
            But don't add hi_keylen, it makes hash tables 50% bigger.

810f9c361c
This commit is contained in:
James McCoy
2017-03-19 13:24:45 -04:00
parent 934137fb48
commit 058516aaf9

View File

@@ -73,9 +73,9 @@ typedef struct {
} pat_T; } pat_T;
/* /*
* The matching tags are first stored in ga_match[]. In which one depends on * The matching tags are first stored in one of the ht_match[] hash tables. In
* the priority of the match. * which one depends on the priority of the match.
* At the end, the matches from ga_match[] are concatenated, to make a list * At the end, the matches from ht_match[] are concatenated, to make a list
* sorted on priority. * sorted on priority.
*/ */
#define MT_ST_CUR 0 /* static match in current file */ #define MT_ST_CUR 0 /* static match in current file */
@@ -1122,11 +1122,9 @@ find_tags (
int save_emsg_off; int save_emsg_off;
struct match_found { char_u *mfp;
int len; /* nr of chars of match[] to be compared */ hashtab_T ht_match[MT_COUNT];
char_u match[1]; /* actually longer */ hash_T hash = 0;
} *mfp, *mfp2;
garray_T ga_match[MT_COUNT];
int match_count = 0; /* number of matches found */ int match_count = 0; /* number of matches found */
char_u **matches; char_u **matches;
int mtt; int mtt;
@@ -1186,7 +1184,7 @@ find_tags (
lbuf = xmalloc(lbuf_size); lbuf = xmalloc(lbuf_size);
tag_fname = xmalloc(MAXPATHL + 1); tag_fname = xmalloc(MAXPATHL + 1);
for (mtt = 0; mtt < MT_COUNT; ++mtt) for (mtt = 0; mtt < MT_COUNT; ++mtt)
ga_init(&ga_match[mtt], (int)sizeof(struct match_found *), 100); hash_init(&ht_match[mtt]);
STRCPY(tag_fname, "from cscope"); /* for error messages */ STRCPY(tag_fname, "from cscope"); /* for error messages */
@@ -1752,9 +1750,11 @@ parse_line:
} }
/* /*
* If a match is found, add it to ga_match[]. * If a match is found, add it to ht_match[].
*/ */
if (match) { if (match) {
int len = 0;
if (use_cscope) { if (use_cscope) {
/* Don't change the ordering, always use the same table. */ /* Don't change the ordering, always use the same table. */
mtt = MT_GL_OTH; mtt = MT_GL_OTH;
@@ -1790,116 +1790,102 @@ parse_line:
} }
/* /*
* Add the found match in ga_match[mtt], avoiding duplicates. * Add the found match in ht_match[mtt].
* Store the info we need later, which depends on the kind of * Store the info we need later, which depends on the kind of
* tags we are dealing with. * tags we are dealing with.
*/ */
ga_grow(&ga_match[mtt], 1); if (help_only) {
{
int len;
if (help_only) {
# define ML_EXTRA 3 # define ML_EXTRA 3
/* // Append the help-heuristic number after the tagname, for
* Append the help-heuristic number after the // sorting it later. The heuristic is ignored for
* tagname, for sorting it later. // detecting duplicates.
*/ // The format is {tagname}@{lang}NUL{heuristic}NUL
*tagp.tagname_end = NUL; *tagp.tagname_end = NUL;
len = (int)(tagp.tagname_end - tagp.tagname); len = (int)(tagp.tagname_end - tagp.tagname);
mfp = xmalloc(sizeof(struct match_found) + len + 10 + ML_EXTRA); mfp = xmalloc(sizeof(char_u) + len + 10 + ML_EXTRA + 1);
/* "len" includes the language and the NUL, but
* not the priority. */
mfp->len = len + ML_EXTRA + 1;
#define ML_HELP_LEN 6
p = mfp->match;
STRCPY(p, tagp.tagname);
p[len] = '@';
STRCPY(p + len + 1, help_lang);
sprintf((char *)p + len + 1 + ML_EXTRA, "%06d",
help_heuristic(tagp.tagname,
match_re ? matchoff : 0, !match_no_ic)
+ help_pri
);
*tagp.tagname_end = TAB; p = mfp;
} else if (name_only) { STRCPY(p, tagp.tagname);
if (get_it_again) { p[len] = '@';
char_u *temp_end = tagp.command; STRCPY(p + len + 1, help_lang);
sprintf((char *)p + len + 1 + ML_EXTRA, "%06d",
help_heuristic(tagp.tagname,
match_re ? matchoff : 0, !match_no_ic)
+ help_pri);
if (*temp_end == '/') *tagp.tagname_end = TAB;
while (*temp_end && *temp_end != '\r' } else if (name_only) {
&& *temp_end != '\n' if (get_it_again) {
&& *temp_end != '$') char_u *temp_end = tagp.command;
temp_end++;
if (tagp.command + 2 < temp_end) { if (*temp_end == '/')
len = (int)(temp_end - tagp.command - 2); while (*temp_end && *temp_end != '\r'
mfp = xmalloc(sizeof(struct match_found) + len); && *temp_end != '\n'
mfp->len = len + 1; /* include the NUL */ && *temp_end != '$')
p = mfp->match; temp_end++;
STRLCPY(p, tagp.command + 2, len + 1);
} else
mfp = NULL;
get_it_again = FALSE;
} else {
len = (int)(tagp.tagname_end - tagp.tagname);
mfp = xmalloc(sizeof(struct match_found) + len);
mfp->len = len + 1; /* include the NUL */
p = mfp->match;
STRLCPY(p, tagp.tagname, len + 1);
/* if wanted, re-read line to get long form too */ if (tagp.command + 2 < temp_end) {
if (State & INSERT) len = (int)(temp_end - tagp.command - 2);
get_it_again = p_sft; mfp = xmalloc(sizeof(char_u) + len + 1);
} STRLCPY(mfp, tagp.command + 2, len + 1);
} else {
/* Save the tag in a buffer.
* Emacs tag: <mtt><tag_fname><NUL><ebuf><NUL><lbuf>
* other tag: <mtt><tag_fname><NUL><NUL><lbuf>
* without Emacs tags: <mtt><tag_fname><NUL><lbuf>
*/
len = (int)STRLEN(tag_fname)
+ (int)STRLEN(lbuf) + 3;
mfp = xmalloc(sizeof(struct match_found) + len);
mfp->len = len;
p = mfp->match;
p[0] = mtt;
STRCPY(p + 1, tag_fname);
#ifdef BACKSLASH_IN_FILENAME
/* Ignore differences in slashes, avoid adding
* both path/file and path\file. */
slash_adjust(p + 1);
#endif
s = p + 1 + STRLEN(tag_fname) + 1;
STRCPY(s, lbuf);
}
if (mfp != NULL) {
/*
* Don't add identical matches.
* This can take a lot of time when finding many
* matches, check for CTRL-C now and then.
* Add all cscope tags, because they are all listed.
*/
if (use_cscope)
i = -1;
else
for (i = ga_match[mtt].ga_len; --i >= 0 && !got_int; ) {
mfp2 = ((struct match_found **)
(ga_match[mtt].ga_data))[i];
if (mfp2->len == mfp->len
&& memcmp(mfp2->match, mfp->match,
(size_t)mfp->len) == 0)
break;
fast_breakcheck();
}
if (i < 0) {
((struct match_found **)(ga_match[mtt].ga_data))
[ga_match[mtt].ga_len++] = mfp;
++match_count;
} else } else
xfree(mfp); mfp = NULL;
get_it_again = FALSE;
} else {
len = (int)(tagp.tagname_end - tagp.tagname);
mfp = xmalloc(sizeof(char_u) + len + 1);
STRLCPY(mfp, tagp.tagname, len + 1);
/* if wanted, re-read line to get long form too */
if (State & INSERT)
get_it_again = p_sft;
} }
} else {
#define TAG_SEP 0x01
size_t tag_fname_len = STRLEN(tag_fname);
/* Save the tag in a buffer.
* Use 0x01 to separate fields (Can't use NUL, because the
* hash key is terminated by NUL).
* Emacs tag: <mtt><tag_fname><NUL><ebuf><NUL><lbuf>
* other tag: <mtt><tag_fname><NUL><NUL><lbuf>
* without Emacs tags: <mtt><tag_fname><NUL><lbuf>
*/
len = (int)tag_fname_len + (int)STRLEN(lbuf) + 3;
mfp = xmalloc(sizeof(char_u) + len + 1);
p = mfp;
p[0] = mtt;
STRCPY(p + 1, tag_fname);
#ifdef BACKSLASH_IN_FILENAME
/* Ignore differences in slashes, avoid adding
* both path/file and path\file. */
slash_adjust(p + 1);
#endif
p[tag_fname_len + 1] = TAG_SEP;
s = p + 1 + tag_fname_len + 1;
STRCPY(s, lbuf);
}
if (mfp != NULL) {
hashitem_T *hi;
// Don't add identical matches.
// Add all cscope tags, because they are all listed.
// "mfp" is used as a hash key, there is a NUL byte to end
// the part matters for comparing, more bytes may follow
// after it. E.g. help tags store the priority after the
// NUL.
if (use_cscope)
hash++;
else
hash = hash_hash(mfp);
hi = hash_lookup(&ht_match[mtt], (const char *)mfp,
STRLEN(mfp), hash);
if (HASHITEM_EMPTY(hi)) {
hash_add_item(&ht_match[mtt], hi, mfp, hash);
++match_count;
} else
// duplicate tag, drop it
xfree(mfp);
} }
} }
if (use_cscope && eof) if (use_cscope && eof)
@@ -1962,7 +1948,7 @@ findtag_end:
xfree(tag_fname); xfree(tag_fname);
/* /*
* Move the matches from the ga_match[] arrays into one list of * Move the matches from the ht_match[] arrays into one list of
* matches. When retval == FAIL, free the matches. * matches. When retval == FAIL, free the matches.
*/ */
if (retval == FAIL) if (retval == FAIL)
@@ -1974,20 +1960,27 @@ findtag_end:
matches = NULL; matches = NULL;
match_count = 0; match_count = 0;
for (mtt = 0; mtt < MT_COUNT; ++mtt) { for (mtt = 0; mtt < MT_COUNT; ++mtt) {
for (int i = 0; i < ga_match[mtt].ga_len; ++i) { hashitem_T *hi;
mfp = ((struct match_found **)(ga_match[mtt].ga_data))[i]; long todo = (long)ht_match[mtt].ht_used;
if (matches == NULL)
xfree(mfp); for (hi = ht_match[mtt].ht_array; todo > 0; hi++) {
else { if (!HASHITEM_EMPTY(hi)) {
/* To avoid allocating memory again we turn the struct mfp = hi->hi_key;
* match_found into a string. For help the priority was not if (matches == NULL) {
* included in the length. */ xfree(mfp);
memmove(mfp, mfp->match, } else {
(size_t)(mfp->len + (help_only ? ML_HELP_LEN : 0))); // now change the TAG_SEP back to NUL
matches[match_count++] = (char_u *)mfp; for (p = mfp; *p != NUL; p++) {
if (*p == TAG_SEP) {
*p = NUL;
}
}
matches[match_count++] = (char_u *)mfp;
}
todo--;
} }
} }
ga_clear(&ga_match[mtt]); hash_clear(&ht_match[mtt]);
} }
*matchesp = matches; *matchesp = matches;