mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-11-04 01:34:27 +00:00 
			
		
		
		
	Use weighted algorithm for string matching when finding files in repo (#21370)
This PR is for: * https://github.com/go-gitea/gitea/issues/20231 Now, when a user searches for `word`, they always see `/{word}.txt` before `/{w}e-g{o}t-{r}esult.{d}at`. Demo: When searching for "a", "a.ext" comes first. Then, when searching for "at", the longer match "template" comes first. <details>   </details> This PR also makes it possible for the frontend tests to import feature JS files by introducing `jestSetup.js`. Co-authored-by: delvh <dev.lh@web.de> Co-authored-by: silverwind <me@silverwind.io>
This commit is contained in:
		@@ -1,3 +1,4 @@
 | 
				
			|||||||
 | 
					// to run tests with ES6 modules, node must be run with "--experimental-vm-modules"; see the Makefile's "test-frontend" target for reference
 | 
				
			||||||
export default {
 | 
					export default {
 | 
				
			||||||
  rootDir: 'web_src',
 | 
					  rootDir: 'web_src',
 | 
				
			||||||
  setupFilesAfterEnv: ['jest-extended/all'],
 | 
					  setupFilesAfterEnv: ['jest-extended/all'],
 | 
				
			||||||
@@ -7,6 +8,8 @@ export default {
 | 
				
			|||||||
  transform: {
 | 
					  transform: {
 | 
				
			||||||
    '\\.svg$': '<rootDir>/js/testUtils/jestRawLoader.js',
 | 
					    '\\.svg$': '<rootDir>/js/testUtils/jestRawLoader.js',
 | 
				
			||||||
  },
 | 
					  },
 | 
				
			||||||
 | 
					  setupFiles: [
 | 
				
			||||||
 | 
					    './js/testUtils/jestSetup.js', // prepare global variables used by our code (eg: window.config)
 | 
				
			||||||
 | 
					  ],
 | 
				
			||||||
  verbose: false,
 | 
					  verbose: false,
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,45 +1,96 @@
 | 
				
			|||||||
import $ from 'jquery';
 | 
					import $ from 'jquery';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import {svg} from '../svg.js';
 | 
					import {svg} from '../svg.js';
 | 
				
			||||||
import {strSubMatch} from '../utils.js';
 | 
					 | 
				
			||||||
const {csrf} = window.config;
 | 
					const {csrf} = window.config;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
const threshold = 50;
 | 
					const threshold = 50;
 | 
				
			||||||
let files = [];
 | 
					let files = [];
 | 
				
			||||||
let $repoFindFileInput, $repoFindFileTableBody, $repoFindFileNoResult;
 | 
					let $repoFindFileInput, $repoFindFileTableBody, $repoFindFileNoResult;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// return the case-insensitive sub-match result as an array:  [unmatched, matched, unmatched, matched, ...]
 | 
				
			||||||
 | 
					// res[even] is unmatched, res[odd] is matched, see unit tests for examples
 | 
				
			||||||
 | 
					// argument subLower must be a lower-cased string.
 | 
				
			||||||
 | 
					export function strSubMatch(full, subLower) {
 | 
				
			||||||
 | 
					  const res = [''];
 | 
				
			||||||
 | 
					  let i = 0, j = 0;
 | 
				
			||||||
 | 
					  const fullLower = full.toLowerCase();
 | 
				
			||||||
 | 
					  while (i < subLower.length && j < fullLower.length) {
 | 
				
			||||||
 | 
					    if (subLower[i] === fullLower[j]) {
 | 
				
			||||||
 | 
					      if (res.length % 2 !== 0) res.push('');
 | 
				
			||||||
 | 
					      res[res.length - 1] += full[j];
 | 
				
			||||||
 | 
					      j++;
 | 
				
			||||||
 | 
					      i++;
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      if (res.length % 2 === 0) res.push('');
 | 
				
			||||||
 | 
					      res[res.length - 1] += full[j];
 | 
				
			||||||
 | 
					      j++;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  if (i !== subLower.length) {
 | 
				
			||||||
 | 
					    // if the sub string doesn't match the full, only return the full as unmatched.
 | 
				
			||||||
 | 
					    return [full];
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  if (j < full.length) {
 | 
				
			||||||
 | 
					    // append remaining chars from full to result as unmatched
 | 
				
			||||||
 | 
					    if (res.length % 2 === 0) res.push('');
 | 
				
			||||||
 | 
					    res[res.length - 1] += full.substring(j);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  return res;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export function calcMatchedWeight(matchResult) {
 | 
				
			||||||
 | 
					  let weight = 0;
 | 
				
			||||||
 | 
					  for (let i = 0; i < matchResult.length; i++) {
 | 
				
			||||||
 | 
					    if (i % 2 === 1) { // matches are on odd indices, see strSubMatch
 | 
				
			||||||
 | 
					      // use a function where f(x+x) > f(x) + f(x), so that a longer matched string gets a higher weight.
 | 
				
			||||||
 | 
					      weight += matchResult[i].length * matchResult[i].length;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  return weight;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export function filterRepoFilesWeighted(files, filter) {
 | 
				
			||||||
 | 
					  let filterResult = [];
 | 
				
			||||||
 | 
					  if (filter) {
 | 
				
			||||||
 | 
					    const filterLower = filter.toLowerCase();
 | 
				
			||||||
 | 
					    // TODO: for large repo, this loop could be slow, maybe there could be one more limit:
 | 
				
			||||||
 | 
					    // ... && filterResult.length < threshold * 20,  waiting for more feedback
 | 
				
			||||||
 | 
					    for (let i = 0; i < files.length; i++) {
 | 
				
			||||||
 | 
					      const res = strSubMatch(files[i], filterLower);
 | 
				
			||||||
 | 
					      if (res.length > 1) { // length==1 means unmatched, >1 means having matched sub strings
 | 
				
			||||||
 | 
					        filterResult.push({matchResult: res, matchWeight: calcMatchedWeight(res)});
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    filterResult.sort((a, b) => b.matchWeight - a.matchWeight);
 | 
				
			||||||
 | 
					    filterResult = filterResult.slice(0, threshold);
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    for (let i = 0; i < files.length && i < threshold; i++) {
 | 
				
			||||||
 | 
					      filterResult.push({matchResult: [files[i]], matchWeight: 0});
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  return filterResult;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
function filterRepoFiles(filter) {
 | 
					function filterRepoFiles(filter) {
 | 
				
			||||||
  const treeLink = $repoFindFileInput.attr('data-url-tree-link');
 | 
					  const treeLink = $repoFindFileInput.attr('data-url-tree-link');
 | 
				
			||||||
  $repoFindFileTableBody.empty();
 | 
					  $repoFindFileTableBody.empty();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  const fileRes = [];
 | 
					  const filterResult = filterRepoFilesWeighted(files, filter);
 | 
				
			||||||
  if (filter) {
 | 
					 | 
				
			||||||
    for (let i = 0; i < files.length && fileRes.length < threshold; i++) {
 | 
					 | 
				
			||||||
      const subMatch = strSubMatch(files[i], filter);
 | 
					 | 
				
			||||||
      if (subMatch.length > 1) {
 | 
					 | 
				
			||||||
        fileRes.push(subMatch);
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  } else {
 | 
					 | 
				
			||||||
    for (let i = 0; i < files.length && i < threshold; i++) {
 | 
					 | 
				
			||||||
      fileRes.push([files[i]]);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  const tmplRow = `<tr><td><a></a></td></tr>`;
 | 
					  const tmplRow = `<tr><td><a></a></td></tr>`;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  $repoFindFileNoResult.toggle(fileRes.length === 0);
 | 
					  $repoFindFileNoResult.toggle(filterResult.length === 0);
 | 
				
			||||||
  for (const matchRes of fileRes) {
 | 
					  for (const r of filterResult) {
 | 
				
			||||||
    const $row = $(tmplRow);
 | 
					    const $row = $(tmplRow);
 | 
				
			||||||
    const $a = $row.find('a');
 | 
					    const $a = $row.find('a');
 | 
				
			||||||
    $a.attr('href', `${treeLink}/${matchRes.join('')}`);
 | 
					    $a.attr('href', `${treeLink}/${r.matchResult.join('')}`);
 | 
				
			||||||
    const $octiconFile = $(svg('octicon-file')).addClass('mr-3');
 | 
					    const $octiconFile = $(svg('octicon-file')).addClass('mr-3');
 | 
				
			||||||
    $a.append($octiconFile);
 | 
					    $a.append($octiconFile);
 | 
				
			||||||
    // if the target file path is "abc/xyz", to search "bx", then the matchRes is ['a', 'b', 'c/', 'x', 'yz']
 | 
					    // if the target file path is "abc/xyz", to search "bx", then the matchResult is ['a', 'b', 'c/', 'x', 'yz']
 | 
				
			||||||
    // the matchRes[odd] is matched and highlighted to red.
 | 
					    // the matchResult[odd] is matched and highlighted to red.
 | 
				
			||||||
    for (let j = 0; j < matchRes.length; j++) {
 | 
					    for (let j = 0; j < r.matchResult.length; j++) {
 | 
				
			||||||
      if (!matchRes[j]) continue;
 | 
					      if (!r.matchResult[j]) continue;
 | 
				
			||||||
      const $span = $('<span>').text(matchRes[j]);
 | 
					      const $span = $('<span>').text(r.matchResult[j]);
 | 
				
			||||||
      if (j % 2 === 1) $span.addClass('ui text red');
 | 
					      if (j % 2 === 1) $span.addClass('ui text red');
 | 
				
			||||||
      $a.append($span);
 | 
					      $a.append($span);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										34
									
								
								web_src/js/features/repo-findfile.test.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								web_src/js/features/repo-findfile.test.js
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,34 @@
 | 
				
			|||||||
 | 
					import {strSubMatch, calcMatchedWeight, filterRepoFilesWeighted} from './repo-findfile.js';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					describe('Repo Find Files', () => {
 | 
				
			||||||
 | 
					  test('strSubMatch', () => {
 | 
				
			||||||
 | 
					    expect(strSubMatch('abc', '')).toEqual(['abc']);
 | 
				
			||||||
 | 
					    expect(strSubMatch('abc', 'a')).toEqual(['', 'a', 'bc']);
 | 
				
			||||||
 | 
					    expect(strSubMatch('abc', 'b')).toEqual(['a', 'b', 'c']);
 | 
				
			||||||
 | 
					    expect(strSubMatch('abc', 'c')).toEqual(['ab', 'c']);
 | 
				
			||||||
 | 
					    expect(strSubMatch('abc', 'ac')).toEqual(['', 'a', 'b', 'c']);
 | 
				
			||||||
 | 
					    expect(strSubMatch('abc', 'z')).toEqual(['abc']);
 | 
				
			||||||
 | 
					    expect(strSubMatch('abc', 'az')).toEqual(['abc']);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    expect(strSubMatch('ABc', 'ac')).toEqual(['', 'A', 'B', 'c']);
 | 
				
			||||||
 | 
					    expect(strSubMatch('abC', 'ac')).toEqual(['', 'a', 'b', 'C']);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    expect(strSubMatch('aabbcc', 'abc')).toEqual(['', 'a', 'a', 'b', 'b', 'c', 'c']);
 | 
				
			||||||
 | 
					    expect(strSubMatch('the/directory', 'hedir')).toEqual(['t', 'he', '/', 'dir', 'ectory']);
 | 
				
			||||||
 | 
					  });
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  test('calcMatchedWeight', () => {
 | 
				
			||||||
 | 
					    expect(calcMatchedWeight(['a', 'b', 'c', 'd']) < calcMatchedWeight(['a', 'bc', 'c'])).toBeTruthy();
 | 
				
			||||||
 | 
					  });
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  test('filterRepoFilesWeighted', () => {
 | 
				
			||||||
 | 
					    // the first matched result should always be the "word.txt"
 | 
				
			||||||
 | 
					    let res = filterRepoFilesWeighted(['word.txt', 'we-got-result.dat'], 'word');
 | 
				
			||||||
 | 
					    expect(res).toHaveLength(2);
 | 
				
			||||||
 | 
					    expect(res[0].matchResult).toEqual(['', 'word', '.txt']);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    res = filterRepoFilesWeighted(['we-got-result.dat', 'word.txt'], 'word');
 | 
				
			||||||
 | 
					    expect(res).toHaveLength(2);
 | 
				
			||||||
 | 
					    expect(res[0].matchResult).toEqual(['', 'word', '.txt']);
 | 
				
			||||||
 | 
					  });
 | 
				
			||||||
 | 
					});
 | 
				
			||||||
							
								
								
									
										5
									
								
								web_src/js/testUtils/jestSetup.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								web_src/js/testUtils/jestSetup.js
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,5 @@
 | 
				
			|||||||
 | 
					window.config = {
 | 
				
			||||||
 | 
					  csrfToken: 'jest-test-csrf-token-123456',
 | 
				
			||||||
 | 
					  pageData: {},
 | 
				
			||||||
 | 
					  i18n: {},
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
@@ -59,36 +59,6 @@ export function parseIssueHref(href) {
 | 
				
			|||||||
  return {owner, repo, type, index};
 | 
					  return {owner, repo, type, index};
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// return the sub-match result as an array:  [unmatched, matched, unmatched, matched, ...]
 | 
					 | 
				
			||||||
// res[even] is unmatched, res[odd] is matched, see unit tests for examples
 | 
					 | 
				
			||||||
export function strSubMatch(full, sub) {
 | 
					 | 
				
			||||||
  const res = [''];
 | 
					 | 
				
			||||||
  let i = 0, j = 0;
 | 
					 | 
				
			||||||
  const subLower = sub.toLowerCase(), fullLower = full.toLowerCase();
 | 
					 | 
				
			||||||
  while (i < subLower.length && j < fullLower.length) {
 | 
					 | 
				
			||||||
    if (subLower[i] === fullLower[j]) {
 | 
					 | 
				
			||||||
      if (res.length % 2 !== 0) res.push('');
 | 
					 | 
				
			||||||
      res[res.length - 1] += full[j];
 | 
					 | 
				
			||||||
      j++;
 | 
					 | 
				
			||||||
      i++;
 | 
					 | 
				
			||||||
    } else {
 | 
					 | 
				
			||||||
      if (res.length % 2 === 0) res.push('');
 | 
					 | 
				
			||||||
      res[res.length - 1] += full[j];
 | 
					 | 
				
			||||||
      j++;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  if (i !== sub.length) {
 | 
					 | 
				
			||||||
    // if the sub string doesn't match the full, only return the full as unmatched.
 | 
					 | 
				
			||||||
    return [full];
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  if (j < full.length) {
 | 
					 | 
				
			||||||
    // append remaining chars from full to result as unmatched
 | 
					 | 
				
			||||||
    if (res.length % 2 === 0) res.push('');
 | 
					 | 
				
			||||||
    res[res.length - 1] += full.substring(j);
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  return res;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// pretty-print a number using locale-specific separators, e.g. 1200 -> 1,200
 | 
					// pretty-print a number using locale-specific separators, e.g. 1200 -> 1,200
 | 
				
			||||||
export function prettyNumber(num, locale = 'en-US') {
 | 
					export function prettyNumber(num, locale = 'en-US') {
 | 
				
			||||||
  if (typeof num !== 'number') return '';
 | 
					  if (typeof num !== 'number') return '';
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,5 +1,5 @@
 | 
				
			|||||||
import {
 | 
					import {
 | 
				
			||||||
  basename, extname, isObject, uniq, stripTags, joinPaths, parseIssueHref, strSubMatch,
 | 
					  basename, extname, isObject, uniq, stripTags, joinPaths, parseIssueHref,
 | 
				
			||||||
  prettyNumber, parseUrl,
 | 
					  prettyNumber, parseUrl,
 | 
				
			||||||
} from './utils.js';
 | 
					} from './utils.js';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -86,22 +86,6 @@ test('parseIssueHref', () => {
 | 
				
			|||||||
  expect(parseIssueHref('')).toEqual({owner: undefined, repo: undefined, type: undefined, index: undefined});
 | 
					  expect(parseIssueHref('')).toEqual({owner: undefined, repo: undefined, type: undefined, index: undefined});
 | 
				
			||||||
});
 | 
					});
 | 
				
			||||||
 | 
					
 | 
				
			||||||
test('strSubMatch', () => {
 | 
					 | 
				
			||||||
  expect(strSubMatch('abc', '')).toEqual(['abc']);
 | 
					 | 
				
			||||||
  expect(strSubMatch('abc', 'a')).toEqual(['', 'a', 'bc']);
 | 
					 | 
				
			||||||
  expect(strSubMatch('abc', 'b')).toEqual(['a', 'b', 'c']);
 | 
					 | 
				
			||||||
  expect(strSubMatch('abc', 'c')).toEqual(['ab', 'c']);
 | 
					 | 
				
			||||||
  expect(strSubMatch('abc', 'ac')).toEqual(['', 'a', 'b', 'c']);
 | 
					 | 
				
			||||||
  expect(strSubMatch('abc', 'z')).toEqual(['abc']);
 | 
					 | 
				
			||||||
  expect(strSubMatch('abc', 'az')).toEqual(['abc']);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  expect(strSubMatch('abc', 'aC')).toEqual(['', 'a', 'b', 'c']);
 | 
					 | 
				
			||||||
  expect(strSubMatch('abC', 'ac')).toEqual(['', 'a', 'b', 'C']);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  expect(strSubMatch('aabbcc', 'abc')).toEqual(['', 'a', 'a', 'b', 'b', 'c', 'c']);
 | 
					 | 
				
			||||||
  expect(strSubMatch('the/directory', 'hedir')).toEqual(['t', 'he', '/', 'dir', 'ectory']);
 | 
					 | 
				
			||||||
});
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
test('prettyNumber', () => {
 | 
					test('prettyNumber', () => {
 | 
				
			||||||
  expect(prettyNumber()).toEqual('');
 | 
					  expect(prettyNumber()).toEqual('');
 | 
				
			||||||
  expect(prettyNumber(null)).toEqual('');
 | 
					  expect(prettyNumber(null)).toEqual('');
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user