mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-10-26 12:27:06 +00:00 
			
		
		
		
	Use weighted algorithm for string matching when finding files in repo (#21370)
This PR is for: * https://github.com/go-gitea/gitea/issues/20231 Now, when a user searches for `word`, they always see `/{word}.txt` before `/{w}e-g{o}t-{r}esult.{d}at`. Demo: When searching for "a", "a.ext" comes first. Then, when searching for "at", the longer match "template" comes first. <details>   </details> This PR also allows the frontend tests to import feature JS files by introducing `jestSetup.js`. Co-authored-by: delvh <dev.lh@web.de> Co-authored-by: silverwind <me@silverwind.io>
This commit is contained in:
		| @@ -1,3 +1,4 @@ | |||||||
|  | // to run tests with ES6 module, node must run with "--experimental-vm-modules", or see Makefile's "test-frontend" for reference | ||||||
| export default { | export default { | ||||||
|   rootDir: 'web_src', |   rootDir: 'web_src', | ||||||
|   setupFilesAfterEnv: ['jest-extended/all'], |   setupFilesAfterEnv: ['jest-extended/all'], | ||||||
| @@ -7,6 +8,8 @@ export default { | |||||||
|   transform: { |   transform: { | ||||||
|     '\\.svg$': '<rootDir>/js/testUtils/jestRawLoader.js', |     '\\.svg$': '<rootDir>/js/testUtils/jestRawLoader.js', | ||||||
|   }, |   }, | ||||||
|  |   setupFiles: [ | ||||||
|  |     './js/testUtils/jestSetup.js', // prepare global variables used by our code (eg: window.config) | ||||||
|  |   ], | ||||||
|   verbose: false, |   verbose: false, | ||||||
| }; | }; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,45 +1,96 @@ | |||||||
| import $ from 'jquery'; | import $ from 'jquery'; | ||||||
|  |  | ||||||
| import {svg} from '../svg.js'; | import {svg} from '../svg.js'; | ||||||
| import {strSubMatch} from '../utils.js'; |  | ||||||
| const {csrf} = window.config; | const {csrf} = window.config; | ||||||
|  |  | ||||||
| const threshold = 50; | const threshold = 50; | ||||||
| let files = []; | let files = []; | ||||||
| let $repoFindFileInput, $repoFindFileTableBody, $repoFindFileNoResult; | let $repoFindFileInput, $repoFindFileTableBody, $repoFindFileNoResult; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | // return the case-insensitive sub-match result as an array:  [unmatched, matched, unmatched, matched, ...] | ||||||
|  | // res[even] is unmatched, res[odd] is matched, see unit tests for examples | ||||||
|  | // argument subLower must be a lower-cased string. | ||||||
|  | export function strSubMatch(full, subLower) { | ||||||
|  |   const res = ['']; | ||||||
|  |   let i = 0, j = 0; | ||||||
|  |   const fullLower = full.toLowerCase(); | ||||||
|  |   while (i < subLower.length && j < fullLower.length) { | ||||||
|  |     if (subLower[i] === fullLower[j]) { | ||||||
|  |       if (res.length % 2 !== 0) res.push(''); | ||||||
|  |       res[res.length - 1] += full[j]; | ||||||
|  |       j++; | ||||||
|  |       i++; | ||||||
|  |     } else { | ||||||
|  |       if (res.length % 2 === 0) res.push(''); | ||||||
|  |       res[res.length - 1] += full[j]; | ||||||
|  |       j++; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   if (i !== subLower.length) { | ||||||
|  |     // if the sub string doesn't match the full, only return the full as unmatched. | ||||||
|  |     return [full]; | ||||||
|  |   } | ||||||
|  |   if (j < full.length) { | ||||||
|  |     // append remaining chars from full to result as unmatched | ||||||
|  |     if (res.length % 2 === 0) res.push(''); | ||||||
|  |     res[res.length - 1] += full.substring(j); | ||||||
|  |   } | ||||||
|  |   return res; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | export function calcMatchedWeight(matchResult) { | ||||||
|  |   let weight = 0; | ||||||
|  |   for (let i = 0; i < matchResult.length; i++) { | ||||||
|  |     if (i % 2 === 1) { // matches are on odd indices, see strSubMatch | ||||||
|  |       // use a function f(x+x) > f(x) + f(x) to make the longer matched string has higher weight. | ||||||
|  |       weight += matchResult[i].length * matchResult[i].length; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   return weight; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | export function filterRepoFilesWeighted(files, filter) { | ||||||
|  |   let filterResult = []; | ||||||
|  |   if (filter) { | ||||||
|  |     const filterLower = filter.toLowerCase(); | ||||||
|  |     // TODO: for large repo, this loop could be slow, maybe there could be one more limit: | ||||||
|  |     // ... && filterResult.length < threshold * 20,  wait for more feedbacks | ||||||
|  |     for (let i = 0; i < files.length; i++) { | ||||||
|  |       const res = strSubMatch(files[i], filterLower); | ||||||
|  |       if (res.length > 1) { // length==1 means unmatched, >1 means having matched sub strings | ||||||
|  |         filterResult.push({matchResult: res, matchWeight: calcMatchedWeight(res)}); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     filterResult.sort((a, b) => b.matchWeight - a.matchWeight); | ||||||
|  |     filterResult = filterResult.slice(0, threshold); | ||||||
|  |   } else { | ||||||
|  |     for (let i = 0; i < files.length && i < threshold; i++) { | ||||||
|  |       filterResult.push({matchResult: [files[i]], matchWeight: 0}); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   return filterResult; | ||||||
|  | } | ||||||
|  |  | ||||||
| function filterRepoFiles(filter) { | function filterRepoFiles(filter) { | ||||||
|   const treeLink = $repoFindFileInput.attr('data-url-tree-link'); |   const treeLink = $repoFindFileInput.attr('data-url-tree-link'); | ||||||
|   $repoFindFileTableBody.empty(); |   $repoFindFileTableBody.empty(); | ||||||
|  |  | ||||||
|   const fileRes = []; |   const filterResult = filterRepoFilesWeighted(files, filter); | ||||||
|   if (filter) { |  | ||||||
|     for (let i = 0; i < files.length && fileRes.length < threshold; i++) { |  | ||||||
|       const subMatch = strSubMatch(files[i], filter); |  | ||||||
|       if (subMatch.length > 1) { |  | ||||||
|         fileRes.push(subMatch); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } else { |  | ||||||
|     for (let i = 0; i < files.length && i < threshold; i++) { |  | ||||||
|       fileRes.push([files[i]]); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   const tmplRow = `<tr><td><a></a></td></tr>`; |   const tmplRow = `<tr><td><a></a></td></tr>`; | ||||||
|  |  | ||||||
|   $repoFindFileNoResult.toggle(fileRes.length === 0); |   $repoFindFileNoResult.toggle(filterResult.length === 0); | ||||||
|   for (const matchRes of fileRes) { |   for (const r of filterResult) { | ||||||
|     const $row = $(tmplRow); |     const $row = $(tmplRow); | ||||||
|     const $a = $row.find('a'); |     const $a = $row.find('a'); | ||||||
|     $a.attr('href', `${treeLink}/${matchRes.join('')}`); |     $a.attr('href', `${treeLink}/${r.matchResult.join('')}`); | ||||||
|     const $octiconFile = $(svg('octicon-file')).addClass('mr-3'); |     const $octiconFile = $(svg('octicon-file')).addClass('mr-3'); | ||||||
|     $a.append($octiconFile); |     $a.append($octiconFile); | ||||||
|     // if the target file path is "abc/xyz", to search "bx", then the matchRes is ['a', 'b', 'c/', 'x', 'yz'] |     // if the target file path is "abc/xyz", to search "bx", then the matchResult is ['a', 'b', 'c/', 'x', 'yz'] | ||||||
|     // the matchRes[odd] is matched and highlighted to red. |     // the matchResult[odd] is matched and highlighted to red. | ||||||
|     for (let j = 0; j < matchRes.length; j++) { |     for (let j = 0; j < r.matchResult.length; j++) { | ||||||
|       if (!matchRes[j]) continue; |       if (!r.matchResult[j]) continue; | ||||||
|       const $span = $('<span>').text(matchRes[j]); |       const $span = $('<span>').text(r.matchResult[j]); | ||||||
|       if (j % 2 === 1) $span.addClass('ui text red'); |       if (j % 2 === 1) $span.addClass('ui text red'); | ||||||
|       $a.append($span); |       $a.append($span); | ||||||
|     } |     } | ||||||
|   | |||||||
							
								
								
									
										34
									
								
								web_src/js/features/repo-findfile.test.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								web_src/js/features/repo-findfile.test.js
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,34 @@ | |||||||
|  | import {strSubMatch, calcMatchedWeight, filterRepoFilesWeighted} from './repo-findfile.js'; | ||||||
|  |  | ||||||
|  | describe('Repo Find Files', () => { | ||||||
|  |   test('strSubMatch', () => { | ||||||
|  |     expect(strSubMatch('abc', '')).toEqual(['abc']); | ||||||
|  |     expect(strSubMatch('abc', 'a')).toEqual(['', 'a', 'bc']); | ||||||
|  |     expect(strSubMatch('abc', 'b')).toEqual(['a', 'b', 'c']); | ||||||
|  |     expect(strSubMatch('abc', 'c')).toEqual(['ab', 'c']); | ||||||
|  |     expect(strSubMatch('abc', 'ac')).toEqual(['', 'a', 'b', 'c']); | ||||||
|  |     expect(strSubMatch('abc', 'z')).toEqual(['abc']); | ||||||
|  |     expect(strSubMatch('abc', 'az')).toEqual(['abc']); | ||||||
|  |  | ||||||
|  |     expect(strSubMatch('ABc', 'ac')).toEqual(['', 'A', 'B', 'c']); | ||||||
|  |     expect(strSubMatch('abC', 'ac')).toEqual(['', 'a', 'b', 'C']); | ||||||
|  |  | ||||||
|  |     expect(strSubMatch('aabbcc', 'abc')).toEqual(['', 'a', 'a', 'b', 'b', 'c', 'c']); | ||||||
|  |     expect(strSubMatch('the/directory', 'hedir')).toEqual(['t', 'he', '/', 'dir', 'ectory']); | ||||||
|  |   }); | ||||||
|  |  | ||||||
|  |   test('calcMatchedWeight', () => { | ||||||
|  |     expect(calcMatchedWeight(['a', 'b', 'c', 'd']) < calcMatchedWeight(['a', 'bc', 'c'])).toBeTruthy(); | ||||||
|  |   }); | ||||||
|  |  | ||||||
|  |   test('filterRepoFilesWeighted', () => { | ||||||
|  |     // the first matched result should always be the "word.txt" | ||||||
|  |     let res = filterRepoFilesWeighted(['word.txt', 'we-got-result.dat'], 'word'); | ||||||
|  |     expect(res).toHaveLength(2); | ||||||
|  |     expect(res[0].matchResult).toEqual(['', 'word', '.txt']); | ||||||
|  |  | ||||||
|  |     res = filterRepoFilesWeighted(['we-got-result.dat', 'word.txt'], 'word'); | ||||||
|  |     expect(res).toHaveLength(2); | ||||||
|  |     expect(res[0].matchResult).toEqual(['', 'word', '.txt']); | ||||||
|  |   }); | ||||||
|  | }); | ||||||
							
								
								
									
										5
									
								
								web_src/js/testUtils/jestSetup.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								web_src/js/testUtils/jestSetup.js
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,5 @@ | |||||||
|  | window.config = { | ||||||
|  |   csrfToken: 'jest-test-csrf-token-123456', | ||||||
|  |   pageData: {}, | ||||||
|  |   i18n: {}, | ||||||
|  | }; | ||||||
| @@ -59,36 +59,6 @@ export function parseIssueHref(href) { | |||||||
|   return {owner, repo, type, index}; |   return {owner, repo, type, index}; | ||||||
| } | } | ||||||
|  |  | ||||||
| // return the sub-match result as an array:  [unmatched, matched, unmatched, matched, ...] |  | ||||||
| // res[even] is unmatched, res[odd] is matched, see unit tests for examples |  | ||||||
| export function strSubMatch(full, sub) { |  | ||||||
|   const res = ['']; |  | ||||||
|   let i = 0, j = 0; |  | ||||||
|   const subLower = sub.toLowerCase(), fullLower = full.toLowerCase(); |  | ||||||
|   while (i < subLower.length && j < fullLower.length) { |  | ||||||
|     if (subLower[i] === fullLower[j]) { |  | ||||||
|       if (res.length % 2 !== 0) res.push(''); |  | ||||||
|       res[res.length - 1] += full[j]; |  | ||||||
|       j++; |  | ||||||
|       i++; |  | ||||||
|     } else { |  | ||||||
|       if (res.length % 2 === 0) res.push(''); |  | ||||||
|       res[res.length - 1] += full[j]; |  | ||||||
|       j++; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   if (i !== sub.length) { |  | ||||||
|     // if the sub string doesn't match the full, only return the full as unmatched. |  | ||||||
|     return [full]; |  | ||||||
|   } |  | ||||||
|   if (j < full.length) { |  | ||||||
|     // append remaining chars from full to result as unmatched |  | ||||||
|     if (res.length % 2 === 0) res.push(''); |  | ||||||
|     res[res.length - 1] += full.substring(j); |  | ||||||
|   } |  | ||||||
|   return res; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // pretty-print a number using locale-specific separators, e.g. 1200 -> 1,200 | // pretty-print a number using locale-specific separators, e.g. 1200 -> 1,200 | ||||||
| export function prettyNumber(num, locale = 'en-US') { | export function prettyNumber(num, locale = 'en-US') { | ||||||
|   if (typeof num !== 'number') return ''; |   if (typeof num !== 'number') return ''; | ||||||
|   | |||||||
| @@ -1,5 +1,5 @@ | |||||||
| import { | import { | ||||||
|   basename, extname, isObject, uniq, stripTags, joinPaths, parseIssueHref, strSubMatch, |   basename, extname, isObject, uniq, stripTags, joinPaths, parseIssueHref, | ||||||
|   prettyNumber, parseUrl, |   prettyNumber, parseUrl, | ||||||
| } from './utils.js'; | } from './utils.js'; | ||||||
|  |  | ||||||
| @@ -86,22 +86,6 @@ test('parseIssueHref', () => { | |||||||
|   expect(parseIssueHref('')).toEqual({owner: undefined, repo: undefined, type: undefined, index: undefined}); |   expect(parseIssueHref('')).toEqual({owner: undefined, repo: undefined, type: undefined, index: undefined}); | ||||||
| }); | }); | ||||||
|  |  | ||||||
| test('strSubMatch', () => { |  | ||||||
|   expect(strSubMatch('abc', '')).toEqual(['abc']); |  | ||||||
|   expect(strSubMatch('abc', 'a')).toEqual(['', 'a', 'bc']); |  | ||||||
|   expect(strSubMatch('abc', 'b')).toEqual(['a', 'b', 'c']); |  | ||||||
|   expect(strSubMatch('abc', 'c')).toEqual(['ab', 'c']); |  | ||||||
|   expect(strSubMatch('abc', 'ac')).toEqual(['', 'a', 'b', 'c']); |  | ||||||
|   expect(strSubMatch('abc', 'z')).toEqual(['abc']); |  | ||||||
|   expect(strSubMatch('abc', 'az')).toEqual(['abc']); |  | ||||||
|  |  | ||||||
|   expect(strSubMatch('abc', 'aC')).toEqual(['', 'a', 'b', 'c']); |  | ||||||
|   expect(strSubMatch('abC', 'ac')).toEqual(['', 'a', 'b', 'C']); |  | ||||||
|  |  | ||||||
|   expect(strSubMatch('aabbcc', 'abc')).toEqual(['', 'a', 'a', 'b', 'b', 'c', 'c']); |  | ||||||
|   expect(strSubMatch('the/directory', 'hedir')).toEqual(['t', 'he', '/', 'dir', 'ectory']); |  | ||||||
| }); |  | ||||||
|  |  | ||||||
| test('prettyNumber', () => { | test('prettyNumber', () => { | ||||||
|   expect(prettyNumber()).toEqual(''); |   expect(prettyNumber()).toEqual(''); | ||||||
|   expect(prettyNumber(null)).toEqual(''); |   expect(prettyNumber(null)).toEqual(''); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 wxiaoguang
					wxiaoguang