mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-10-26 12:27:06 +00:00 
			
		
		
		
	Improve SHA1 link detection (#6526)
This improves the SHA1 link detection to not pick up extraneous non-whitespace characters at the end of the URL. The '.' is a special case handled in code itself because of missing regexp lookahead support. Regex test cases: https://regex101.com/r/xUMlqh/3
This commit is contained in:
		| @@ -54,7 +54,7 @@ var ( | |||||||
| 	shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`) | 	shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`) | ||||||
|  |  | ||||||
| 	// anySHA1Pattern allows to split url containing SHA into parts | 	// anySHA1Pattern allows to split url containing SHA into parts | ||||||
| 	anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4}([0-9a-f]{40})/?([^#\s]+)?(?:#(\S+))?`) | 	anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4}([0-9a-f]{40})(/[^#\s]+)?(#\S+)?`) | ||||||
|  |  | ||||||
| 	validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`) | 	validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`) | ||||||
|  |  | ||||||
| @@ -594,31 +594,46 @@ func fullSha1PatternProcessor(ctx *postProcessCtx, node *html.Node) { | |||||||
| 	if m == nil { | 	if m == nil { | ||||||
| 		return | 		return | ||||||
| 	} | 	} | ||||||
| 	// take out what's relevant |  | ||||||
| 	urlFull := node.Data[m[0]:m[1]] | 	urlFull := node.Data[m[0]:m[1]] | ||||||
| 	hash := node.Data[m[2]:m[3]] | 	text := base.ShortSha(node.Data[m[2]:m[3]]) | ||||||
|  |  | ||||||
| 	var subtree, line string | 	// 2nd capture group matches an optional path | ||||||
|  | 	subpath := "" | ||||||
| 	// optional, we do them depending on the length. |  | ||||||
| 	if m[7] > 0 { |  | ||||||
| 		line = node.Data[m[6]:m[7]] |  | ||||||
| 	} |  | ||||||
| 	if m[5] > 0 { | 	if m[5] > 0 { | ||||||
| 		subtree = node.Data[m[4]:m[5]] | 		subpath = node.Data[m[4]:m[5]] | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	text := base.ShortSha(hash) | 	// 3rd capture group matches an optional URL hash | ||||||
| 	if subtree != "" { | 	hash := "" | ||||||
| 		text += "/" + subtree | 	if m[7] > 0 { | ||||||
| 	} | 		hash = node.Data[m[6]:m[7]][1:] | ||||||
| 	if line != "" { |  | ||||||
| 		text += " (" |  | ||||||
| 		text += line |  | ||||||
| 		text += ")" |  | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	replaceContent(node, m[0], m[1], createLink(urlFull, text)) | 	start := m[0] | ||||||
|  | 	end := m[1] | ||||||
|  |  | ||||||
|  | 	// If url ends in '.', it's very likely that it is not part of the | ||||||
|  | 	// actual url but used to finish a sentence. | ||||||
|  | 	if strings.HasSuffix(urlFull, ".") { | ||||||
|  | 		end-- | ||||||
|  | 		urlFull = urlFull[:len(urlFull)-1] | ||||||
|  | 		if hash != "" { | ||||||
|  | 			hash = hash[:len(hash)-1] | ||||||
|  | 		} else if subpath != "" { | ||||||
|  | 			subpath = subpath[:len(subpath)-1] | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if subpath != "" { | ||||||
|  | 		text += subpath | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if hash != "" { | ||||||
|  | 		text += " (" + hash + ")" | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	replaceContent(node, start, end, createLink(urlFull, text)) | ||||||
| } | } | ||||||
|  |  | ||||||
| // sha1CurrentPatternProcessor renders SHA1 strings to corresponding links that | // sha1CurrentPatternProcessor renders SHA1 strings to corresponding links that | ||||||
|   | |||||||
| @@ -273,12 +273,12 @@ func TestRegExp_anySHA1Pattern(t *testing.T) { | |||||||
| 	testCases := map[string][]string{ | 	testCases := map[string][]string{ | ||||||
| 		"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js#L2703": { | 		"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js#L2703": { | ||||||
| 			"a644101ed04d0beacea864ce805e0c4f86ba1cd1", | 			"a644101ed04d0beacea864ce805e0c4f86ba1cd1", | ||||||
| 			"test/unit/event.js", | 			"/test/unit/event.js", | ||||||
| 			"L2703", | 			"#L2703", | ||||||
| 		}, | 		}, | ||||||
| 		"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js": { | 		"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js": { | ||||||
| 			"a644101ed04d0beacea864ce805e0c4f86ba1cd1", | 			"a644101ed04d0beacea864ce805e0c4f86ba1cd1", | ||||||
| 			"test/unit/event.js", | 			"/test/unit/event.js", | ||||||
| 			"", | 			"", | ||||||
| 		}, | 		}, | ||||||
| 		"https://github.com/jquery/jquery/commit/0705be475092aede1eddae01319ec931fb9c65fc": { | 		"https://github.com/jquery/jquery/commit/0705be475092aede1eddae01319ec931fb9c65fc": { | ||||||
| @@ -288,13 +288,13 @@ func TestRegExp_anySHA1Pattern(t *testing.T) { | |||||||
| 		}, | 		}, | ||||||
| 		"https://github.com/jquery/jquery/tree/0705be475092aede1eddae01319ec931fb9c65fc/src": { | 		"https://github.com/jquery/jquery/tree/0705be475092aede1eddae01319ec931fb9c65fc/src": { | ||||||
| 			"0705be475092aede1eddae01319ec931fb9c65fc", | 			"0705be475092aede1eddae01319ec931fb9c65fc", | ||||||
| 			"src", | 			"/src", | ||||||
| 			"", | 			"", | ||||||
| 		}, | 		}, | ||||||
| 		"https://try.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2": { | 		"https://try.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2": { | ||||||
| 			"d8a994ef243349f321568f9e36d5c3f444b99cae", | 			"d8a994ef243349f321568f9e36d5c3f444b99cae", | ||||||
| 			"", | 			"", | ||||||
| 			"diff-2", | 			"#diff-2", | ||||||
| 		}, | 		}, | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 silverwind
					silverwind