Improve diff highlighting (#36583)

This commit is contained in:
wxiaoguang
2026-02-11 11:52:17 +08:00
committed by GitHub
parent fd89ceef79
commit 3754e9dd12
2 changed files with 132 additions and 48 deletions

View File

@@ -7,6 +7,9 @@ import (
"bytes"
"html/template"
"strings"
"unicode/utf8"
"code.gitea.io/gitea/modules/util"
"github.com/sergi/go-diff/diffmatchpatch"
)
@@ -84,6 +87,10 @@ type highlightCodeDiff struct {
tokenPlaceholderMap map[string]rune
placeholderOverflowCount int
diffCodeAddedOpen rune
diffCodeRemovedOpen rune
diffCodeClose rune
}
func newHighlightCodeDiff() *highlightCodeDiff {
@@ -155,11 +162,12 @@ func (hcd *highlightCodeDiff) diffLineWithHighlight(lineType DiffLineType, codeA
buf := bytes.NewBuffer(nil)
addedCodePrefix := hcd.registerTokenAsPlaceholder(`<span class="added-code">`)
addedCodeSuffix := hcd.registerTokenAsPlaceholder(`</span><!-- added-code -->`)
removedCodePrefix := hcd.registerTokenAsPlaceholder(`<span class="removed-code">`)
removedCodeSuffix := hcd.registerTokenAsPlaceholder(`</span><!-- removed-code -->`)
if hcd.diffCodeClose == 0 {
// tests can pre-set the placeholders
hcd.diffCodeAddedOpen = hcd.registerTokenAsPlaceholder(`<span class="added-code">`)
hcd.diffCodeRemovedOpen = hcd.registerTokenAsPlaceholder(`<span class="removed-code">`)
hcd.diffCodeClose = hcd.registerTokenAsPlaceholder(`</span><!-- diff-code-close -->`)
}
equalPartSpaceOnly := true
for _, diff := range diffs {
@@ -173,21 +181,21 @@ func (hcd *highlightCodeDiff) diffLineWithHighlight(lineType DiffLineType, codeA
// only add "added"/"removed" tags when needed:
// * non-space contents appear in the DiffEqual parts (not a full-line add/del)
// * placeholder map still works (not exhausted, can get removedCodeSuffix)
addDiffTags := !equalPartSpaceOnly && removedCodeSuffix != 0
// * placeholder map still works (not exhausted, can get the closing tag placeholder)
addDiffTags := !equalPartSpaceOnly && hcd.diffCodeClose != 0
if addDiffTags {
for _, diff := range diffs {
switch {
case diff.Type == diffmatchpatch.DiffEqual:
buf.WriteString(diff.Text)
case diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd:
buf.WriteRune(addedCodePrefix)
buf.WriteRune(hcd.diffCodeAddedOpen)
buf.WriteString(diff.Text)
buf.WriteRune(addedCodeSuffix)
buf.WriteRune(hcd.diffCodeClose)
case diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel:
buf.WriteRune(removedCodePrefix)
buf.WriteRune(hcd.diffCodeRemovedOpen)
buf.WriteString(diff.Text)
buf.WriteRune(removedCodeSuffix)
buf.WriteRune(hcd.diffCodeClose)
}
}
} else {
@@ -203,12 +211,17 @@ func (hcd *highlightCodeDiff) diffLineWithHighlight(lineType DiffLineType, codeA
}
func (hcd *highlightCodeDiff) registerTokenAsPlaceholder(token string) rune {
recovered := token
if token[0] == '<' && token[1] != '<' {
// when recovering a single tag, only use the tag itself, ignore the trailing comment (for how the comment is generated, see the code in `convert` function)
recovered = token[:strings.IndexByte(token, '>')+1]
}
placeholder, ok := hcd.tokenPlaceholderMap[token]
if !ok {
placeholder = hcd.nextPlaceholder()
if placeholder != 0 {
hcd.tokenPlaceholderMap[token] = placeholder
hcd.placeholderTokenMap[placeholder] = token
hcd.placeholderTokenMap[placeholder] = recovered
}
}
return placeholder
@@ -285,49 +298,77 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlContent template.HTML) s
return res.String()
}
func (hcd *highlightCodeDiff) extractNextPlaceholder(buf []byte, lastIdx int) (idx int, placeholder rune, runeLen int, token string) {
for idx = lastIdx; idx < len(buf); {
placeholder, runeLen = utf8.DecodeRune(buf[idx:])
if token = hcd.placeholderTokenMap[placeholder]; token != "" {
break
}
idx += runeLen
}
return idx, placeholder, runeLen, token
}
func (hcd *highlightCodeDiff) recoverOneDiff(str string) template.HTML {
sb := strings.Builder{}
var tagStack []string
var diffCodeOpenTag string
diffCodeCloseTag := hcd.placeholderTokenMap[hcd.diffCodeClose]
strBytes := util.UnsafeStringToBytes(str)
for idx := 0; idx < len(strBytes); {
newIdx, placeholder, lastRuneLen, token := hcd.extractNextPlaceholder(strBytes, idx)
if newIdx != idx {
if diffCodeOpenTag != "" {
sb.WriteString(diffCodeOpenTag)
sb.Write(strBytes[idx:newIdx])
sb.WriteString(diffCodeCloseTag)
} else {
sb.Write(strBytes[idx:newIdx])
}
} // else: no text content before, the current token is a placeholder
if token == "" {
break // reaches the string end, no more placeholder
}
idx = newIdx + lastRuneLen
for _, r := range str {
token, ok := hcd.placeholderTokenMap[r]
if !ok || token == "" {
sb.WriteRune(r) // if the rune is not a placeholder, write it as it is
// for HTML entity
if token[0] == '&' {
sb.WriteString(token)
continue
}
var tokenToRecover string
if strings.HasPrefix(token, "</") { // for closing tag
// only get the tag itself, ignore the trailing comment (for how the comment is generated, see the code in `convert` function)
tokenToRecover = token[:strings.IndexByte(token, '>')+1]
if len(tagStack) == 0 {
continue // if no opening tag in stack yet, skip the closing tag
}
tagStack = tagStack[:len(tagStack)-1]
} else if token[0] == '<' { // for HTML tag
if token[1] == '<' {
// full tag `<<span>content</span>>`, recover to `<span>content</span>`
tokenToRecover = token[1 : len(token)-1]
// for various HTML tags
var recovered string
if token[1] == '<' { // full tag `<<span>content</span>>`, recover to `<span>content</span>`
recovered = token[1 : len(token)-1]
if diffCodeOpenTag != "" {
recovered = diffCodeOpenTag + recovered + diffCodeCloseTag
} // else: just use the recovered content
} else if token[1] != '/' { // opening tag
if placeholder == hcd.diffCodeAddedOpen || placeholder == hcd.diffCodeRemovedOpen {
diffCodeOpenTag = token
} else {
// opening tag
tokenToRecover = token
tagStack = append(tagStack, token)
recovered = token
tagStack = append(tagStack, recovered)
}
} else if token[0] == '&' { // for HTML entity
tokenToRecover = token
} // else: impossible
sb.WriteString(tokenToRecover)
} else { // closing tag
if placeholder == hcd.diffCodeClose {
diffCodeOpenTag = "" // the highlighted diff is closed, no more diff
} else if len(tagStack) != 0 {
recovered = token
tagStack = tagStack[:len(tagStack)-1]
} // else: if no opening tag in stack yet, skip the closing tag
}
sb.WriteString(recovered)
}
if len(tagStack) > 0 {
// close all opening tags
for i := len(tagStack) - 1; i >= 0; i-- {
tagToClose := tagStack[i]
// get the closing tag "</span>" from "<span class=...>" or "<span>"
pos := strings.IndexAny(tagToClose, " >")
if pos != -1 {
sb.WriteString("</" + tagToClose[1:pos] + ">")
} // else: impossible. every tag was pushed into the stack by the code above and is valid HTML opening tag
}
// close all opening tags
for i := len(tagStack) - 1; i >= 0; i-- {
tagToClose := tagStack[i]
// get the closing tag "</span>" from "<span class=...>" or "<span>"
pos := strings.IndexAny(tagToClose, " >")
// pos must be positive, because the tags were pushed by us
sb.WriteString("</" + tagToClose[1:pos] + ">")
}
return template.HTML(sb.String())
}

View File

@@ -64,7 +64,7 @@ func TestDiffWithHighlight(t *testing.T) {
outDel = hcd.diffLineWithHighlight(DiffLineDel, codeA, codeB)
assert.Equal(t, `<span>line1</span>`+"\n"+`<span class="removed-code"><span>line2</span></span>`, string(outDel))
outAdd = hcd.diffLineWithHighlight(DiffLineAdd, codeA, codeB)
assert.Equal(t, `<span>line1</span>`+"\n"+`<span class="added-code"><span>line!</span></span>`, string(outAdd))
assert.Equal(t, `<span>line1</span>`+"\n"+`<span><span class="added-code">line!</span></span>`, string(outAdd))
})
t.Run("OpenCloseTags", func(t *testing.T) {
@@ -75,12 +75,55 @@ func TestDiffWithHighlight(t *testing.T) {
assert.Empty(t, string(hcd.recoverOneDiff("C")))
})
t.Run("ComplexDiff", func(t *testing.T) {
t.Run("ComplexDiff1", func(t *testing.T) {
oldCode, _ := highlight.RenderCodeFast("a.go", "Go", `xxx || yyy`)
newCode, _ := highlight.RenderCodeFast("a.go", "Go", `bot.xxx || bot.yyy`)
hcd := newHighlightCodeDiff()
out := hcd.diffLineWithHighlight(DiffLineAdd, oldCode, newCode)
assert.Equal(t, `<span class="added-code"><span class="nx">bot</span><span class="p">.</span></span><span class="nx">xxx</span><span class="w"> </span><span class="o">||</span><span class="w"> </span><span class="added-code"><span class="nx">bot</span><span class="p">.</span></span><span class="nx">yyy</span>`, string(out))
assert.Equal(t, strings.ReplaceAll(`
<span class="added-code"><span class="nx">bot</span></span>
<span class="added-code"><span class="p">.</span></span>
<span class="nx">xxx</span><span class="w"> </span><span class="o">||</span><span class="w"> </span>
<span class="added-code"><span class="nx">bot</span></span>
<span class="added-code"><span class="p">.</span></span>
<span class="nx">yyy</span>`, "\n", ""), string(out))
})
forceTokenAsPlaceholder := func(hcd *highlightCodeDiff, r rune, token string) rune {
// for testing purpose only
hcd.tokenPlaceholderMap[token] = r
hcd.placeholderTokenMap[r] = token
return r
}
t.Run("ComplexDiff2", func(t *testing.T) {
// When running "diffLineWithHighlight", the newly inserted "added-code", and "removed-code" tags may break the original layout.
// The newly inserted tags can appear in any position, because the "diff" algorithm can make outputs like:
// * Equal: <span>
// * Insert: xx</span><span>yy
// * Equal: zz</span>
// Then the newly inserted tags will make this output, the tags mismatch.
// * <span> <added>xx</span><span>yy</added> zz</span>
// So we need to fix it to:
// * <span> <added>xx</added></span> <span><added>yy</added> zz</span>
hcd := newHighlightCodeDiff()
hcd.diffCodeAddedOpen = forceTokenAsPlaceholder(hcd, '[', "<add>")
hcd.diffCodeClose = forceTokenAsPlaceholder(hcd, ']', "</add>")
forceTokenAsPlaceholder(hcd, '{', "<T>")
forceTokenAsPlaceholder(hcd, '}', "</T>")
assert.Equal(t, `aa<T>xx<add>yy</add>zz</T>bb`, string(hcd.recoverOneDiff("aa{xx[yy]zz}bb")))
assert.Equal(t, `aa<add>xx</add><T><add>yy</add></T><add>zz</add>bb`, string(hcd.recoverOneDiff("aa[xx{yy}zz]bb")))
assert.Equal(t, `aa<T>xx<add>yy</add></T><add>zz</add>bb`, string(hcd.recoverOneDiff("aa{xx[yy}zz]bb")))
assert.Equal(t, `aa<add>xx</add><T><add>yy</add>zz</T>bb`, string(hcd.recoverOneDiff("aa[xx{yy]zz}bb")))
assert.Equal(t, `aa<add>xx</add><T><add>yy</add><add>zz</add></T><add>bb</add>cc`, string(hcd.recoverOneDiff("aa[xx{yy][zz}bb]cc")))
// And do a simple test for "diffCodeRemovedOpen", it shares the same logic as "diffCodeAddedOpen"
hcd = newHighlightCodeDiff()
hcd.diffCodeRemovedOpen = forceTokenAsPlaceholder(hcd, '[', "<del>")
hcd.diffCodeClose = forceTokenAsPlaceholder(hcd, ']', "</del>")
forceTokenAsPlaceholder(hcd, '{', "<T>")
forceTokenAsPlaceholder(hcd, '}', "</T>")
assert.Equal(t, `aa<del>xx</del><T><del>yy</del><del>zz</del></T><del>bb</del>cc`, string(hcd.recoverOneDiff("aa[xx{yy][zz}bb]cc")))
})
}