feat(diff): merge adjacent blocks using inline:word (#37085)

vim-patch:9.2.0174: diff: inline word-diffs can be fragmented

Problem:  When using 'diffopt=inline:word', lines were excessively
          fragmented with punctuation creating separate highlight
          blocks, making it harder to read the diffs.
Solution: Added 'diff_refine_inline_word_highlight()' to merge
          adjacent diff blocks that are separated by small gaps of
          non-word characters (up to 5 bytes by default) (HarshK97).

When using inline:word diff mode, adjacent changed words separated by
punctuation or whitespace are now merged into a single highlight block
if the gap between them consists of at most 5 bytes of non-word characters.

This creates more readable diffs and closely matches GitHub's own diff
display.

closes: vim/vim#19098

42c6686c78
This commit is contained in:
Harsh Kapse
2026-03-17 17:41:55 +05:30
committed by GitHub
parent 9ab6c607cc
commit a5b8cf145d
6 changed files with 134 additions and 13 deletions

View File

@@ -366,6 +366,8 @@ OPTIONS
• 'completeopt' flag "nearest" sorts completion results by distance to cursor.
• 'diffanchors' specifies addresses to anchor a diff.
• 'diffopt' `inline:` configures diff highlighting for changes within a line.
• 'diffopt' with `inline:word` now automatically merges adjacent diff blocks
separated by small gaps (up to 5 bytes) of whitespace or punctuation, to improve readability.
• 'fillchars' has new flag "foldinner".
• 'fsync' and 'grepformat' are now |global-local| options.
• 'listchars' has new flag "leadtab".

View File

@@ -2256,7 +2256,10 @@ A jump table for the options with a short description can be found at |Q_op|.
difference. Non-alphanumeric
multi-byte characters such as emoji
and CJK characters are considered
individual words.
individual words. Small gaps of
non-word characters (5 bytes or less)
between changes are merged into a
single highlight block.
internal Use the internal diff library. This is
ignored when 'diffexpr' is set. *E960*

View File

@@ -1873,7 +1873,10 @@ vim.go.dex = vim.go.diffexpr
--- difference. Non-alphanumeric
--- multi-byte characters such as emoji
--- and CJK characters are considered
--- individual words.
--- individual words. Small gaps of
--- non-word characters (5 bytes or less)
--- between changes are merged into a
--- single highlight block.
---
--- internal Use the internal diff library. This is
--- ignored when 'diffexpr' is set. *E960*

View File

@@ -99,6 +99,7 @@ static int diff_flags = DIFF_INTERNAL | DIFF_FILLER | DIFF_CLOSE_OFF
| DIFF_LINEMATCH | DIFF_INLINE_CHAR;
static int diff_algorithm = XDF_INDENT_HEURISTIC;
static int diff_word_gap = 5; // gap threshold for inline:word
static int linematch_lines = 40;
#define LBUFLEN 50 // length of line in diff file
@@ -3093,6 +3094,113 @@ static void diff_refine_inline_char_highlight(diff_T *dp_orig, garray_T *linemap
} while (pass++ < 4); // use limited number of passes to avoid excessive looping
}
/// Refine inline word diff blocks by merging neighbouring blocks that are only
/// separated by a small gap of non-word characters (whitespace and/or
/// punctuation).  This creates more coherent highlighting for inline:word.
///
/// Two consecutive blocks are merged when, in buffer "idx1":
///  - the last unit of the first block and the first unit of the next block
///    have the same "lineoff" in "linemap",
///  - the gap between them is between 1 and "diff_word_gap" bytes long,
///  - no byte of the gap is classified as a word character (class 2), and
///  - the summed "num_bytes" of both blocks over all diff buffers is at least
///    twice the gap size.
///
/// @param dp_orig     first diff block of the list to refine; the list is
///                    modified in place
/// @param linemap     per-buffer arrays of linemap_entry_T, indexed with a
///                    block's "df_lnum - 1"
/// @param idx1        index of the buffer whose text is used to inspect gaps
/// @param start_lnum  line number in buffer "idx1" that "lineoff" is added to
static void diff_refine_inline_word_highlight(diff_T *dp_orig, garray_T *linemap, int idx1,
                                              linenr_T start_lnum)
{
  int pass = 1;
  do {
    diff_T *dp = dp_orig;

    while (dp != NULL && dp->df_next != NULL) {
      // Index of the last unit of this block and of the first unit of the next
      // block in the linemap of buffer "idx1".
      const linenr_T last_idx = dp->df_lnum[idx1] + dp->df_count[idx1] - 2;
      const linenr_T next_idx = dp->df_next->df_lnum[idx1] - 1;

      // Both units must exist in the linemap.  "last_idx" is negative when this
      // block has no units in buffer "idx1" and starts at the very first unit
      // (df_count == 0 and df_lnum == 1); indexing with it would read before
      // the start of the array, so such a pair is never merged.
      if (last_idx < 0 || next_idx < 0
          || last_idx + 1 >= linemap[idx1].ga_len
          || next_idx >= linemap[idx1].ga_len) {
        dp = dp->df_next;
        continue;
      }

      linemap_entry_T *entries = (linemap_entry_T *)linemap[idx1].ga_data;
      linemap_entry_T *entry1 = &entries[last_idx];
      linemap_entry_T *entry2 = &entries[next_idx];

      // Skip if blocks are on different lines
      if (entry1->lineoff != entry2->lineoff) {
        dp = dp->df_next;
        continue;
      }

      // Calculate the gap between blocks
      int gap_start = entry1->byte_start + entry1->num_bytes;
      int gap_end = entry2->byte_start;
      int gap_size = gap_end - gap_start;

      // Merge adjacent diff blocks separated by small gaps to reduce visual
      // fragmentation.  The gap threshold ("diff_word_gap") handles the most
      // common separators (spaces, punctuation) while still preserving
      // visually distinct changes.
      if (gap_size <= 0 || gap_size > diff_word_gap) {
        dp = dp->df_next;
        continue;
      }

      // Get the text between the two blocks
      char *line = ml_get_buf(curtab->tp_diffbuf[idx1],
                              start_lnum + entry1->lineoff);
      char *gap_text = line + gap_start;

      // Check if gap contains only whitespace and/or punctuation.
      // NOTE(review): the gap is classified one byte at a time, so the trailing
      // bytes of a multi-byte character are classified on their own -- confirm
      // this is intended.
      bool only_non_word = true;
      bool has_content = false;
      for (int i = 0; i < gap_size && gap_text[i] != NUL; i++) {
        has_content = true;
        int char_class = mb_get_class_tab(gap_text + i,
                                          curtab->tp_diffbuf[idx1]->b_chartab);
        // class 2 is word characters, if we find any, don't merge
        if (char_class == 2) {
          only_non_word = false;
          break;
        }
      }

      // Merge if the gap is small and contains only non-word characters
      if (has_content && only_non_word) {
        // Total number of changed bytes of both blocks over all diff buffers.
        long total_change_bytes = 0;
        for (int i = 0; i < DB_COUNT; i++) {
          if (curtab->tp_diffbuf[i] != NULL) {
            // count bytes in the first block
            for (int k = 0; k < dp->df_count[i]; k++) {
              int idx = dp->df_lnum[i] + k - 1;
              if (idx < linemap[i].ga_len) {
                total_change_bytes += ((linemap_entry_T *)linemap[i].ga_data)[idx].num_bytes;
              }
            }

            // count bytes in the next block
            for (int k = 0; k < dp->df_next->df_count[i]; k++) {
              int idx = dp->df_next->df_lnum[i] + k - 1;
              if (idx < linemap[i].ga_len) {
                total_change_bytes += ((linemap_entry_T *)linemap[i].ga_data)[idx].num_bytes;
              }
            }
          }
        }

        // Only merge when the changes are at least twice as large as the gap
        // that would get highlighted along with them.
        if (total_change_bytes >= gap_size * 2) {
          // Merge the blocks by extending the first block to include the next
          for (int i = 0; i < DB_COUNT; i++) {
            if (curtab->tp_diffbuf[i] != NULL) {
              dp->df_count[i] = dp->df_next->df_lnum[i] + dp->df_next->df_count[i]
                                - dp->df_lnum[i];
            }
          }

          // Unlink the next block from the list and release it.
          diff_T *dp_next = dp->df_next;
          dp->df_next = dp_next->df_next;
          clear_diffblock(dp_next);

          // Don't advance dp, check if can merge with the next block too
          continue;
        }
      }

      dp = dp->df_next;
    }
  } while (pass++ < 4);  // use limited number of passes to avoid excessive looping
}
/// Find the inline difference within a diff block among different buffers. Do
/// this by splitting each block's content into characters or words, and then
/// use internal xdiff to calculate the per-character/word diff. The result is
@@ -3319,7 +3427,9 @@ static void diff_find_change_inline_diff(diff_T *dp)
}
diff_T *new_diff = curtab->tp_first_diff;
if (diff_flags & DIFF_INLINE_CHAR && file1_idx != -1) {
if (diff_flags & DIFF_INLINE_WORD && file1_idx != -1) {
diff_refine_inline_word_highlight(new_diff, linemap, file1_idx, dp->df_lnum[file1_idx]);
} else if (diff_flags & DIFF_INLINE_CHAR && file1_idx != -1) {
diff_refine_inline_char_highlight(new_diff, linemap, file1_idx);
}

View File

@@ -2459,7 +2459,10 @@ local options = {
difference. Non-alphanumeric
multi-byte characters such as emoji
and CJK characters are considered
individual words.
individual words. Small gaps of
non-word characters (5 bytes or less)
between changes are merged into a
single highlight block.
internal Use the internal diff library. This is
ignored when 'diffexpr' is set. *E960*

View File

@@ -2316,7 +2316,7 @@ it('diff mode inline highlighting', function()
command('set diffopt=internal,filler diffopt+=inline:word')
screen:expect([[
{7: }{27:^abcdef}{4: }{27:ghi}{4: }{27:jk}{4: n }│{7: }{27:aBcef}{4: }{27:gHi}{4: }{27:lm}{4: n }|
{7: }{27:^abcdef ghi jk}{4: n }│{7: }{27:aBcef gHi lm}{4: n }|
{7: }{22:x }│{7: }{23:----------------}|
{7: }y │{7: }y |
{7: }{23:----------------}│{7: }{22:z }|
@@ -2497,7 +2497,7 @@ it('diff mode inline highlighting', function()
)
command('set diffopt=internal,filler diffopt+=inline:word')
screen:expect([[
{7: }{4:^🚀}{27:⛵️}{4:一二}{27:三}{4:ひら}{100:が}{4:な}{27:Δέλτα}{4: }{27:Δelta}{4: fooba}│{7: }{4:🚀}{27:🛸}{4:一二}{27:四}{4:ひら}{27:δέλτα}{4: }{27:δelta}{4: foobar }|
{7: }{4:^🚀}{27:⛵️}{4:一二}{27:三}{4:ひら}{27:がなΔέλτα Δelta}{4: fooba}│{7: }{4:🚀}{27:🛸}{4:一二}{27:四}{4:ひら}{27:δέλτα δelta}{4: foobar }|
{1:~ }│{1:~ }|*17
{3:Xdifile1 }{2:Xdifile2 }|
|
@@ -2573,9 +2573,9 @@ it('diff mode inline highlighting', function()
]])
command('set diffopt=internal,filler diffopt+=inline:word,iwhite')
screen:expect([[
{7: }{4:^this is }│{7: }{4:this is }{27:some}{4: }{27:test}{4: }|
{7: }{27:sometest}{4: }{27:text}{4: }{27:foo}{4: }│{7: }{27:texts}{4: }|
{7: }{27:baz}{4: }{27:abc}{4: }{27:def}{4: }│{7: }{27:foo}{4: }{27:bar}{4: }{27:abX}{4: }{27:Yef}{4: }|
{7: }{4:^this is }│{7: }{4:this is }{27:some test}{4: }|
{7: }{27:sometest text foo}{4: }│{7: }{27:texts}{4: }|
{7: }{27:baz abc def}{4: }│{7: }{27:foo bar abX}{4: }{27:Yef}{4: }|
{7: }{27:one}{4: }│{7: }{27:oneword}{4: another word }|
{7: }{27:word}{4: another word }│{7: }{23:----------------------}|
{7: }{22:additional line }│{7: }{23:----------------------}|
@@ -2597,9 +2597,9 @@ it('diff mode inline highlighting', function()
]])
command('set diffopt=internal,filler diffopt+=inline:word,iwhiteeol')
screen:expect([[
{7: }{4:^this }{100: }{4:is }│{7: }{4:this is }{27:some}{4: }{27:test}{4: }|
{7: }{27:sometest}{4: }{27:text}{4: foo }│{7: }{27:texts}{4: }|
{7: }{27:baz}{4: }{27:abc}{4: }{27:def}{4: }│{7: }{4:foo }{27:bar}{4: }{27:abX}{4: }{27:Yef}{4: }|
{7: }{4:^this }{100: }{4:is }│{7: }{4:this is }{27:some test}{4: }|
{7: }{27:sometest text}{4: foo }│{7: }{27:texts}{4: }|
{7: }{27:baz abc def}{4: }│{7: }{4:foo }{27:bar abX Yef}{4: }|
{7: }{27:one}{4: }│{7: }{27:oneword}{4: another word }|
{7: }{27:word}{4: another word }│{7: }{23:----------------------}|
{7: }{22:additional line }│{7: }{23:----------------------}|
@@ -2765,7 +2765,7 @@ it('diff mode inline highlighting with 3 buffers', function()
command('set iskeyword+=+ | 2wincmd w | set iskeyword+=- | 1wincmd w')
command('set diffopt=internal,filler diffopt+=inline:word')
local s4 = [[
{7: }{27:^This+is}{4:=}{27:a}{4:-setence }│{7: }{27:This+is}{4:=}{27:another}{4:-setenc}│{7: }{27:That+is}{4:=}{27:a}{4:-setence }|
{7: }{27:^This+is=a}{4:-setence }│{7: }{27:This+is=another}{4:-setenc}│{7: }{27:That+is=a}{4:-setence }|
{1:~ }│{1:~ }│{1:~ }|*17
{3:Xdifile1 }{2:Xdifile2 Xdifile3 }|
|