From a5b8cf145d1f46428f2eaa5fec89d41f5c9f87f7 Mon Sep 17 00:00:00 2001 From: Harsh Kapse Date: Tue, 17 Mar 2026 17:41:55 +0530 Subject: [PATCH] feat(diff): merge adjacent blocks using inline:word (#37085) vim-patch:9.2.0174: diff: inline word-diffs can be fragmented Problem: When using 'diffopt=inline:word', lines were excessively fragmented with punctuation creating separate highlight blocks, making it harder to read the diffs. Solution: Added 'diff_refine_inline_word_highlight()' to merge adjacent diff blocks that are separated by small gaps of non-word characters (up to 5 bytes by default) (HarshK97). When using inline:word diff mode, adjacent changed words separated by punctuation or whitespace are now merged into a single highlight block if the gap between them is at most 5 bytes long and contains only non-word characters. This creates more readable diffs and closely matches GitHub's own diff display. closes: vim/vim#19098 https://github.com/vim/vim/commit/42c6686c78d39843f71dba989a8ea59bc6975132 --- runtime/doc/news.txt | 2 + runtime/doc/options.txt | 5 +- runtime/lua/vim/_meta/options.lua | 5 +- src/nvim/diff.c | 112 +++++++++++++++++++++++++++++- src/nvim/options.lua | 5 +- test/functional/ui/diff_spec.lua | 18 ++--- 6 files changed, 134 insertions(+), 13 deletions(-) diff --git a/runtime/doc/news.txt b/runtime/doc/news.txt index 215f71722a..9a04cb0f9a 100644 --- a/runtime/doc/news.txt +++ b/runtime/doc/news.txt @@ -366,6 +366,8 @@ OPTIONS • 'completeopt' flag "nearest" sorts completion results by distance to cursor. • 'diffanchors' specifies addresses to anchor a diff. • 'diffopt' `inline:` configures diff highlighting for changes within a line. +• 'diffopt' with `inline:word` now automatically merges adjacent diff blocks + separated by gaps of up to 5 bytes of whitespace or punctuation. • 'fillchars' has new flag "foldinner". • 'fsync' and 'grepformat' are now |global-local| options. • 'listchars' has new flag "leadtab". 
diff --git a/runtime/doc/options.txt b/runtime/doc/options.txt index 601ce8f63a..266e95d55a 100644 --- a/runtime/doc/options.txt +++ b/runtime/doc/options.txt @@ -2256,7 +2256,10 @@ A jump table for the options with a short description can be found at |Q_op|. difference. Non-alphanumeric multi-byte characters such as emoji and CJK characters are considered - individual words. + individual words. Small gaps of + non-word characters (5 bytes or less) + between changes are merged into a + single highlight block. internal Use the internal diff library. This is ignored when 'diffexpr' is set. *E960* diff --git a/runtime/lua/vim/_meta/options.lua b/runtime/lua/vim/_meta/options.lua index 9b29d3380d..5e0c52bf7e 100644 --- a/runtime/lua/vim/_meta/options.lua +++ b/runtime/lua/vim/_meta/options.lua @@ -1873,7 +1873,10 @@ vim.go.dex = vim.go.diffexpr --- difference. Non-alphanumeric --- multi-byte characters such as emoji --- and CJK characters are considered ---- individual words. +--- individual words. Small gaps of +--- non-word characters (5 bytes or less) +--- between changes are merged into a +--- single highlight block. --- --- internal Use the internal diff library. This is --- ignored when 'diffexpr' is set. *E960* diff --git a/src/nvim/diff.c b/src/nvim/diff.c index 8077e8fcdc..cc2715543c 100644 --- a/src/nvim/diff.c +++ b/src/nvim/diff.c @@ -99,6 +99,7 @@ static int diff_flags = DIFF_INTERNAL | DIFF_FILLER | DIFF_CLOSE_OFF | DIFF_LINEMATCH | DIFF_INLINE_CHAR; static int diff_algorithm = XDF_INDENT_HEURISTIC; +static int diff_word_gap = 5; // gap threshold for inline:word static int linematch_lines = 40; #define LBUFLEN 50 // length of line in diff file @@ -3093,6 +3094,113 @@ static void diff_refine_inline_char_highlight(diff_T *dp_orig, garray_T *linemap } while (pass++ < 4); // use limited number of passes to avoid excessive looping } +/// Refine inline word diff blocks by merging blocks that are only separated +/// by whitespace or punctuation. 
This creates more coherent highlighting. +static void diff_refine_inline_word_highlight(diff_T *dp_orig, garray_T *linemap, int idx1, + linenr_T start_lnum) +{ + int pass = 1; + do { + diff_T *dp = dp_orig; + + while (dp != NULL && dp->df_next != NULL) { + // Skip blocks whose entries fall outside the linemap + if (dp->df_lnum[idx1] + dp->df_count[idx1] - 1 >= linemap[idx1].ga_len + || dp->df_next->df_lnum[idx1] - 1 >= linemap[idx1].ga_len) { + dp = dp->df_next; + continue; + } + + linemap_entry_T *entry1 = + &((linemap_entry_T *)linemap[idx1].ga_data)[dp->df_lnum[idx1] + + dp->df_count[idx1] - 2]; + linemap_entry_T *entry2 = + &((linemap_entry_T *)linemap[idx1].ga_data)[dp->df_next->df_lnum[idx1] - 1]; + + // Skip if blocks are on different lines + if (entry1->lineoff != entry2->lineoff) { + dp = dp->df_next; + continue; + } + + // Calculate the gap between blocks + int gap_start = entry1->byte_start + entry1->num_bytes; + int gap_end = entry2->byte_start; + int gap_size = gap_end - gap_start; + + // Merge adjacent diff blocks separated by small gaps to reduce visual + // fragmentation. Gap threshold is set to 5 bytes which handles most + // common separators (spaces, punctuation, short operators) while + // still preserving visually distinct changes. 
+ if (gap_size <= 0 || gap_size > diff_word_gap) { + dp = dp->df_next; + continue; + } + + // Get the text between the two blocks + char *line = ml_get_buf(curtab->tp_diffbuf[idx1], + start_lnum + entry1->lineoff); + char *gap_text = line + gap_start; + + // Check if gap contains only whitespace and/or punctuation + bool only_non_word = true; + bool has_content = false; + for (int i = 0; i < gap_size && gap_text[i] != NUL; i++) { + has_content = true; + int char_class = mb_get_class_tab(gap_text + i, + curtab->tp_diffbuf[idx1]->b_chartab); + // class 2 is word characters, if we find any, don't merge + if (char_class == 2) { + only_non_word = false; + break; + } + } + + // Merge if the gap is small and contains only non-word characters + if (has_content && only_non_word) { + long total_change_bytes = 0; + for (int i = 0; i < DB_COUNT; i++) { + if (curtab->tp_diffbuf[i] != NULL) { + // count bytes in the first block + for (int k = 0; k < dp->df_count[i]; k++) { + int idx = dp->df_lnum[i] + k - 1; + if (idx < linemap[i].ga_len) { + total_change_bytes += ((linemap_entry_T *)linemap[i].ga_data)[idx].num_bytes; + } + } + // count bytes in the next block + for (int k = 0; k < dp->df_next->df_count[i]; k++) { + int idx = dp->df_next->df_lnum[i] + k - 1; + if (idx < linemap[i].ga_len) { + total_change_bytes += ((linemap_entry_T *)linemap[i].ga_data)[idx].num_bytes; + } + } + } + } + + if (total_change_bytes >= gap_size * 2) { + // Merge the blocks by extending the first block to include the next + for (int i = 0; i < DB_COUNT; i++) { + if (curtab->tp_diffbuf[i] != NULL) { + dp->df_count[i] = dp->df_next->df_lnum[i] + dp->df_next->df_count[i] + - dp->df_lnum[i]; + } + } + + diff_T *dp_next = dp->df_next; + dp->df_next = dp_next->df_next; + clear_diffblock(dp_next); + + // Don't advance dp, check if can merge with the next block too + continue; + } + } + + dp = dp->df_next; + } + } while (pass++ < 4); // use limited number of passes to avoid excessive looping +} + /// Find 
the inline difference within a diff block among different buffers. Do /// this by splitting each block's content into characters or words, and then /// use internal xdiff to calculate the per-character/word diff. The result is @@ -3319,7 +3427,9 @@ static void diff_find_change_inline_diff(diff_T *dp) } diff_T *new_diff = curtab->tp_first_diff; - if (diff_flags & DIFF_INLINE_CHAR && file1_idx != -1) { + if (diff_flags & DIFF_INLINE_WORD && file1_idx != -1) { + diff_refine_inline_word_highlight(new_diff, linemap, file1_idx, dp->df_lnum[file1_idx]); + } else if (diff_flags & DIFF_INLINE_CHAR && file1_idx != -1) { diff_refine_inline_char_highlight(new_diff, linemap, file1_idx); } diff --git a/src/nvim/options.lua b/src/nvim/options.lua index 4e224ab92e..5b816662a4 100644 --- a/src/nvim/options.lua +++ b/src/nvim/options.lua @@ -2459,7 +2459,10 @@ local options = { difference. Non-alphanumeric multi-byte characters such as emoji and CJK characters are considered - individual words. + individual words. Small gaps of + non-word characters (5 bytes or less) + between changes are merged into a + single highlight block. internal Use the internal diff library. This is ignored when 'diffexpr' is set. 
*E960* diff --git a/test/functional/ui/diff_spec.lua b/test/functional/ui/diff_spec.lua index e7b679a3c4..e62e3b0a95 100644 --- a/test/functional/ui/diff_spec.lua +++ b/test/functional/ui/diff_spec.lua @@ -2316,7 +2316,7 @@ it('diff mode inline highlighting', function() command('set diffopt=internal,filler diffopt+=inline:word') screen:expect([[ - {7: }{27:^abcdef}{4: }{27:ghi}{4: }{27:jk}{4: n }│{7: }{27:aBcef}{4: }{27:gHi}{4: }{27:lm}{4: n }| + {7: }{27:^abcdef ghi jk}{4: n }│{7: }{27:aBcef gHi lm}{4: n }| {7: }{22:x }│{7: }{23:----------------}| {7: }y │{7: }y | {7: }{23:----------------}│{7: }{22:z }| @@ -2497,7 +2497,7 @@ it('diff mode inline highlighting', function() ) command('set diffopt=internal,filler diffopt+=inline:word') screen:expect([[ - {7: }{4:^🚀}{27:⛵️}{4:一二}{27:三}{4:ひら}{100:が}{4:な}{27:Δέλτα}{4: }{27:Δelta}{4: fooba}│{7: }{4:🚀}{27:🛸}{4:一二}{27:四}{4:ひらな}{27:δέλτα}{4: }{27:δelta}{4: foobar }| + {7: }{4:^🚀}{27:⛵️}{4:一二}{27:三}{4:ひら}{27:がなΔέλτα Δelta}{4: fooba}│{7: }{4:🚀}{27:🛸}{4:一二}{27:四}{4:ひら}{27:なδέλτα δelta}{4: foobar }| {1:~ }│{1:~ }|*17 {3:Xdifile1 }{2:Xdifile2 }| | @@ -2573,9 +2573,9 @@ it('diff mode inline highlighting', function() ]]) command('set diffopt=internal,filler diffopt+=inline:word,iwhite') screen:expect([[ - {7: }{4:^this is }│{7: }{4:this is }{27:some}{4: }{27:test}{4: }| - {7: }{27:sometest}{4: }{27:text}{4: }{27:foo}{4: }│{7: }{27:texts}{4: }| - {7: }{27:baz}{4: }{27:abc}{4: }{27:def}{4: }│{7: }{27:foo}{4: }{27:bar}{4: }{27:abX}{4: }{27:Yef}{4: }| + {7: }{4:^this is }│{7: }{4:this is }{27:some test}{4: }| + {7: }{27:sometest text foo}{4: }│{7: }{27:texts}{4: }| + {7: }{27:baz abc def}{4: }│{7: }{27:foo bar abX}{4: }{27:Yef}{4: }| {7: }{27:one}{4: }│{7: }{27:oneword}{4: another word }| {7: }{27:word}{4: another word }│{7: }{23:----------------------}| {7: }{22:additional line }│{7: }{23:----------------------}| @@ -2597,9 +2597,9 @@ it('diff mode inline highlighting', function() ]]) command('set diffopt=internal,filler 
diffopt+=inline:word,iwhiteeol') screen:expect([[ - {7: }{4:^this }{100: }{4:is }│{7: }{4:this is }{27:some}{4: }{27:test}{4: }| - {7: }{27:sometest}{4: }{27:text}{4: foo }│{7: }{27:texts}{4: }| - {7: }{27:baz}{4: }{27:abc}{4: }{27:def}{4: }│{7: }{4:foo }{27:bar}{4: }{27:abX}{4: }{27:Yef}{4: }| + {7: }{4:^this }{100: }{4:is }│{7: }{4:this is }{27:some test}{4: }| + {7: }{27:sometest text}{4: foo }│{7: }{27:texts}{4: }| + {7: }{27:baz abc def}{4: }│{7: }{4:foo }{27:bar abX Yef}{4: }| {7: }{27:one}{4: }│{7: }{27:oneword}{4: another word }| {7: }{27:word}{4: another word }│{7: }{23:----------------------}| {7: }{22:additional line }│{7: }{23:----------------------}| @@ -2765,7 +2765,7 @@ it('diff mode inline highlighting with 3 buffers', function() command('set iskeyword+=+ | 2wincmd w | set iskeyword+=- | 1wincmd w') command('set diffopt=internal,filler diffopt+=inline:word') local s4 = [[ - {7: }{27:^This+is}{4:=}{27:a}{4:-setence }│{7: }{27:This+is}{4:=}{27:another}{4:-setenc}│{7: }{27:That+is}{4:=}{27:a}{4:-setence }| + {7: }{27:^This+is=a}{4:-setence }│{7: }{27:This+is=another}{4:-setenc}│{7: }{27:That+is=a}{4:-setence }| {1:~ }│{1:~ }│{1:~ }|*17 {3:Xdifile1 }{2:Xdifile2 Xdifile3 }| |