mirror of
https://github.com/neovim/neovim.git
synced 2026-03-27 19:02:02 +00:00
feat(diff): merge adjacent blocks using inline:word (#37085)
vim-patch:9.2.0174: diff: inline word-diffs can be fragmented
Problem: When using 'diffopt=inline:word', lines were excessively
fragmented with punctuation creating separate highlight
blocks, making it harder to read the diffs.
Solution: Added 'diff_refine_inline_word_highlight()' to merge
adjacent diff blocks that are separated by small gaps of
non-word characters (up to 5 bytes by default) (HarshK97).
When using inline:word diff mode, adjacent changed words separated by
punctuation or whitespace are now merged into a single highlight block
if the gap between them is at most 5 bytes long and contains only non-word characters.
This creates more readable diffs and closely matches GitHub's own diff
display.
closes: vim/vim#19098
42c6686c78
This commit is contained in:
@@ -366,6 +366,8 @@ OPTIONS
|
||||
• 'completeopt' flag "nearest" sorts completion results by distance to cursor.
|
||||
• 'diffanchors' specifies addresses to anchor a diff.
|
||||
• 'diffopt' `inline:` configures diff highlighting for changes within a line.
|
||||
• 'diffopt' with `inline:word` now automatically merges adjacent diff blocks
|
||||
separated by gaps of at most 5 bytes of whitespace or punctuation, to improve readability.
|
||||
• 'fillchars' has new flag "foldinner".
|
||||
• 'fsync' and 'grepformat' are now |global-local| options.
|
||||
• 'listchars' has new flag "leadtab".
|
||||
|
||||
@@ -2256,7 +2256,10 @@ A jump table for the options with a short description can be found at |Q_op|.
|
||||
difference. Non-alphanumeric
|
||||
multi-byte characters such as emoji
|
||||
and CJK characters are considered
|
||||
individual words.
|
||||
individual words. Small gaps of
|
||||
non-word characters (5 bytes or less)
|
||||
between changes are merged into a
|
||||
single highlight block.
|
||||
|
||||
internal Use the internal diff library. This is
|
||||
ignored when 'diffexpr' is set. *E960*
|
||||
|
||||
5
runtime/lua/vim/_meta/options.lua
generated
5
runtime/lua/vim/_meta/options.lua
generated
@@ -1873,7 +1873,10 @@ vim.go.dex = vim.go.diffexpr
|
||||
--- difference. Non-alphanumeric
|
||||
--- multi-byte characters such as emoji
|
||||
--- and CJK characters are considered
|
||||
--- individual words.
|
||||
--- individual words. Small gaps of
|
||||
--- non-word characters (5 bytes or less)
|
||||
--- between changes are merged into a
|
||||
--- single highlight block.
|
||||
---
|
||||
--- internal Use the internal diff library. This is
|
||||
--- ignored when 'diffexpr' is set. *E960*
|
||||
|
||||
112
src/nvim/diff.c
112
src/nvim/diff.c
@@ -99,6 +99,7 @@ static int diff_flags = DIFF_INTERNAL | DIFF_FILLER | DIFF_CLOSE_OFF
|
||||
| DIFF_LINEMATCH | DIFF_INLINE_CHAR;
|
||||
|
||||
static int diff_algorithm = XDF_INDENT_HEURISTIC;
|
||||
static int diff_word_gap = 5; // gap threshold for inline:word
|
||||
static int linematch_lines = 40;
|
||||
|
||||
#define LBUFLEN 50 // length of line in diff file
|
||||
@@ -3093,6 +3094,113 @@ static void diff_refine_inline_char_highlight(diff_T *dp_orig, garray_T *linemap
|
||||
} while (pass++ < 4); // use limited number of passes to avoid excessive looping
|
||||
}
|
||||
|
||||
/// Refine inline word diff blocks by merging blocks that are only separated
|
||||
/// by whitespace or punctuation. This creates more coherent highlighting.
|
||||
static void diff_refine_inline_word_highlight(diff_T *dp_orig, garray_T *linemap, int idx1,
|
||||
linenr_T start_lnum)
|
||||
{
|
||||
int pass = 1;
|
||||
do {
|
||||
diff_T *dp = dp_orig;
|
||||
|
||||
while (dp != NULL && dp->df_next != NULL) {
|
||||
// Only merge blocks on the same line
|
||||
if (dp->df_lnum[idx1] + dp->df_count[idx1] - 1 >= linemap[idx1].ga_len
|
||||
|| dp->df_next->df_lnum[idx1] - 1 >= linemap[idx1].ga_len) {
|
||||
dp = dp->df_next;
|
||||
continue;
|
||||
}
|
||||
|
||||
linemap_entry_T *entry1 =
|
||||
&((linemap_entry_T *)linemap[idx1].ga_data)[dp->df_lnum[idx1]
|
||||
+ dp->df_count[idx1] - 2];
|
||||
linemap_entry_T *entry2 =
|
||||
&((linemap_entry_T *)linemap[idx1].ga_data)[dp->df_next->df_lnum[idx1] - 1];
|
||||
|
||||
// Skip if blocks are on different lines
|
||||
if (entry1->lineoff != entry2->lineoff) {
|
||||
dp = dp->df_next;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Calculate the gap between blocks
|
||||
int gap_start = entry1->byte_start + entry1->num_bytes;
|
||||
int gap_end = entry2->byte_start;
|
||||
int gap_size = gap_end - gap_start;
|
||||
|
||||
// Merge adjacent diff blocks separated by small gaps to reduce visual
|
||||
// fragmentation. Gap threshold is set to 5 bytes which handles most
|
||||
// common separators (spaces, punctuation, short variable names) while
|
||||
// still preserving visually distinct changes.
|
||||
if (gap_size <= 0 || gap_size > diff_word_gap) {
|
||||
dp = dp->df_next;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Get the text between the two blocks
|
||||
char *line = ml_get_buf(curtab->tp_diffbuf[idx1],
|
||||
start_lnum + entry1->lineoff);
|
||||
char *gap_text = line + gap_start;
|
||||
|
||||
// Check if gap contains only whitespace and/or punctuation
|
||||
bool only_non_word = true;
|
||||
bool has_content = false;
|
||||
for (int i = 0; i < gap_size && gap_text[i] != NUL; i++) {
|
||||
has_content = true;
|
||||
int char_class = mb_get_class_tab(gap_text + i,
|
||||
curtab->tp_diffbuf[idx1]->b_chartab);
|
||||
// class 2 is word characters, if we find any, don't merge
|
||||
if (char_class == 2) {
|
||||
only_non_word = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Merge if the gap is small and contains only non-word characters
|
||||
if (has_content && only_non_word) {
|
||||
long total_change_bytes = 0;
|
||||
for (int i = 0; i < DB_COUNT; i++) {
|
||||
if (curtab->tp_diffbuf[i] != NULL) {
|
||||
// count bytes in the first block
|
||||
for (int k = 0; k < dp->df_count[i]; k++) {
|
||||
int idx = dp->df_lnum[i] + k - 1;
|
||||
if (idx < linemap[i].ga_len) {
|
||||
total_change_bytes += ((linemap_entry_T *)linemap[i].ga_data)[idx].num_bytes;
|
||||
}
|
||||
}
|
||||
// count bytes in the next block
|
||||
for (int k = 0; k < dp->df_next->df_count[i]; k++) {
|
||||
int idx = dp->df_next->df_lnum[i] + k - 1;
|
||||
if (idx < linemap[i].ga_len) {
|
||||
total_change_bytes += ((linemap_entry_T *)linemap[i].ga_data)[idx].num_bytes;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (total_change_bytes >= gap_size * 2) {
|
||||
// Merge the blocks by extending the first block to include the next
|
||||
for (int i = 0; i < DB_COUNT; i++) {
|
||||
if (curtab->tp_diffbuf[i] != NULL) {
|
||||
dp->df_count[i] = dp->df_next->df_lnum[i] + dp->df_next->df_count[i]
|
||||
- dp->df_lnum[i];
|
||||
}
|
||||
}
|
||||
|
||||
diff_T *dp_next = dp->df_next;
|
||||
dp->df_next = dp_next->df_next;
|
||||
clear_diffblock(dp_next);
|
||||
|
||||
// Don't advance dp, check if can merge with the next block too
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
dp = dp->df_next;
|
||||
}
|
||||
} while (pass++ < 4); // use limited number of passes to avoid excessive looping
|
||||
}
|
||||
|
||||
/// Find the inline difference within a diff block among different buffers. Do
|
||||
/// this by splitting each block's content into characters or words, and then
|
||||
/// use internal xdiff to calculate the per-character/word diff. The result is
|
||||
@@ -3319,7 +3427,9 @@ static void diff_find_change_inline_diff(diff_T *dp)
|
||||
}
|
||||
diff_T *new_diff = curtab->tp_first_diff;
|
||||
|
||||
if (diff_flags & DIFF_INLINE_CHAR && file1_idx != -1) {
|
||||
if (diff_flags & DIFF_INLINE_WORD && file1_idx != -1) {
|
||||
diff_refine_inline_word_highlight(new_diff, linemap, file1_idx, dp->df_lnum[file1_idx]);
|
||||
} else if (diff_flags & DIFF_INLINE_CHAR && file1_idx != -1) {
|
||||
diff_refine_inline_char_highlight(new_diff, linemap, file1_idx);
|
||||
}
|
||||
|
||||
|
||||
@@ -2459,7 +2459,10 @@ local options = {
|
||||
difference. Non-alphanumeric
|
||||
multi-byte characters such as emoji
|
||||
and CJK characters are considered
|
||||
individual words.
|
||||
individual words. Small gaps of
|
||||
non-word characters (5 bytes or less)
|
||||
between changes are merged into a
|
||||
single highlight block.
|
||||
|
||||
internal Use the internal diff library. This is
|
||||
ignored when 'diffexpr' is set. *E960*
|
||||
|
||||
@@ -2316,7 +2316,7 @@ it('diff mode inline highlighting', function()
|
||||
|
||||
command('set diffopt=internal,filler diffopt+=inline:word')
|
||||
screen:expect([[
|
||||
{7: }{27:^abcdef}{4: }{27:ghi}{4: }{27:jk}{4: n }│{7: }{27:aBcef}{4: }{27:gHi}{4: }{27:lm}{4: n }|
|
||||
{7: }{27:^abcdef ghi jk}{4: n }│{7: }{27:aBcef gHi lm}{4: n }|
|
||||
{7: }{22:x }│{7: }{23:----------------}|
|
||||
{7: }y │{7: }y |
|
||||
{7: }{23:----------------}│{7: }{22:z }|
|
||||
@@ -2497,7 +2497,7 @@ it('diff mode inline highlighting', function()
|
||||
)
|
||||
command('set diffopt=internal,filler diffopt+=inline:word')
|
||||
screen:expect([[
|
||||
{7: }{4:^🚀}{27:⛵️}{4:一二}{27:三}{4:ひら}{100:が}{4:な}{27:Δέλτα}{4: }{27:Δelta}{4: fooba}│{7: }{4:🚀}{27:🛸}{4:一二}{27:四}{4:ひらな}{27:δέλτα}{4: }{27:δelta}{4: foobar }|
|
||||
{7: }{4:^🚀}{27:⛵️}{4:一二}{27:三}{4:ひら}{27:がなΔέλτα Δelta}{4: fooba}│{7: }{4:🚀}{27:🛸}{4:一二}{27:四}{4:ひら}{27:なδέλτα δelta}{4: foobar }|
|
||||
{1:~ }│{1:~ }|*17
|
||||
{3:Xdifile1 }{2:Xdifile2 }|
|
||||
|
|
||||
@@ -2573,9 +2573,9 @@ it('diff mode inline highlighting', function()
|
||||
]])
|
||||
command('set diffopt=internal,filler diffopt+=inline:word,iwhite')
|
||||
screen:expect([[
|
||||
{7: }{4:^this is }│{7: }{4:this is }{27:some}{4: }{27:test}{4: }|
|
||||
{7: }{27:sometest}{4: }{27:text}{4: }{27:foo}{4: }│{7: }{27:texts}{4: }|
|
||||
{7: }{27:baz}{4: }{27:abc}{4: }{27:def}{4: }│{7: }{27:foo}{4: }{27:bar}{4: }{27:abX}{4: }{27:Yef}{4: }|
|
||||
{7: }{4:^this is }│{7: }{4:this is }{27:some test}{4: }|
|
||||
{7: }{27:sometest text foo}{4: }│{7: }{27:texts}{4: }|
|
||||
{7: }{27:baz abc def}{4: }│{7: }{27:foo bar abX}{4: }{27:Yef}{4: }|
|
||||
{7: }{27:one}{4: }│{7: }{27:oneword}{4: another word }|
|
||||
{7: }{27:word}{4: another word }│{7: }{23:----------------------}|
|
||||
{7: }{22:additional line }│{7: }{23:----------------------}|
|
||||
@@ -2597,9 +2597,9 @@ it('diff mode inline highlighting', function()
|
||||
]])
|
||||
command('set diffopt=internal,filler diffopt+=inline:word,iwhiteeol')
|
||||
screen:expect([[
|
||||
{7: }{4:^this }{100: }{4:is }│{7: }{4:this is }{27:some}{4: }{27:test}{4: }|
|
||||
{7: }{27:sometest}{4: }{27:text}{4: foo }│{7: }{27:texts}{4: }|
|
||||
{7: }{27:baz}{4: }{27:abc}{4: }{27:def}{4: }│{7: }{4:foo }{27:bar}{4: }{27:abX}{4: }{27:Yef}{4: }|
|
||||
{7: }{4:^this }{100: }{4:is }│{7: }{4:this is }{27:some test}{4: }|
|
||||
{7: }{27:sometest text}{4: foo }│{7: }{27:texts}{4: }|
|
||||
{7: }{27:baz abc def}{4: }│{7: }{4:foo }{27:bar abX Yef}{4: }|
|
||||
{7: }{27:one}{4: }│{7: }{27:oneword}{4: another word }|
|
||||
{7: }{27:word}{4: another word }│{7: }{23:----------------------}|
|
||||
{7: }{22:additional line }│{7: }{23:----------------------}|
|
||||
@@ -2765,7 +2765,7 @@ it('diff mode inline highlighting with 3 buffers', function()
|
||||
command('set iskeyword+=+ | 2wincmd w | set iskeyword+=- | 1wincmd w')
|
||||
command('set diffopt=internal,filler diffopt+=inline:word')
|
||||
local s4 = [[
|
||||
{7: }{27:^This+is}{4:=}{27:a}{4:-setence }│{7: }{27:This+is}{4:=}{27:another}{4:-setenc}│{7: }{27:That+is}{4:=}{27:a}{4:-setence }|
|
||||
{7: }{27:^This+is=a}{4:-setence }│{7: }{27:This+is=another}{4:-setenc}│{7: }{27:That+is=a}{4:-setence }|
|
||||
{1:~ }│{1:~ }│{1:~ }|*17
|
||||
{3:Xdifile1 }{2:Xdifile2 Xdifile3 }|
|
||||
|
|
||||
|
||||
Reference in New Issue
Block a user