feat(diff): merge adjacent blocks using inline:word (#37085)

vim-patch:9.2.0174: diff: inline word-diffs can be fragmented Problem: When using 'diffopt=inline:word', lines were excessively fragmented with punctuation creating separate highlight blocks, making it harder to read the diffs. Solution: Add 'diff_refine_inline_word_highlight()' to merge adjacent diff blocks that are separated by small gaps of non-word characters (up to 5 bytes by default) (HarshK97). When using inline:word diff mode, adjacent changed words separated by punctuation or whitespace are now merged into a single highlight block if the gap between them consists of at most 5 bytes of non-word characters. This creates more readable diffs and closely matches GitHub's own diff display. closes: vim/vim#19098 42c6686c78
2026-05-19 11:31:12 +00:00 · 2026-03-17 17:41:55 +05:30
parent 9ab6c607cc
commit a5b8cf145d
6 changed files with 134 additions and 13 deletions
--- a/runtime/doc/news.txt
+++ b/runtime/doc/news.txt
@@ -366,6 +366,8 @@ OPTIONS
 • 'completeopt' flag "nearest" sorts completion results by distance to cursor.
 • 'diffanchors' specifies addresses to anchor a diff.
 • 'diffopt' `inline:` configures diff highlighting for changes within a line.
+• 'diffopt' with `inline:word` now merges adjacent diff blocks separated by
+  gaps of up to 5 bytes of non-word characters, to improve readability.
 • 'fillchars' has new flag "foldinner".
 • 'fsync' and 'grepformat' are now |global-local| options.
 • 'listchars' has new flag "leadtab".
--- a/runtime/doc/options.txt
+++ b/runtime/doc/options.txt
@@ -2256,7 +2256,10 @@ A jump table for the options with a short description can be found at |Q_op|.
 					difference.  Non-alphanumeric
 					multi-byte characters such as emoji
 					and CJK characters are considered
-					individual words.
+					individual words.  Small gaps of
+					non-word characters (5 bytes or less)
+					between changes are merged into a
+					single highlight block.

 		internal	Use the internal diff library.  This is
 				ignored when 'diffexpr' is set.  *E960*
--- a/runtime/lua/vim/_meta/options.lua
+++ b/runtime/lua/vim/_meta/options.lua
@@ -1873,7 +1873,10 @@ vim.go.dex = vim.go.diffexpr
 --- 				difference.  Non-alphanumeric
 --- 				multi-byte characters such as emoji
 --- 				and CJK characters are considered
--- 				individual words.
+--- 				individual words.  Small gaps of
+--- 				non-word characters (5 bytes or less)
+--- 				between changes are merged into a
+--- 				single highlight block.
 ---
 --- 	internal	Use the internal diff library.  This is
 --- 			ignored when 'diffexpr' is set.  *E960*
--- a/src/nvim/diff.c
+++ b/src/nvim/diff.c
@@ -99,6 +99,7 @@ static int diff_flags = DIFF_INTERNAL | DIFF_FILLER | DIFF_CLOSE_OFF
                        | DIFF_LINEMATCH | DIFF_INLINE_CHAR;

 static int diff_algorithm = XDF_INDENT_HEURISTIC;
+static int diff_word_gap = 5;  // gap threshold for inline:word
 static int linematch_lines = 40;

 #define LBUFLEN 50               // length of line in diff file
@@ -3093,6 +3094,113 @@ static void diff_refine_inline_char_highlight(diff_T *dp_orig, garray_T *linemap
  } while (pass++ < 4);  // use limited number of passes to avoid excessive looping
 }

+/// Merge inline word-diff blocks that lie on the same line of buffer "idx1" and are
+/// separated only by a short run of non-word characters, to reduce fragmentation.
+static void diff_refine_inline_word_highlight(diff_T *dp_orig, garray_T *linemap, int idx1,
+                                              linenr_T start_lnum)
+{
+  int pass = 1;
+  do {
+    diff_T *dp = dp_orig;
+
+    while (dp != NULL && dp->df_next != NULL) {
+      // Linemap indexes of the last entry of this block and the first of the next.
+      // "last" is -1 for an empty block at the very start: nothing precedes the gap.
+      linenr_T last = dp->df_lnum[idx1] + dp->df_count[idx1] - 2;
+      linenr_T first = dp->df_next->df_lnum[idx1] - 1;
+      if (last < 0 || last + 1 >= linemap[idx1].ga_len || first >= linemap[idx1].ga_len) {
+        dp = dp->df_next;
+        continue;
+      }
+
+      linemap_entry_T *entry1 = &((linemap_entry_T *)linemap[idx1].ga_data)[last];
+      linemap_entry_T *entry2 = &((linemap_entry_T *)linemap[idx1].ga_data)[first];
+
+      // Skip if the two blocks are on different buffer lines
+      if (entry1->lineoff != entry2->lineoff) {
+        dp = dp->df_next;
+        continue;
+      }
+
+      // The gap runs from the end of this block to the start of the next one
+      int gap_start = entry1->byte_start + entry1->num_bytes;
+      int gap_end = entry2->byte_start;
+      int gap_size = gap_end - gap_start;
+
+      // Merge adjacent diff blocks separated by small gaps to reduce visual
+      // fragmentation.  Only gaps of 1..diff_word_gap bytes (5 by default) are
+      // candidates; this covers common separators such as spaces and short
+      // punctuation runs while keeping visually distinct changes apart.
+      if (gap_size <= 0 || gap_size > diff_word_gap) {
+        dp = dp->df_next;
+        continue;
+      }
+
+      // Get the text between the two blocks
+      char *line = ml_get_buf(curtab->tp_diffbuf[idx1],
+                              start_lnum + entry1->lineoff);
+      char *gap_text = line + gap_start;
+
+      // Check that the gap holds no word characters (per this buffer's chartab)
+      bool only_non_word = true;
+      bool has_content = false;
+      for (int i = 0; i < gap_size && gap_text[i] != NUL; i++) {
+        has_content = true;
+        int char_class = mb_get_class_tab(gap_text + i,
+                                          curtab->tp_diffbuf[idx1]->b_chartab);
+        // class 2 is word characters, if we find any, don't merge
+        // NOTE(review): the gap is scanned per byte, not per character -- confirm intended
+        if (char_class == 2) {
+          only_non_word = false;
+          break;
+        }
+      }
+
+      // Gap qualifies; merge only if the changed bytes are at least twice the gap size
+      if (has_content && only_non_word) {
+        long total_change_bytes = 0;
+        for (int i = 0; i < DB_COUNT; i++) {
+          if (curtab->tp_diffbuf[i] != NULL) {
+            // count bytes in the first block
+            for (int k = 0; k < dp->df_count[i]; k++) {
+              int idx = dp->df_lnum[i] + k - 1;
+              if (idx < linemap[i].ga_len) {
+                total_change_bytes += ((linemap_entry_T *)linemap[i].ga_data)[idx].num_bytes;
+              }
+            }
+            // count bytes in the next block
+            for (int k = 0; k < dp->df_next->df_count[i]; k++) {
+              int idx = dp->df_next->df_lnum[i] + k - 1;
+              if (idx < linemap[i].ga_len) {
+                total_change_bytes += ((linemap_entry_T *)linemap[i].ga_data)[idx].num_bytes;
+              }
+            }
+          }
+        }
+
+        if (total_change_bytes >= gap_size * 2) {
+          // Extend the first block in every diff buffer so it also spans the next one
+          for (int i = 0; i < DB_COUNT; i++) {
+            if (curtab->tp_diffbuf[i] != NULL) {
+              dp->df_count[i] = dp->df_next->df_lnum[i] + dp->df_next->df_count[i]
+                                - dp->df_lnum[i];
+            }
+          }
+
+          diff_T *dp_next = dp->df_next;
+          dp->df_next = dp_next->df_next;
+          clear_diffblock(dp_next);
+
+          // Don't advance dp, check if can merge with the next block too
+          continue;
+        }
+      }
+
+      dp = dp->df_next;
+    }
+  } while (pass++ < 4);  // use limited number of passes to avoid excessive looping
+}
+
 /// Find the inline difference within a diff block among different buffers.  Do
 /// this by splitting each block's content into characters or words, and then
 /// use internal xdiff to calculate the per-character/word diff.  The result is
@@ -3319,7 +3427,9 @@ static void diff_find_change_inline_diff(diff_T *dp)
  }
  diff_T *new_diff = curtab->tp_first_diff;

-  if (diff_flags & DIFF_INLINE_CHAR && file1_idx != -1) {
+  if (diff_flags & DIFF_INLINE_WORD && file1_idx != -1) {
+    diff_refine_inline_word_highlight(new_diff, linemap, file1_idx, dp->df_lnum[file1_idx]);
+  } else if (diff_flags & DIFF_INLINE_CHAR && file1_idx != -1) {
    diff_refine_inline_char_highlight(new_diff, linemap, file1_idx);
  }

--- a/src/nvim/options.lua
+++ b/src/nvim/options.lua
@@ -2459,7 +2459,10 @@ local options = {
        				difference.  Non-alphanumeric
        				multi-byte characters such as emoji
        				and CJK characters are considered
-        				individual words.
+        				individual words.  Small gaps of
+        				non-word characters (5 bytes or less)
+        				between changes are merged into a
+        				single highlight block.

        	internal	Use the internal diff library.  This is
        			ignored when 'diffexpr' is set.  *E960*
--- a/test/functional/ui/diff_spec.lua
+++ b/test/functional/ui/diff_spec.lua
@@ -2316,7 +2316,7 @@ it('diff mode inline highlighting', function()

  command('set diffopt=internal,filler diffopt+=inline:word')
  screen:expect([[
-    {7:  }{27:^abcdef}{4: }{27:ghi}{4: }{27:jk}{4: n }│{7:  }{27:aBcef}{4: }{27:gHi}{4: }{27:lm}{4: n  }|
+    {7:  }{27:^abcdef ghi jk}{4: n }│{7:  }{27:aBcef gHi lm}{4: n  }|
    {7:  }{22:x               }│{7:  }{23:----------------}|
    {7:  }y               │{7:  }y               |
    {7:  }{23:----------------}│{7:  }{22:z               }|
@@ -2497,7 +2497,7 @@ it('diff mode inline highlighting', function()
  )
  command('set diffopt=internal,filler diffopt+=inline:word')
  screen:expect([[
-    {7:  }{4:^🚀}{27:⛵️}{4:一二}{27:三}{4:ひら}{100:が}{4:な}{27:Δέλτα}{4: }{27:Δelta}{4: fooba}│{7:  }{4:🚀}{27:🛸}{4:一二}{27:四}{4:ひらな}{27:δέλτα}{4: }{27:δelta}{4: foobar }|
+    {7:  }{4:^🚀}{27:⛵️}{4:一二}{27:三}{4:ひら}{27:がなΔέλτα Δelta}{4: fooba}│{7:  }{4:🚀}{27:🛸}{4:一二}{27:四}{4:ひら}{27:なδέλτα δelta}{4: foobar }|
    {1:~                                    }│{1:~                                    }|*17
    {3:Xdifile1                              }{2:Xdifile2                             }|
                                                                               |
@@ -2573,9 +2573,9 @@ it('diff mode inline highlighting', function()
  ]])
  command('set diffopt=internal,filler diffopt+=inline:word,iwhite')
  screen:expect([[
-    {7:  }{4:^this   is             }│{7:  }{4:this is }{27:some}{4: }{27:test}{4:     }|
-    {7:  }{27:sometest}{4: }{27:text}{4: }{27:foo}{4:     }│{7:  }{27:texts}{4:                 }|
-    {7:  }{27:baz}{4: }{27:abc}{4: }{27:def}{4:           }│{7:  }{27:foo}{4: }{27:bar}{4: }{27:abX}{4: }{27:Yef}{4:       }|
+    {7:  }{4:^this   is             }│{7:  }{4:this is }{27:some test}{4:     }|
+    {7:  }{27:sometest text foo}{4:     }│{7:  }{27:texts}{4:                 }|
+    {7:  }{27:baz abc def}{4:           }│{7:  }{27:foo bar abX}{4: }{27:Yef}{4:       }|
    {7:  }{27:one}{4:                   }│{7:  }{27:oneword}{4: another word  }|
    {7:  }{27:word}{4: another word     }│{7:  }{23:----------------------}|
    {7:  }{22:additional line       }│{7:  }{23:----------------------}|
@@ -2597,9 +2597,9 @@ it('diff mode inline highlighting', function()
  ]])
  command('set diffopt=internal,filler diffopt+=inline:word,iwhiteeol')
  screen:expect([[
-    {7:  }{4:^this }{100:  }{4:is             }│{7:  }{4:this is }{27:some}{4: }{27:test}{4:     }|
-    {7:  }{27:sometest}{4: }{27:text}{4: foo     }│{7:  }{27:texts}{4:                 }|
-    {7:  }{27:baz}{4: }{27:abc}{4: }{27:def}{4:           }│{7:  }{4:foo }{27:bar}{4: }{27:abX}{4: }{27:Yef}{4:       }|
+    {7:  }{4:^this }{100:  }{4:is             }│{7:  }{4:this is }{27:some test}{4:     }|
+    {7:  }{27:sometest text}{4: foo     }│{7:  }{27:texts}{4:                 }|
+    {7:  }{27:baz abc def}{4:           }│{7:  }{4:foo }{27:bar abX Yef}{4:       }|
    {7:  }{27:one}{4:                   }│{7:  }{27:oneword}{4: another word  }|
    {7:  }{27:word}{4: another word     }│{7:  }{23:----------------------}|
    {7:  }{22:additional line       }│{7:  }{23:----------------------}|
@@ -2765,7 +2765,7 @@ it('diff mode inline highlighting with 3 buffers', function()
  command('set iskeyword+=+ | 2wincmd w | set iskeyword+=- | 1wincmd w')
  command('set diffopt=internal,filler diffopt+=inline:word')
  local s4 = [[
-    {7:  }{27:^This+is}{4:=}{27:a}{4:-setence      }│{7:  }{27:This+is}{4:=}{27:another}{4:-setenc}│{7:  }{27:That+is}{4:=}{27:a}{4:-setence     }|
+    {7:  }{27:^This+is=a}{4:-setence      }│{7:  }{27:This+is=another}{4:-setenc}│{7:  }{27:That+is=a}{4:-setence     }|
    {1:~                        }│{1:~                       }│{1:~                       }|*17
    {3:Xdifile1                  }{2:Xdifile2                 Xdifile3                }|
                                                                               |