mirror of
				https://github.com/neovim/neovim.git
				synced 2025-10-26 12:27:24 +00:00 
			
		
		
		
	fix(diff): use mmfile_t in linematch
Problem: Linematch used to use strchr to navigate a string, however strchr does not supoprt embedded NULs. Solution: Use `mmfile_t` instead of `char *` in linematch and introduce `strnchr()`. Also remove heap allocations from `matching_char_iwhite()` Fixes: #30505
This commit is contained in:
		| @@ -713,8 +713,8 @@ vim.diff({a}, {b}, {opts})                                        *vim.diff()* | ||||
|     Parameters: ~ | ||||
|       • {a}     (`string`) First string to compare | ||||
|       • {b}     (`string`) Second string to compare | ||||
|       • {opts}  (`table`) Optional parameters: | ||||
|                 • {on_hunk} | ||||
|       • {opts}  (`table?`) Optional parameters: | ||||
|                 • {on_hunk}? | ||||
|                   (`fun(start_a: integer, count_a: integer, start_b: integer, count_b: integer): integer`) | ||||
|                   Invoked for each hunk in the diff. Return a negative number | ||||
|                   to cancel the callback for any remaining hunks. Arguments: | ||||
| @@ -722,33 +722,33 @@ vim.diff({a}, {b}, {opts})                                        *vim.diff()* | ||||
|                   • `count_a` (`integer`): Hunk size in {a}. | ||||
|                   • `start_b` (`integer`): Start line of hunk in {b}. | ||||
|                   • `count_b` (`integer`): Hunk size in {b}. | ||||
|                 • {result_type} (`'unified'|'indices'`, default: `'unified'`) | ||||
|                 • {result_type}? (`'unified'|'indices'`, default: `'unified'`) | ||||
|                   Form of the returned diff: | ||||
|                   • `unified`: String in unified format. | ||||
|                   • `indices`: Array of hunk locations. Note: This option is | ||||
|                     ignored if `on_hunk` is used. | ||||
|                 • {linematch} (`boolean|integer`) Run linematch on the | ||||
|                 • {linematch}? (`boolean|integer`) Run linematch on the | ||||
|                   resulting hunks from xdiff. When integer, only hunks upto | ||||
|                   this size in lines are run through linematch. Requires | ||||
|                   `result_type = indices`, ignored otherwise. | ||||
|                 • {algorithm} (`'myers'|'minimal'|'patience'|'histogram'`, | ||||
|                 • {algorithm}? (`'myers'|'minimal'|'patience'|'histogram'`, | ||||
|                   default: `'myers'`) Diff algorithm to use. Values: | ||||
|                   • `myers`: the default algorithm | ||||
|                   • `minimal`: spend extra time to generate the smallest | ||||
|                     possible diff | ||||
|                   • `patience`: patience diff algorithm | ||||
|                   • `histogram`: histogram diff algorithm | ||||
|                 • {ctxlen} (`integer`) Context length | ||||
|                 • {interhunkctxlen} (`integer`) Inter hunk context length | ||||
|                 • {ignore_whitespace} (`boolean`) Ignore whitespace | ||||
|                 • {ignore_whitespace_change} (`boolean`) Ignore whitespace | ||||
|                 • {ctxlen}? (`integer`) Context length | ||||
|                 • {interhunkctxlen}? (`integer`) Inter hunk context length | ||||
|                 • {ignore_whitespace}? (`boolean`) Ignore whitespace | ||||
|                 • {ignore_whitespace_change}? (`boolean`) Ignore whitespace | ||||
|                   change | ||||
|                 • {ignore_whitespace_change_at_eol} (`boolean`) Ignore | ||||
|                 • {ignore_whitespace_change_at_eol}? (`boolean`) Ignore | ||||
|                   whitespace change at end-of-line. | ||||
|                 • {ignore_cr_at_eol} (`boolean`) Ignore carriage return at | ||||
|                 • {ignore_cr_at_eol}? (`boolean`) Ignore carriage return at | ||||
|                   end-of-line | ||||
|                 • {ignore_blank_lines} (`boolean`) Ignore blank lines | ||||
|                 • {indent_heuristic} (`boolean`) Use the indent heuristic for | ||||
|                 • {ignore_blank_lines}? (`boolean`) Ignore blank lines | ||||
|                 • {indent_heuristic}? (`boolean`) Use the indent heuristic for | ||||
|                   the internal diff library. | ||||
|  | ||||
|     Return: ~ | ||||
|   | ||||
| @@ -11,19 +11,19 @@ | ||||
| ---   - `count_a` (`integer`): Hunk size in {a}. | ||||
| ---   - `start_b` (`integer`): Start line of hunk in {b}. | ||||
| ---   - `count_b` (`integer`): Hunk size in {b}. | ||||
| --- @field on_hunk fun(start_a: integer, count_a: integer, start_b: integer, count_b: integer): integer | ||||
| --- @field on_hunk? fun(start_a: integer, count_a: integer, start_b: integer, count_b: integer): integer | ||||
| --- | ||||
| --- Form of the returned diff: | ||||
| ---   - `unified`: String in unified format. | ||||
| ---   - `indices`: Array of hunk locations. | ||||
| --- Note: This option is ignored if `on_hunk` is used. | ||||
| --- (default: `'unified'`) | ||||
| --- @field result_type 'unified'|'indices' | ||||
| --- @field result_type? 'unified'|'indices' | ||||
| --- | ||||
| --- Run linematch on the resulting hunks from xdiff. When integer, only hunks | ||||
| --- upto this size in lines are run through linematch. | ||||
| --- Requires `result_type = indices`, ignored otherwise. | ||||
| --- @field linematch boolean|integer | ||||
| --- @field linematch? boolean|integer | ||||
| --- | ||||
| --- Diff algorithm to use. Values: | ||||
| ---   - `myers`: the default algorithm | ||||
| @@ -31,15 +31,15 @@ | ||||
| ---   - `patience`: patience diff algorithm | ||||
| ---   - `histogram`: histogram diff algorithm | ||||
| --- (default: `'myers'`) | ||||
| --- @field algorithm 'myers'|'minimal'|'patience'|'histogram' | ||||
| --- @field ctxlen integer Context length | ||||
| --- @field interhunkctxlen integer Inter hunk context length | ||||
| --- @field ignore_whitespace boolean Ignore whitespace | ||||
| --- @field ignore_whitespace_change boolean Ignore whitespace change | ||||
| --- @field ignore_whitespace_change_at_eol boolean Ignore whitespace change at end-of-line. | ||||
| --- @field ignore_cr_at_eol boolean Ignore carriage return at end-of-line | ||||
| --- @field ignore_blank_lines boolean Ignore blank lines | ||||
| --- @field indent_heuristic boolean Use the indent heuristic for the internal diff library. | ||||
| --- @field algorithm? 'myers'|'minimal'|'patience'|'histogram' | ||||
| --- @field ctxlen? integer Context length | ||||
| --- @field interhunkctxlen? integer Inter hunk context length | ||||
| --- @field ignore_whitespace? boolean Ignore whitespace | ||||
| --- @field ignore_whitespace_change? boolean Ignore whitespace change | ||||
| --- @field ignore_whitespace_change_at_eol? boolean Ignore whitespace change at end-of-line. | ||||
| --- @field ignore_cr_at_eol? boolean Ignore carriage return at end-of-line | ||||
| --- @field ignore_blank_lines? boolean Ignore blank lines | ||||
| --- @field indent_heuristic? boolean Use the indent heuristic for the internal diff library. | ||||
|  | ||||
| -- luacheck: no unused args | ||||
|  | ||||
| @@ -65,7 +65,7 @@ | ||||
| --- | ||||
| ---@param a string First string to compare | ||||
| ---@param b string Second string to compare | ||||
| ---@param opts vim.diff.Opts | ||||
| ---@param opts? vim.diff.Opts | ||||
| ---@return string|integer[][]? | ||||
| ---     See {opts.result_type}. `nil` if {opts.on_hunk} is given. | ||||
| function vim.diff(a, b, opts) end | ||||
|   | ||||
| @@ -881,6 +881,7 @@ def CheckIncludes(filename, lines, error): | ||||
|             "nvim/func_attr.h", | ||||
|             "termkey/termkey.h", | ||||
|             "vterm/vterm.h", | ||||
|             "xdiff/xdiff.h", | ||||
|             ] | ||||
|  | ||||
|     for i in check_includes_ignore: | ||||
|   | ||||
| @@ -2005,7 +2005,7 @@ static void run_linematch_algorithm(diff_T *dp) | ||||
| { | ||||
|   // define buffers for diff algorithm | ||||
|   mmfile_t diffbufs_mm[DB_COUNT]; | ||||
|   const char *diffbufs[DB_COUNT]; | ||||
|   const mmfile_t *diffbufs[DB_COUNT]; | ||||
|   int diff_length[DB_COUNT]; | ||||
|   size_t ndiffs = 0; | ||||
|   for (int i = 0; i < DB_COUNT; i++) { | ||||
| @@ -2015,9 +2015,7 @@ static void run_linematch_algorithm(diff_T *dp) | ||||
|       diff_write_buffer(curtab->tp_diffbuf[i], &diffbufs_mm[ndiffs], | ||||
|                         dp->df_lnum[i], dp->df_lnum[i] + dp->df_count[i] - 1); | ||||
|  | ||||
|       // we want to get the char* to the diff buffer that was just written | ||||
|       // we add it to the array of char*, diffbufs | ||||
|       diffbufs[ndiffs] = diffbufs_mm[ndiffs].ptr; | ||||
|       diffbufs[ndiffs] = &diffbufs_mm[ndiffs]; | ||||
|  | ||||
|       // keep track of the length of this diff block to pass it to the linematch | ||||
|       // algorithm | ||||
|   | ||||
| @@ -10,6 +10,8 @@ | ||||
| #include "nvim/macros_defs.h" | ||||
| #include "nvim/memory.h" | ||||
| #include "nvim/pos_defs.h" | ||||
| #include "nvim/strings.h" | ||||
| #include "xdiff/xdiff.h" | ||||
|  | ||||
| #define LN_MAX_BUFS 8 | ||||
| #define LN_DECISION_MAX 255  // pow(2, LN_MAX_BUFS(8)) - 1 = 255 | ||||
| @@ -29,48 +31,48 @@ struct diffcmppath_S { | ||||
| # include "linematch.c.generated.h" | ||||
| #endif | ||||
|  | ||||
| static size_t line_len(const char *s) | ||||
| static size_t line_len(const mmfile_t *m) | ||||
| { | ||||
|   char *end = strchr(s, '\n'); | ||||
|   char *s = m->ptr; | ||||
|   size_t n = (size_t)m->size; | ||||
|   char *end = strnchr(s, &n, '\n'); | ||||
|   if (end) { | ||||
|     return (size_t)(end - s); | ||||
|   } | ||||
|   return strlen(s); | ||||
|   return (size_t)m->size; | ||||
| } | ||||
|  | ||||
| #define MATCH_CHAR_MAX_LEN 800 | ||||
|  | ||||
| /// Same as matching_chars but ignore whitespace | ||||
| /// | ||||
| /// @param s1 | ||||
| /// @param s2 | ||||
| static int matching_chars_iwhite(const char *s1, const char *s2) | ||||
| static int matching_chars_iwhite(const mmfile_t *s1, const mmfile_t *s2) | ||||
| { | ||||
|   // the newly processed strings that will be compared | ||||
|   // delete the white space characters, and/or replace all upper case with lower | ||||
|   char *strsproc[2]; | ||||
|   const char *strsorig[2] = { s1, s2 }; | ||||
|   // delete the white space characters | ||||
|   mmfile_t sp[2]; | ||||
|   char p[2][MATCH_CHAR_MAX_LEN]; | ||||
|   for (int k = 0; k < 2; k++) { | ||||
|     size_t d = 0; | ||||
|     size_t i = 0; | ||||
|     size_t slen = line_len(strsorig[k]); | ||||
|     strsproc[k] = xmalloc((slen + 1) * sizeof(char)); | ||||
|     while (d + i < slen) { | ||||
|       char e = strsorig[k][i + d]; | ||||
|     const mmfile_t *s = k == 0 ? s1 : s2; | ||||
|     size_t pi = 0; | ||||
|     size_t slen = MIN(MATCH_CHAR_MAX_LEN - 1, line_len(s)); | ||||
|     for (size_t i = 0; i <= slen; i++) { | ||||
|       char e = s->ptr[i]; | ||||
|       if (e != ' ' && e != '\t') { | ||||
|         strsproc[k][i] = e; | ||||
|         i++; | ||||
|       } else { | ||||
|         d++; | ||||
|         p[k][pi] = e; | ||||
|         pi++; | ||||
|       } | ||||
|     } | ||||
|     strsproc[k][i] = NUL; | ||||
|   } | ||||
|   int matching = matching_chars(strsproc[0], strsproc[1]); | ||||
|   xfree(strsproc[0]); | ||||
|   xfree(strsproc[1]); | ||||
|   return matching; | ||||
| } | ||||
|  | ||||
| #define MATCH_CHAR_MAX_LEN 800 | ||||
|     sp[k] = (mmfile_t){ | ||||
|       .ptr = p[k], | ||||
|       .size = (int)pi, | ||||
|     }; | ||||
|   } | ||||
|   return matching_chars(&sp[0], &sp[1]); | ||||
| } | ||||
|  | ||||
| /// Return matching characters between "s1" and "s2" whilst respecting sequence order. | ||||
| /// Consider the case of two strings 'AAACCC' and 'CCCAAA', the | ||||
| @@ -83,12 +85,14 @@ static int matching_chars_iwhite(const char *s1, const char *s2) | ||||
| ///   matching_chars("abcdefg", "gfedcba")         -> 1  // all characters in common, | ||||
| ///                                                      // but only at most 1 in sequence | ||||
| /// | ||||
| /// @param s1 | ||||
| /// @param s2 | ||||
| static int matching_chars(const char *s1, const char *s2) | ||||
| /// @param m1 | ||||
| /// @param m2 | ||||
| static int matching_chars(const mmfile_t *m1, const mmfile_t *m2) | ||||
| { | ||||
|   size_t s1len = MIN(MATCH_CHAR_MAX_LEN - 1, line_len(s1)); | ||||
|   size_t s2len = MIN(MATCH_CHAR_MAX_LEN - 1, line_len(s2)); | ||||
|   size_t s1len = MIN(MATCH_CHAR_MAX_LEN - 1, line_len(m1)); | ||||
|   size_t s2len = MIN(MATCH_CHAR_MAX_LEN - 1, line_len(m2)); | ||||
|   char *s1 = m1->ptr; | ||||
|   char *s2 = m2->ptr; | ||||
|   int matrix[2][MATCH_CHAR_MAX_LEN] = { 0 }; | ||||
|   bool icur = 1;  // save space by storing only two rows for i axis | ||||
|   for (size_t i = 0; i < s1len; i++) { | ||||
| @@ -119,13 +123,13 @@ static int matching_chars(const char *s1, const char *s2) | ||||
| /// @param sp | ||||
| /// @param fomvals | ||||
| /// @param n | ||||
| static int count_n_matched_chars(const char **sp, const size_t n, bool iwhite) | ||||
| static int count_n_matched_chars(mmfile_t **sp, const size_t n, bool iwhite) | ||||
| { | ||||
|   int matched_chars = 0; | ||||
|   int matched = 0; | ||||
|   for (size_t i = 0; i < n; i++) { | ||||
|     for (size_t j = i + 1; j < n; j++) { | ||||
|       if (sp[i] != NULL && sp[j] != NULL) { | ||||
|       if (sp[i]->ptr != NULL && sp[j]->ptr != NULL) { | ||||
|         matched++; | ||||
|         // TODO(lewis6991): handle whitespace ignoring higher up in the stack | ||||
|         matched_chars += iwhite ? matching_chars_iwhite(sp[i], sp[j]) | ||||
| @@ -143,15 +147,17 @@ static int count_n_matched_chars(const char **sp, const size_t n, bool iwhite) | ||||
|   return matched_chars; | ||||
| } | ||||
|  | ||||
| void fastforward_buf_to_lnum(const char **s, linenr_T lnum) | ||||
| mmfile_t fastforward_buf_to_lnum(mmfile_t s, linenr_T lnum) | ||||
| { | ||||
|   for (int i = 0; i < lnum - 1; i++) { | ||||
|     *s = strchr(*s, '\n'); | ||||
|     if (!*s) { | ||||
|       return; | ||||
|     s.ptr = strnchr(s.ptr, (size_t *)&s.size, '\n'); | ||||
|     if (!s.ptr) { | ||||
|       break; | ||||
|     } | ||||
|     (*s)++; | ||||
|     s.ptr++; | ||||
|     s.size--; | ||||
|   } | ||||
|   return s; | ||||
| } | ||||
|  | ||||
| /// try all the different ways to compare these lines and use the one that | ||||
| @@ -167,25 +173,25 @@ void fastforward_buf_to_lnum(const char **s, linenr_T lnum) | ||||
| /// @param diff_blk | ||||
| static void try_possible_paths(const int *df_iters, const size_t *paths, const int npaths, | ||||
|                                const int path_idx, int *choice, diffcmppath_T *diffcmppath, | ||||
|                                const int *diff_len, const size_t ndiffs, const char **diff_blk, | ||||
|                                const int *diff_len, const size_t ndiffs, const mmfile_t **diff_blk, | ||||
|                                bool iwhite) | ||||
| { | ||||
|   if (path_idx == npaths) { | ||||
|     if ((*choice) > 0) { | ||||
|       int from_vals[LN_MAX_BUFS] = { 0 }; | ||||
|       const int *to_vals = df_iters; | ||||
|       const char *current_lines[LN_MAX_BUFS]; | ||||
|       mmfile_t mm[LN_MAX_BUFS];  // stack memory for current_lines | ||||
|       mmfile_t *current_lines[LN_MAX_BUFS]; | ||||
|       for (size_t k = 0; k < ndiffs; k++) { | ||||
|         from_vals[k] = df_iters[k]; | ||||
|         // get the index at all of the places | ||||
|         if ((*choice) & (1 << k)) { | ||||
|           from_vals[k]--; | ||||
|           const char *p = diff_blk[k]; | ||||
|           fastforward_buf_to_lnum(&p, df_iters[k]); | ||||
|           current_lines[k] = p; | ||||
|           mm[k] = fastforward_buf_to_lnum(*diff_blk[k], df_iters[k]); | ||||
|         } else { | ||||
|           current_lines[k] = NULL; | ||||
|           mm[k] = (mmfile_t){ 0 }; | ||||
|         } | ||||
|         current_lines[k] = &mm[k]; | ||||
|       } | ||||
|       size_t unwrapped_idx_from = unwrap_indexes(from_vals, diff_len, ndiffs); | ||||
|       size_t unwrapped_idx_to = unwrap_indexes(to_vals, diff_len, ndiffs); | ||||
| @@ -244,7 +250,7 @@ static size_t unwrap_indexes(const int *values, const int *diff_len, const size_ | ||||
| /// @param ndiffs | ||||
| /// @param diff_blk | ||||
| static void populate_tensor(int *df_iters, const size_t ch_dim, diffcmppath_T *diffcmppath, | ||||
|                             const int *diff_len, const size_t ndiffs, const char **diff_blk, | ||||
|                             const int *diff_len, const size_t ndiffs, const mmfile_t **diff_blk, | ||||
|                             bool iwhite) | ||||
| { | ||||
|   if (ch_dim == ndiffs) { | ||||
| @@ -327,7 +333,7 @@ static void populate_tensor(int *df_iters, const size_t ch_dim, diffcmppath_T *d | ||||
| /// @param ndiffs | ||||
| /// @param [out] [allocated] decisions | ||||
| /// @return the length of decisions | ||||
| size_t linematch_nbuffers(const char **diff_blk, const int *diff_len, const size_t ndiffs, | ||||
| size_t linematch_nbuffers(const mmfile_t **diff_blk, const int *diff_len, const size_t ndiffs, | ||||
|                           int **decisions, bool iwhite) | ||||
| { | ||||
|   assert(ndiffs <= LN_MAX_BUFS); | ||||
|   | ||||
| @@ -3,6 +3,7 @@ | ||||
| #include <stddef.h>  // IWYU pragma: keep | ||||
|  | ||||
| #include "nvim/pos_defs.h"  // IWYU pragma: keep | ||||
| #include "xdiff/xdiff.h" | ||||
|  | ||||
| #ifdef INCLUDE_GENERATED_DECLARATIONS | ||||
| # include "linematch.h.generated.h" | ||||
|   | ||||
| @@ -67,11 +67,11 @@ static void get_linematch_results(lua_State *lstate, mmfile_t *ma, mmfile_t *mb, | ||||
|                                   int count_a, int start_b, int count_b, bool iwhite) | ||||
| { | ||||
|   // get the pointer to char of the start of the diff to pass it to linematch algorithm | ||||
|   const char *diff_begin[2] = { ma->ptr, mb->ptr }; | ||||
|   int diff_length[2] = { count_a, count_b }; | ||||
|   mmfile_t ma0 = fastforward_buf_to_lnum(*ma, (linenr_T)start_a + 1); | ||||
|   mmfile_t mb0 = fastforward_buf_to_lnum(*mb, (linenr_T)start_b + 1); | ||||
|  | ||||
|   fastforward_buf_to_lnum(&diff_begin[0], (linenr_T)start_a + 1); | ||||
|   fastforward_buf_to_lnum(&diff_begin[1], (linenr_T)start_b + 1); | ||||
|   const mmfile_t *diff_begin[2] = { &ma0, &mb0 }; | ||||
|   int diff_length[2] = { count_a, count_b }; | ||||
|  | ||||
|   int *decisions = NULL; | ||||
|   size_t decisions_length = linematch_nbuffers(diff_begin, diff_length, 2, &decisions, iwhite); | ||||
|   | ||||
| @@ -496,6 +496,20 @@ char *vim_strchr(const char *const string, const int c) | ||||
|   } | ||||
| } | ||||
|  | ||||
| // Sized version of strchr that can handle embedded NULs. | ||||
| // Adjusts n to the new size. | ||||
| char *strnchr(const char *p, size_t *n, int c) | ||||
| { | ||||
|   while (*n > 0) { | ||||
|     if (*p == c) { | ||||
|       return (char *)p; | ||||
|     } | ||||
|     p++; | ||||
|     (*n)--; | ||||
|   } | ||||
|   return NULL; | ||||
| } | ||||
|  | ||||
| // Sort an array of strings. | ||||
|  | ||||
| static int sort_compare(const void *s1, const void *s2) | ||||
|   | ||||
| @@ -174,4 +174,13 @@ describe('xdiff bindings', function() | ||||
|       pcall_err(exec_lua, [[vim.diff('a', 'b', { on_hunk = true })]]) | ||||
|     ) | ||||
|   end) | ||||
|  | ||||
|   it('can handle strings with embedded NUL characters (GitHub #30305)', function() | ||||
|     eq( | ||||
|       { { 0, 0, 1, 1 }, { 1, 0, 3, 2 } }, | ||||
|       exec_lua(function() | ||||
|         return vim.diff('\n', '\0\n\n\nb', { linematch = true, result_type = 'indices' }) | ||||
|       end) | ||||
|     ) | ||||
|   end) | ||||
| end) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Lewis Russell
					Lewis Russell