perf: improve utf_char2cells() performance (#27353)

`utf_char2cells()` calls `utf_printable()` twice (sometimes indirectly, through `vim_isprintc()`) for characters >= 128. The function can be refactored to call to it only once. `utf_printable()` uses binary search on ranges of unprintable characters to determine if a given character is printable. Since there are only 9 ranges, and the first range contains only one character, binary search can be replaced with SSE2 SIMD comparisons that check 8 ranges at a time, and the first range is checked separately. SSE2 is enabled by default in GCC, Clang and MSVC for x86-64. Add 3-byte utf-8 to screenpos_spec benchmarks.
2026-04-30 19:24:09 +00:00 · 2024-02-07 01:03:45 -06:00
parent 6725565258
commit cca8a78ea2
2 changed files with 239 additions and 179 deletions
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -445,24 +445,26 @@ int mb_get_class_tab(const char *p, const uint64_t *const chartab)
 static bool intable(const struct interval *table, size_t n_items, int c)
  FUNC_ATTR_PURE
 {
+  assert(n_items > 0);
  // first quick check for Latin1 etc. characters
  if (c < table[0].first) {
    return false;
  }

+  assert(n_items <= SIZE_MAX / 2);
  // binary search in table
-  int bot = 0;
-  int top = (int)(n_items - 1);
-  while (top >= bot) {
-    int mid = (bot + top) / 2;
+  size_t bot = 0;
+  size_t top = n_items;
+  do {
+    size_t mid = (bot + top) >> 1;
    if (table[mid].last < c) {
      bot = mid + 1;
    } else if (table[mid].first > c) {
-      top = mid - 1;
+      top = mid;
    } else {
      return true;
    }
-  }
+  } while (top > bot);
  return false;
 }

@@ -476,32 +478,28 @@ static bool intable(const struct interval *table, size_t n_items, int c)
 ///       gen_unicode_tables.lua, which must be manually invoked as needed.
 int utf_char2cells(int c)
 {
-  // Use the value from setcellwidths() at 0x80 and higher, unless the
-  // character is not printable.
-  if (c >= 0x80 && vim_isprintc(c)) {
-    int n = cw_value(c);
-    if (n != 0) {
-      return n;
-    }
+  if (c < 0x80) {
+    return 1;
  }

-  if (c >= 0x100) {
-    if (!utf_printable(c)) {
-      return 6;                 // unprintable, displays <xxxx>
-    }
-    if (intable(doublewidth, ARRAY_SIZE(doublewidth), c)) {
-      return 2;
-    }
-    if (p_emoji && intable(emoji_wide, ARRAY_SIZE(emoji_wide), c)) {
-      return 2;
-    }
-  } else if (c >= 0x80 && !vim_isprintc(c)) {
-    // Characters below 0x100 are influenced by 'isprint' option.
-    return 4;                   // unprintable, displays <xx>
+  if (!vim_isprintc(c)) {
+    assert(c <= 0xFFFF);
+    // unprintable is displayed either as <xx> or <xxxx>
+    return c > 0xFF ? 6 : 4;
  }

-  if (c >= 0x80 && *p_ambw == 'd'
-      && intable(ambiguous, ARRAY_SIZE(ambiguous), c)) {
+  int n = cw_value(c);
+  if (n != 0) {
+    return n;
+  }
+
+  if (intable(doublewidth, ARRAY_SIZE(doublewidth), c)) {
+    return 2;
+  }
+  if (p_emoji && intable(emoji_wide, ARRAY_SIZE(emoji_wide), c)) {
+    return 2;
+  }
+  if (*p_ambw == 'd' && intable(ambiguous, ARRAY_SIZE(ambiguous), c)) {
    return 2;
  }

@@ -1092,9 +1090,52 @@ bool utf_iscomposing(int c)
  return intable(combining, ARRAY_SIZE(combining), c);
 }

+#ifdef __SSE2__
+
+# include <emmintrin.h>
+
 // Return true for characters that can be displayed in a normal way.
 // Only for characters of 0x100 and above!
 bool utf_printable(int c)
+  FUNC_ATTR_CONST
+{
+  if (c < 0x180B || c > 0xFFFF) {
+    return c != 0x70F;
+  }
+
+# define L(v) ((int16_t)((v) - 1))  // lower bound (exclusive)
+# define H(v) ((int16_t)(v))  // upper bound (inclusive)
+
+  // Boundaries of unprintable characters.
+  // Some values are negative when converted to int16_t.
+  // Ranges must not wrap around when converted to int16_t.
+  __m128i const lo = _mm_setr_epi16(L(0x180b), L(0x200b), L(0x202a), L(0x2060),
+                                    L(0xd800), L(0xfeff), L(0xfff9), L(0xfffe));
+
+  __m128i const hi = _mm_setr_epi16(H(0x180e), H(0x200f), H(0x202e), H(0x206f),
+                                    H(0xdfff), H(0xfeff), H(0xfffb), H(0xffff));
+
+# undef L
+# undef H
+
+  __m128i value = _mm_set1_epi16((int16_t)c);
+
+  // Using _mm_cmplt_epi16() is less optimal, since it would require
+  // swapping operands (sse2 only has cmpgt instruction),
+  // and only the second operand can be a memory location.
+
+  // Character is printable when it is above/below both bounds of each range
+  // (corresponding bits in both masks are equal).
+  return _mm_movemask_epi8(_mm_cmpgt_epi16(value, lo))
+         == _mm_movemask_epi8(_mm_cmpgt_epi16(value, hi));
+}
+
+#else
+
+// Return true for characters that can be displayed in a normal way.
+// Only for characters of 0x100 and above!
+bool utf_printable(int c)
+  FUNC_ATTR_PURE
 {
  // Sorted list of non-overlapping intervals.
  // 0xd800-0xdfff is reserved for UTF-16, actually illegal.
@@ -1107,6 +1148,8 @@ bool utf_printable(int c)
  return !intable(nonprint, ARRAY_SIZE(nonprint), c);
 }

+#endif
+
 // Get class of a Unicode character.
 // 0: white space
 // 1: punctuation