encoding: cleanup mbyte.c given fixed encoding=utf-8

Eliminate mb_init(): Set "enc_utf8" and "has_mbyte" early. Eliminate "enc_unicode" and "enc_latin1like". init_chartab() and screenalloc() are already invoked elsewhere in the initialization process. The EncodingChanged autocmd cannot be triggered. At initialization, there are no spell files to reload.
2025-09-29 14:38:32 +00:00 · 2015-06-26 12:49:14 +02:00
parent 4804001aff
commit 5072ab9e5b
12 changed files with 107 additions and 390 deletions
--- a/src/nvim/charset.c
+++ b/src/nvim/charset.c
@@ -1612,9 +1612,7 @@ bool vim_islower(int c)
      return false;
    }
-    if (enc_latin1like) {
+    return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
      return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER;
    }
  }
  return islower(c);
 }
@@ -1643,9 +1641,7 @@ bool vim_isupper(int c)
      return false;
    }
-    if (enc_latin1like) {
+    return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
      return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER;
    }
  }
  return isupper(c);
 }
@@ -1670,9 +1666,7 @@ int vim_toupper(int c)
      return c;
    }
-    if (enc_latin1like) {
+    return latin1upper[c];
      return latin1upper[c];
    }
  }
  return TOUPPER_LOC(c);
 }
@@ -1697,9 +1691,7 @@ int vim_tolower(int c)
      return c;
    }
-    if (enc_latin1like) {
+    return latin1lower[c];
      return latin1lower[c];
    }
  }
  return TOLOWER_LOC(c);
 }
--- a/src/nvim/fileio.c
+++ b/src/nvim/fileio.c
@@ -4165,9 +4165,8 @@ static bool need_conversion(const char_u *fenc)
    same_encoding = (enc_flags != 0 && fenc_flags == enc_flags);
  }
  if (same_encoding) {
-    /* Specified encoding matches with 'encoding'.  This requires
+    // Specified file encoding matches UTF-8.
-     * conversion when 'encoding' is Unicode but not UTF-8. */
+    return false;
    return enc_unicode != 0;
  }
  /* Encodings differ.  However, conversion is not needed when 'enc' is any
--- a/src/nvim/globals.h
+++ b/src/nvim/globals.h
@@ -778,44 +778,18 @@ EXTERN int vr_lines_changed INIT(= 0);      /* #Lines changed by "gR" so far */
 # define DBCS_2BYTE     1       /* 2byte- */
 # define DBCS_DEBUG     -1
-EXTERN int enc_dbcs INIT(= 0);                  /* One of DBCS_xxx values if
+// mbyte flags that used to depend on 'encoding'. These are now deprecated, as
-                                                   DBCS encoding */
+// 'encoding' is always "utf-8". Code that uses them can be refactored to
-EXTERN int enc_unicode INIT(= 0);       /* 2: UCS-2 or UTF-16, 4: UCS-4 */
+// remove dead code.
-EXTERN bool enc_utf8 INIT(= false);             /* UTF-8 encoded Unicode */
+#define enc_dbcs false
-EXTERN int enc_latin1like INIT(= TRUE);         /* 'encoding' is latin1 comp. */
+#define enc_utf8 true
-EXTERN int has_mbyte INIT(= 0);                 /* any multi-byte encoding */
+#define has_mbyte true
 /// Encoding used when 'fencs' is set to "default"
 EXTERN char_u *fenc_default INIT(= NULL);
-/*
+// To speed up BYTELEN() we keep a table with the byte lengths for utf-8
- * To speed up BYTELEN() we fill a table with the byte lengths whenever
+EXTERN char utf8len_tab[256];
 * enc_utf8 or enc_dbcs changes.
 */
 EXTERN char mb_bytelen_tab[256];
 /*
 * Function pointers, used to quickly get to the right function.  Each has
 * three possible values: latin_ (8-bit), utfc_ or utf_ (utf-8) and dbcs_
 * (DBCS).
 * The value is set in mb_init();
 */
 /* length of char in bytes, including following composing chars */
 EXTERN int (*mb_ptr2len)(const char_u *p) INIT(= latin_ptr2len);
 /* idem, with limit on string length */
 EXTERN int (*mb_ptr2len_len)(const char_u *p, int size) INIT(= latin_ptr2len_len);
 /* byte length of char */
 EXTERN int (*mb_char2len)(int c) INIT(= latin_char2len);
 /* convert char to bytes, return the length */
 EXTERN int (*mb_char2bytes)(int c, char_u *buf) INIT(= latin_char2bytes);
 EXTERN int (*mb_ptr2cells)(const char_u *p) INIT(= latin_ptr2cells);
 EXTERN int (*mb_ptr2cells_len)(const char_u *p, int size) INIT(
      = latin_ptr2cells_len);
 EXTERN int (*mb_char2cells)(int c) INIT(= latin_char2cells);
 EXTERN int (*mb_off2cells)(unsigned off, unsigned max_off) INIT(
      = latin_off2cells);
 EXTERN int (*mb_ptr2char)(const char_u *p) INIT(= latin_ptr2char);
 EXTERN int (*mb_head_off)(const char_u *base, const char_u *p) INIT(= latin_head_off);
 # if defined(USE_ICONV) && defined(DYNAMIC_ICONV)
 /* Pointers to functions and variables to be loaded at runtime */
--- a/src/nvim/macros.h
+++ b/src/nvim/macros.h
@@ -122,32 +122,29 @@
 /* Whether to draw the vertical bar on the right side of the cell. */
 # define CURSOR_BAR_RIGHT (curwin->w_p_rl && (!(State & CMDLINE) || cmdmsg_rl))
-/*
+// mb_ptr_adv(): advance a pointer to the next character, taking care of
- * mb_ptr_adv(): advance a pointer to the next character, taking care of
+// multi-byte characters if needed.
- * multi-byte characters if needed.
+// mb_ptr_back(): backup a pointer to the previous character, taking care of
- * mb_ptr_back(): backup a pointer to the previous character, taking care of
+// multi-byte characters if needed.
- * multi-byte characters if needed.
+// MB_COPY_CHAR(f, t): copy one char from "f" to "t" and advance the pointers.
- * MB_COPY_CHAR(f, t): copy one char from "f" to "t" and advance the pointers.
+// PTR2CHAR(): get character from pointer.
 * PTR2CHAR(): get character from pointer.
 */
 /* Get the length of the character p points to */
 # define MB_PTR2LEN(p)          (has_mbyte ? (*mb_ptr2len)(p) : 1)
 /* Advance multi-byte pointer, skip over composing chars. */
 # define mb_ptr_adv(p)      (p += has_mbyte ? (*mb_ptr2len)((char_u *)p) : 1)
 /* Advance multi-byte pointer, do not skip over composing chars. */
 # define mb_cptr_adv(p)     (p += \
  enc_utf8 ? utf_ptr2len(p) : has_mbyte ? (*mb_ptr2len)(p) : 1)
 /* Backup multi-byte pointer. Only use with "p" > "s" ! */
 # define mb_ptr_back(s, p)  (p -= has_mbyte ? ((*mb_head_off)((char_u *)s, (char_u *)p - 1) + 1) : 1)
 /* get length of multi-byte char, not including composing chars */
 # define mb_cptr2len(p)     (enc_utf8 ? utf_ptr2len(p) : (*mb_ptr2len)(p))
-# define MB_COPY_CHAR(f, t) \
+// Get the length of the character p points to
-  if (has_mbyte) mb_copy_char((const char_u **)(&f), &t); \
+# define MB_PTR2LEN(p)          mb_ptr2len(p)
-  else *t++ = *f++
+// Advance multi-byte pointer, skip over composing chars.
-# define MB_CHARLEN(p)      (has_mbyte ? mb_charlen(p) : (int)STRLEN(p))
+# define mb_ptr_adv(p)      (p += mb_ptr2len((char_u *)p))
-# define MB_CHAR2LEN(c)     (has_mbyte ? mb_char2len(c) : 1)
+// Advance multi-byte pointer, do not skip over composing chars.
-# define PTR2CHAR(p)        (has_mbyte ? mb_ptr2char(p) : (int)*(p))
+# define mb_cptr_adv(p)     (p += utf_ptr2len(p))
 // Backup multi-byte pointer. Only use with "p" > "s" !
 # define mb_ptr_back(s, p)  (p -= mb_head_off((char_u *)s, (char_u *)p - 1) + 1)
 // get length of multi-byte char, not including composing chars
 # define mb_cptr2len(p)     utf_ptr2len(p)
 # define MB_COPY_CHAR(f, t) mb_copy_char((const char_u **)(&f), &t);
 # define MB_CHARLEN(p)      mb_charlen(p)
 # define MB_CHAR2LEN(c)     mb_char2len(c)
 # define PTR2CHAR(p)        mb_ptr2char(p)
 # define RESET_BINDING(wp)  (wp)->w_p_scb = FALSE; (wp)->w_p_crb = FALSE
--- a/src/nvim/main.c
+++ b/src/nvim/main.c
@@ -177,7 +177,6 @@ void early_init(void)
  fs_init();
  handle_init();
  (void)mb_init();      // init mb_bytelen_tab[] to ones
  eval_init();          // init global variables
  // Init the table of Normal mode commands.
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -1,68 +1,27 @@
-/*
+/// mbyte.c: Code specifically for handling multi-byte characters.
- * mbyte.c: Code specifically for handling multi-byte characters.
+/// Multibyte extensions partly by Sung-Hoon Baek
- * Multibyte extensions partly by Sung-Hoon Baek
+///
- *
+/// The encoding used in nvim is always UTF-8. "enc_utf8" and "has_mbyte" are
- * The encoding used in the core is set with 'encoding'.  When 'encoding' is
+/// thus always true. "enc_dbcs" is always zero. The 'encoding' option is
- * changed, the following four variables are set (for speed).
+/// read-only and always reads "utf-8".
- * Currently these types of character encodings are supported:
+///
- *
+/// The cell width on the display needs to be determined from the character
- * "enc_dbcs"	    When non-zero it tells the type of double byte character
+/// value. Recognizing UTF-8 bytes is easy: 0xxx.xxxx is a single-byte char,
- *		    encoding (Chinese, Korean, Japanese, etc.).
+/// 10xx.xxxx is a trailing byte, 11xx.xxxx is a leading byte of a multi-byte
- *		    The cell width on the display is equal to the number of
+/// character. To make things complicated, up to six composing characters
- *		    bytes.  (exception: DBCS_JPNU with first byte 0x8e)
+/// are allowed. These are drawn on top of the first char. For most editing
- *		    Recognizing the first or second byte is difficult, it
+/// the sequence of bytes with composing characters included is considered to
- *		    requires checking a byte sequence from the start.
+/// be one character.
- * "enc_utf8"	    When TRUE use Unicode characters in UTF-8 encoding.
+///
- *		    The cell width on the display needs to be determined from
+/// UTF-8 is used everywhere in the core. This is in registers, text
- *		    the character value.
+/// manipulation, buffers, etc. Nvim core communicates with external plugins
- *		    Recognizing bytes is easy: 0xxx.xxxx is a single-byte
+/// and GUIs in this encoding.
- *		    char, 10xx.xxxx is a trailing byte, 11xx.xxxx is a leading
+///
- *		    byte of a multi-byte character.
+/// The encoding of a file is specified with 'fileencoding'.  Conversion
- *		    To make things complicated, up to six composing characters
+/// is to be done when it's different from "utf-8".
- *		    are allowed.  These are drawn on top of the first char.
+///
- *		    For most editing the sequence of bytes with composing
+/// Vim scripts may contain a ":scriptencoding" command. This has an effect
- *		    characters included is considered to be one character.
+/// for some commands, like ":menutrans".
 * "enc_unicode"    When 2 use 16-bit Unicode characters (or UTF-16).
 *		    When 4 use 32-but Unicode characters.
 *		    Internally characters are stored in UTF-8 encoding to
 *		    avoid NUL bytes.  Conversion happens when doing I/O.
 *		    "enc_utf8" will also be TRUE.
 *
 * "has_mbyte" is set when "enc_dbcs" or "enc_utf8" is non-zero.
 *
 * If none of these is TRUE, 8-bit bytes are used for a character.  The
 * encoding isn't currently specified (TODO).
 *
 * 'encoding' specifies the encoding used in the core.  This is in registers,
 * text manipulation, buffers, etc.  Conversion has to be done when characters
 * in another encoding are received or send:
 *
 *		       clipboard
 *			   ^
 *			   | (2)
 *			   V
 *		   +---------------+
 *	      (1)  |		   | (3)
 *  keyboard ----->|	 core	   |-----> display
 *		   |		   |
 *		   +---------------+
 *			   ^
 *			   | (4)
 *			   V
 *			 file
 *
 * (1) Typed characters arrive in the current locale.
 * (2) Text will be made available with the encoding specified with
 *     'encoding'.  If this is not sufficient, system-specific conversion
 *     might be required.
 * (3) For the GUI the correct font must be selected, no conversion done.
 * (4) The encoding of the file is specified with 'fileencoding'.  Conversion
 *     is to be done when it's different from 'encoding'.
 *
 * The ShaDa file is a special case: Only text is converted, not file names.
 * Vim scripts may contain an ":encoding" command.  This has an effect for
 * some commands, like ":menutrans"
 */
 #include <inttypes.h>
 #include <stdbool.h>
@@ -115,7 +74,7 @@ struct interval {
 * Bytes which are illegal when used as the first byte have a 1.
 * The NUL byte has length 1.
 */
-static char utf8len_tab[256] =
+char utf8len_tab[256] =
 {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@@ -384,207 +343,6 @@ int enc_canon_props(const char_u *name)
  return 0;
 }
 /*
 * Set up for using multi-byte characters.
 * Called in three cases:
 * - by main() to initialize (p_enc == NULL)
 * - by set_init_1() after 'encoding' was set to its default.
 * - by do_set() when 'encoding' has been set.
 * p_enc must have been passed through enc_canonize() already.
 * Sets the "enc_unicode", "enc_utf8", "enc_dbcs" and "has_mbyte" flags.
 * Fills mb_bytelen_tab[] and returns NULL when there are no problems.
 * When there is something wrong: Returns an error message and doesn't change
 * anything.
 */
 char_u * mb_init(void)
 {
  int i;
  int idx;
  int n;
  int enc_dbcs_new = 0;
 #if defined(USE_ICONV) && !defined(WIN3264) && !defined(WIN32UNIX) \
  && !defined(MACOS)
 # define LEN_FROM_CONV
  vimconv_T vimconv;
  char_u      *p;
 #endif
  if (p_enc == NULL) {
    /* Just starting up: set the whole table to one's. */
    for (i = 0; i < 256; ++i)
      mb_bytelen_tab[i] = 1;
    return NULL;
  } else if (STRNCMP(p_enc, "8bit-", 5) == 0
      || STRNCMP(p_enc, "iso-8859-", 9) == 0) {
    /* Accept any "8bit-" or "iso-8859-" name. */
    enc_unicode = 0;
    enc_utf8 = false;
  } else if (STRNCMP(p_enc, "2byte-", 6) == 0) {
    /* Unix: accept any "2byte-" name, assume current locale. */
    enc_dbcs_new = DBCS_2BYTE;
  } else if ((idx = enc_canon_search(p_enc)) >= 0) {
    i = enc_canon_table[idx].prop;
    if (i & ENC_UNICODE) {
      /* Unicode */
      enc_utf8 = true;
      if (i & (ENC_2BYTE | ENC_2WORD))
        enc_unicode = 2;
      else if (i & ENC_4BYTE)
        enc_unicode = 4;
      else
        enc_unicode = 0;
    } else if (i & ENC_DBCS) {
      /* 2byte, handle below */
      enc_dbcs_new = enc_canon_table[idx].codepage;
    } else {
      /* Must be 8-bit. */
      enc_unicode = 0;
      enc_utf8 = false;
    }
  } else    /* Don't know what encoding this is, reject it. */
    return e_invarg;
  if (enc_dbcs_new != 0) {
    enc_unicode = 0;
    enc_utf8 = false;
  }
  enc_dbcs = enc_dbcs_new;
  has_mbyte = (enc_dbcs != 0 || enc_utf8);
  /* Detect an encoding that uses latin1 characters. */
  enc_latin1like = (enc_utf8 || STRCMP(p_enc, "latin1") == 0
      || STRCMP(p_enc, "iso-8859-15") == 0);
  /*
   * Set the function pointers.
   */
  if (enc_utf8) {
    mb_ptr2len = utfc_ptr2len;
    mb_ptr2len_len = utfc_ptr2len_len;
    mb_char2len = utf_char2len;
    mb_char2bytes = utf_char2bytes;
    mb_ptr2cells = utf_ptr2cells;
    mb_ptr2cells_len = utf_ptr2cells_len;
    mb_char2cells = utf_char2cells;
    mb_off2cells = utf_off2cells;
    mb_ptr2char = utf_ptr2char;
    mb_head_off = utf_head_off;
  } else if (enc_dbcs != 0) {
    mb_ptr2len = dbcs_ptr2len;
    mb_ptr2len_len = dbcs_ptr2len_len;
    mb_char2len = dbcs_char2len;
    mb_char2bytes = dbcs_char2bytes;
    mb_ptr2cells = dbcs_ptr2cells;
    mb_ptr2cells_len = dbcs_ptr2cells_len;
    mb_char2cells = dbcs_char2cells;
    mb_off2cells = dbcs_off2cells;
    mb_ptr2char = dbcs_ptr2char;
    mb_head_off = dbcs_head_off;
  } else {
    mb_ptr2len = latin_ptr2len;
    mb_ptr2len_len = latin_ptr2len_len;
    mb_char2len = latin_char2len;
    mb_char2bytes = latin_char2bytes;
    mb_ptr2cells = latin_ptr2cells;
    mb_ptr2cells_len = latin_ptr2cells_len;
    mb_char2cells = latin_char2cells;
    mb_off2cells = latin_off2cells;
    mb_ptr2char = latin_ptr2char;
    mb_head_off = latin_head_off;
  }
  /*
   * Fill the mb_bytelen_tab[] for MB_BYTE2LEN().
   */
 #ifdef LEN_FROM_CONV
  /* When 'encoding' is different from the current locale mblen() won't
   * work.  Use conversion to "utf-8" instead. */
  vimconv.vc_type = CONV_NONE;
  if (enc_dbcs) {
    p = enc_locale();
    if (p == NULL || STRCMP(p, p_enc) != 0) {
      convert_setup(&vimconv, p_enc, (char_u *)"utf-8");
      vimconv.vc_fail = true;
    }
    xfree(p);
  }
 #endif
  for (i = 0; i < 256; ++i) {
    /* Our own function to reliably check the length of UTF-8 characters,
     * independent of mblen(). */
    if (enc_utf8)
      n = utf8len_tab[i];
    else if (enc_dbcs == 0)
      n = 1;
    else {
      char buf[MB_MAXBYTES + 1];
      if (i == NUL)             /* just in case mblen() can't handle "" */
        n = 1;
      else {
        buf[0] = i;
        buf[1] = 0;
 #ifdef LEN_FROM_CONV
        if (vimconv.vc_type != CONV_NONE) {
          /*
           * string_convert() should fail when converting the first
           * byte of a double-byte character.
           */
          p = string_convert(&vimconv, (char_u *)buf, NULL);
          if (p != NULL) {
            xfree(p);
            n = 1;
          } else
            n = 2;
        } else
 #endif
        {
          /*
           * mblen() should return -1 for invalid (means the leading
           * multibyte) character.  However there are some platforms
           * where mblen() returns 0 for invalid character.
           * Therefore, following condition includes 0.
           */
          ignored = mblen(NULL, 0);             /* First reset the state. */
          if (mblen(buf, (size_t)1) <= 0)
            n = 2;
          else
            n = 1;
        }
      }
    }
    mb_bytelen_tab[i] = n;
  }
 #ifdef LEN_FROM_CONV
  convert_setup(&vimconv, NULL, NULL);
 #endif
  /* The cell width depends on the type of multi-byte characters. */
  (void)init_chartab();
  /* When enc_utf8 is set or reset, (de)allocate ScreenLinesUC[] */
  screenalloc(false);
 #ifdef HAVE_WORKING_LIBINTL
  /* GNU gettext 0.10.37 supports this feature: set the codeset used for
   * translated messages independently from the current locale. */
  (void)bind_textdomain_codeset(PROJECT_NAME,
                                enc_utf8 ? "utf-8" : (char *)p_enc);
 #endif
  /* Fire an autocommand to let people do custom font setup. This must be
   * after Vim has been setup for the new encoding. */
  apply_autocmds(EVENT_ENCODINGCHANGED, NULL, (char_u *)"", FALSE, curbuf);
  /* Need to reload spell dictionaries */
  spell_reload();
  return NULL;
 }
 /*
 * Return the size of the BOM for the current buffer:
 * 0 - no BOM
@@ -597,20 +355,15 @@ int bomb_size(void)
  int n = 0;
  if (curbuf->b_p_bomb && !curbuf->b_p_bin) {
-    if (*curbuf->b_p_fenc == NUL) {
+    if (*curbuf->b_p_fenc == NUL
-      if (enc_utf8) {
+        || STRCMP(curbuf->b_p_fenc, "utf-8") == 0) {
        if (enc_unicode != 0)
          n = enc_unicode;
        else
          n = 3;
      }
    } else if (STRCMP(curbuf->b_p_fenc, "utf-8") == 0)
      n = 3;
-    else if (STRNCMP(curbuf->b_p_fenc, "ucs-2", 5) == 0
+    } else if (STRNCMP(curbuf->b_p_fenc, "ucs-2", 5) == 0
-        || STRNCMP(curbuf->b_p_fenc, "utf-16", 6) == 0)
+               || STRNCMP(curbuf->b_p_fenc, "utf-16", 6) == 0) {
      n = 2;
-    else if (STRNCMP(curbuf->b_p_fenc, "ucs-4", 5) == 0)
+    } else if (STRNCMP(curbuf->b_p_fenc, "ucs-4", 5) == 0) {
      n = 4;
    }
  }
  return n;
 }
@@ -1010,7 +763,7 @@ int latin_ptr2cells_len(const char_u *p, int size)
  return 1;
 }
-static int utf_ptr2cells_len(const char_u *p, int size)
+int utf_ptr2cells_len(const char_u *p, int size)
 {
  int c;
@@ -2232,26 +1985,20 @@ int mb_tail_off(char_u *base, char_u *p)
  if (*p == NUL)
    return 0;
-  if (enc_utf8) {
+  // Find the last character that is 10xx.xxxx
-    /* Find the last character that is 10xx.xxxx */
+  for (i = 0; (p[i + 1] & 0xc0) == 0x80; i++) {}
-    for (i = 0; (p[i + 1] & 0xc0) == 0x80; ++i)
+
-      ;
+  // Check for illegal sequence.
-    /* Check for illegal sequence. */
+  for (j = 0; p - j > base; j++) {
-    for (j = 0; p - j > base; ++j)
+    if ((p[-j] & 0xc0) != 0x80) {
-      if ((p[-j] & 0xc0) != 0x80)
+      break;
-        break;
+    }
    if (utf8len_tab[p[-j]] != i + j + 1)
      return 0;
    return i;
  }
-  /* It can't be the first byte if a double-byte when not using DBCS, at the
+  if (utf8len_tab[p[-j]] != i + j + 1) {
   * end of the string or the byte can't start a double-byte. */
  if (enc_dbcs == 0 || p[1] == NUL || MB_BYTE2LEN(*p) == 1)
    return 0;
-
+  }
-  /* Return 1 when on the lead byte, 0 when on the tail byte. */
+  return i;
  return 1 - dbcs_head_off(base, p);
 }
 /*
@@ -2466,13 +2213,10 @@ int mb_fix_col(int col, int row)
 {
  col = check_col(col);
  row = check_row(row);
-  if (has_mbyte && ScreenLines != NULL && col > 0
+  if (ScreenLines != NULL && col > 0
-      && ((enc_dbcs
+      && ScreenLines[LineOffset[row] + col] == 0) {
          && ScreenLines[LineOffset[row] + col] != NUL
          && dbcs_screen_head_off(ScreenLines + LineOffset[row],
            ScreenLines + LineOffset[row] + col))
        || (enc_utf8 && ScreenLines[LineOffset[row] + col] == 0)))
    return col - 1;
  }
  return col;
 }
--- a/src/nvim/mbyte.h
+++ b/src/nvim/mbyte.h
@@ -9,8 +9,8 @@
 * MB_BYTE2LEN_CHECK() can be used to count a special key as one byte.
 * Don't call MB_BYTE2LEN(b) with b < 0 or b > 255!
 */
-#define MB_BYTE2LEN(b)         mb_bytelen_tab[b]
+#define MB_BYTE2LEN(b)         utf8len_tab[b]
-#define MB_BYTE2LEN_CHECK(b)   (((b) < 0 || (b) > 255) ? 1 : mb_bytelen_tab[b])
+#define MB_BYTE2LEN_CHECK(b)   (((b) < 0 || (b) > 255) ? 1 : utf8len_tab[b])
 /* properties used in enc_canon_table[] (first three mutually exclusive) */
 #define ENC_8BIT       0x01
@@ -28,6 +28,18 @@
 #define ENC_LATIN9     0x400       /* Latin9 */
 #define ENC_MACROMAN   0x800       /* Mac Roman (not Macro Man! :-) */
 // TODO(bfredl): eventually we should keep only one of the namings
 #define mb_ptr2len utfc_ptr2len
 #define mb_ptr2len_len utfc_ptr2len_len
 #define mb_char2len utf_char2len
 #define mb_char2bytes utf_char2bytes
 #define mb_ptr2cells utf_ptr2cells
 #define mb_ptr2cells_len utf_ptr2cells_len
 #define mb_char2cells utf_char2cells
 #define mb_off2cells utf_off2cells
 #define mb_ptr2char utf_ptr2char
 #define mb_head_off utf_head_off
 #ifdef INCLUDE_GENERATED_DECLARATIONS
 # include "mbyte.h.generated.h"
 #endif
--- a/src/nvim/ops.c
+++ b/src/nvim/ops.c
@@ -1936,8 +1936,7 @@ int swapchar(int op_type, pos_T *pos)
  if (c >= 0x80 && op_type == OP_ROT13)
    return FALSE;
-  if (op_type == OP_UPPER && c == 0xdf
+  if (op_type == OP_UPPER && c == 0xdf) {
      && (enc_latin1like || STRCMP(p_enc, "iso-8859-2") == 0)) {
    pos_T sp = curwin->w_cursor;
    /* Special handling of German sharp s: change to "SS". */
--- a/src/nvim/option.c
+++ b/src/nvim/option.c
@@ -780,8 +780,11 @@ void set_init_1(void)
  }
  fenc_default = p;
-  // Initialize multibyte (utf-8) handling
+#ifdef HAVE_WORKING_LIBINTL
-  mb_init();
+  // GNU gettext 0.10.37 supports this feature: set the codeset used for
  // translated messages independently from the current locale.
  (void)bind_textdomain_codeset(PROJECT_NAME, (char *)p_enc);
 #endif
  /* Set the default for 'helplang'. */
  set_helplang_default(get_mess_lang());
--- a/src/nvim/screen.c
+++ b/src/nvim/screen.c
@@ -5292,7 +5292,7 @@ void screen_puts_len(char_u *text, int textlen, int row, int col, int attr)
  int force_redraw_next = FALSE;
  int need_redraw;
-  const int l_has_mbyte = has_mbyte;
+  const bool l_has_mbyte = has_mbyte;
  const bool l_enc_utf8 = enc_utf8;
  const int l_enc_dbcs = enc_dbcs;
--- a/src/nvim/spell.c
+++ b/src/nvim/spell.c
@@ -9266,9 +9266,7 @@ static void allcap_copy(char_u *word, char_u *wcopy)
    else
      c = *s++;
-    // We only change 0xdf to SS when we are certain latin1 is used.  It
+    if (c == 0xdf) {
    // would cause weird errors in other 8-bit encodings.
    if (enc_latin1like && c == 0xdf) {
      c = 'S';
      if (d - wcopy >= MAXWLEN - 1)
        break;
@@ -12602,7 +12600,7 @@ static int spell_edit_score(slang_T *slang, char_u *badword, char_u *goodword)
  char_u      *p;
  int wbadword[MAXWLEN];
  int wgoodword[MAXWLEN];
-  const int l_has_mbyte = has_mbyte;
+  const bool l_has_mbyte = has_mbyte;
  if (l_has_mbyte) {
    // Get the characters from the multi-byte strings and put them in an
--- a/src/nvim/tui/input.c
+++ b/src/nvim/tui/input.c
@@ -31,8 +31,8 @@ void term_input_init(TermInput *input, Loop *loop)
  if (!term) {
    term = "";  // termkey_new_abstract assumes non-null (#2745)
  }
-  int enc_flag = enc_utf8 ? TERMKEY_FLAG_UTF8 : TERMKEY_FLAG_RAW;
+
-  input->tk = termkey_new_abstract(term, enc_flag);
+  input->tk = termkey_new_abstract(term, TERMKEY_FLAG_UTF8);
  int curflags = termkey_get_canonflags(input->tk);
  termkey_set_canonflags(input->tk, curflags | TERMKEY_CANON_DELBS);