charset,*: Refactor transstr()

2025-11-22 18:16:32 +00:00 · 2018-04-08 16:01:19 +03:00
parent e670756676
commit 0d7daaad98
9 changed files with 204 additions and 144 deletions
--- a/src/nvim/charset.c
+++ b/src/nvim/charset.c
@@ -313,69 +313,110 @@ void trans_characters(char_u *buf, int bufsize)
  }
 }

-/// Translate a string into allocated memory, replacing special chars with
-/// printable chars.
+/// Find length of a string capable of holding s with all specials replaced
 ///
-/// @param s
+/// Assumes replacing special characters with printable ones just like
+/// strtrans() does.
 ///
-/// @return translated string
-char_u *transstr(char_u *s) FUNC_ATTR_NONNULL_RET
+/// @param[in]  s  String to check.
+///
+/// @return number of bytes needed to hold a translation of `s`, NUL byte not
+///         included.
+size_t transstr_len(const char *const s)
+  FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE
 {
-  char_u *res;
-  char_u *p;
-  int c;
-  size_t l;
-  char_u hexbuf[11];
+  const char *p = s;
+  size_t len = 0;

+  while (*p) {
+    const size_t l = (size_t)utfc_ptr2len((const char_u *)p);
+    if (l > 1) {
+      int pcc[MAX_MCO + 2];
+      pcc[0] = utfc_ptr2char((const char_u *)p, &pcc[1]);
+
+      if (vim_isprintc(pcc[0])) {
+        len += l;
+      } else {
+        for (size_t i = 0; i < ARRAY_SIZE(pcc); i++) {
+          char hexbuf[11];
+          len += transchar_hex(hexbuf, pcc[i]);
+        }
+      }
+      p += l;
+    } else {
+      const int b2c_l = byte2cells((uint8_t)(*p++));
+      // Illegal byte sequence may occupy up to 4 characters.
+      len += (size_t)(b2c_l > 0 ? b2c_l : 4);
+    }
+  }
+  return len;
+}
+
+/// Replace special characters with printable ones
+///
+/// @param[in]  s  String to replace characters from.
+/// @param[out]  buf  Buffer to which result should be saved.
+/// @param[in]  len  Buffer length. Resulting string may not occupy more then
+///                  len - 1 bytes (one for trailing NUL byte).
+///
+/// @return `buf` argument unchanged.
+char *transstr_buf(const char *const s, char *const buf, const size_t len)
+  FUNC_ATTR_NONNULL_RET FUNC_ATTR_NONNULL_ALL
+{
+  const char *p = s;
+  char *buf_p = buf;
+  char *const buf_e = buf_p + len - 1;
+
+  while (*p != NUL && buf_p < buf_e) {
+    const size_t l = (size_t)utfc_ptr2len((const char_u *)p);
+    if (l > 1) {
+      if (buf_p + l >= buf_e) {
+        break;
+      }
+      int pcc[MAX_MCO + 2];
+      pcc[0] = utfc_ptr2char((const char_u *)p, &pcc[1]);
+
+      if (vim_isprintc(pcc[0])) {
+        memmove(buf_p, p, l);
+        buf_p += l;
+      } else {
+        for (size_t i = 0; i < ARRAY_SIZE(pcc); i++) {
+          char hexbuf[11];
+          const size_t hexlen = transchar_hex(hexbuf, pcc[i]);
+          if (buf_p + hexlen >= buf_e) {
+            break;
+          }
+          memmove(buf_p, hexbuf, hexlen);
+          buf_p += hexlen;
+        }
+      }
+      p += l;
+    } else {
+      const char *const tb = (const char *)transchar_byte((uint8_t)(*p++));
+      const size_t tb_len = strlen(tb);
+      memmove(buf_p, tb, tb_len);
+      buf_p += tb_len;
+    }
+  }
+  *buf_p = NUL;
+  assert(buf_p <= buf_e);
+  return buf;
+}
+
+/// Copy string and replace special characters with printable characters
+///
+/// Works like `strtrans()` does, used for that and in some other places.
+///
+/// @param[in]  s  String to replace characters from.
+///
+/// @return [allocated] translated string
+char *transstr(const char *const s)
+  FUNC_ATTR_NONNULL_RET
+{
  // Compute the length of the result, taking account of unprintable
  // multi-byte characters.
-  size_t len = 0;
-  p = s;
-
-  while (*p != NUL) {
-    if ((l = (size_t)(*mb_ptr2len)(p)) > 1) {
-      c = (*mb_ptr2char)(p);
-      p += l;
-
-      if (vim_isprintc(c)) {
-        len += l;
-      } else {
-        transchar_hex(hexbuf, c);
-        len += STRLEN(hexbuf);
-      }
-    } else {
-      l = (size_t)byte2cells(*p++);
-
-      if (l > 0) {
-        len += l;
-      } else {
-        // illegal byte sequence
-        len += 4;
-      }
-    }
-  }
-  res = xmallocz(len);
-
-  *res = NUL;
-  p = s;
-
-  while (*p != NUL) {
-    if ((l = (size_t)(*mb_ptr2len)(p)) > 1) {
-      c = (*mb_ptr2char)(p);
-
-      if (vim_isprintc(c)) {
-        // append printable multi-byte char
-        STRNCAT(res, p, l);
-      } else {
-        transchar_hex(res + STRLEN(res), c);
-      }
-      p += l;
-    } else {
-      STRCAT(res, transchar_byte(*p++));
-    }
-  }
-
-  return res;
+  const size_t len = transstr_len((const char *)s) + 1;
+  return transstr_buf(s, xmalloc(len), len);
 }

 /// Convert the string "str[orglen]" to do ignore-case comparing.
@@ -504,13 +545,15 @@ char_u* transchar(int c)
  return transchar_buf;
 }

-/// Like transchar(), but called with a byte instead of a character.  Checks
-/// for an illegal UTF-8 byte.
+/// Like transchar(), but called with a byte instead of a character
 ///
-/// @param c
+/// Checks for an illegal UTF-8 byte.
+///
+/// @param[in]  c  Byte to translate.
 ///
 /// @return pointer to translated character in transchar_buf.
-char_u* transchar_byte(int c)
+char_u *transchar_byte(const int c)
+  FUNC_ATTR_WARN_UNUSED_RESULT
 {
  if (c >= 0x80) {
    transchar_nonprint(transchar_buf, c);
@@ -519,12 +562,14 @@ char_u* transchar_byte(int c)
  return transchar(c);
 }

-/// Convert non-printable character to two or more printable characters in
-/// "buf[]".  "buf" needs to be able to hold five bytes.
-/// Does NOT work for multi-byte characters, c must be <= 255.
+/// Convert non-printable characters to 2..4 printable ones
 ///
-/// @param buf
-/// @param c
+/// @warning Does not work for multi-byte characters, c must be <= 255.
+///
+/// @param[out]  buf  Buffer to store result in, must be able to hold at least
+///                   5 bytes (conversion result + NUL).
+/// @param[in]  c  Character to convert. NUL is assumed to be NL according to
+///                `:h NL-used-for-NUL`.
 void transchar_nonprint(char_u *buf, int c)
 {
  if (c == NL) {
@@ -534,54 +579,63 @@ void transchar_nonprint(char_u *buf, int c)
    // we use CR in place of  NL in this case
    c = NL;
  }
+  assert(c <= 0xff);

-  if (dy_flags & DY_UHEX) {
+  if (dy_flags & DY_UHEX || c > 0x7f) {
    // 'display' has "uhex"
-    transchar_hex(buf, c);
-  } else if (c <= 0x7f) {
+    transchar_hex((char *)buf, c);
+  } else {
    // 0x00 - 0x1f and 0x7f
    buf[0] = '^';
    // DEL displayed as ^?
    buf[1] = (char_u)(c ^ 0x40);

    buf[2] = NUL;
-  } else {
-    transchar_hex(buf, c);
  }
 }

-/// Convert a non-printable character to hex.
+/// Convert a non-printable character to hex C string like "<FFFF>"
 ///
-/// @param buf
-/// @param c
-void transchar_hex(char_u *buf, int c)
+/// @param[out]  buf  Buffer to store result in.
+/// @param[in]  c  Character to convert.
+///
+/// @return Number of bytes stored in buffer, excluding trailing NUL byte.
+size_t transchar_hex(char *const buf, const int c)
+  FUNC_ATTR_NONNULL_ALL
 {
-  int i = 0;
+  size_t i = 0;

-  buf[0] = '<';
+  buf[i++] = '<';
  if (c > 255) {
-    buf[++i] = (char_u)nr2hex((unsigned)c >> 12);
-    buf[++i] = (char_u)nr2hex((unsigned)c >> 8);
+    if (c > 255 * 256) {
+      buf[i++] = (char)nr2hex((unsigned)c >> 20);
+      buf[i++] = (char)nr2hex((unsigned)c >> 16);
+    }
+    buf[i++] = (char)nr2hex((unsigned)c >> 12);
+    buf[i++] = (char)nr2hex((unsigned)c >> 8);
  }
-  buf[++i] = (char_u)(nr2hex((unsigned)c >> 4));
-  buf[++i] = (char_u)(nr2hex((unsigned)c));
-  buf[++i] = '>';
-  buf[++i] = NUL;
+  buf[i++] = (char)(nr2hex((unsigned)c >> 4));
+  buf[i++] = (char)(nr2hex((unsigned)c));
+  buf[i++] = '>';
+  buf[i] = NUL;
+  return i;
 }

-/// Convert the lower 4 bits of byte "c" to its hex character.
+/// Convert the lower 4 bits of byte "c" to its hex character
+///
 /// Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
 /// function key 1.
 ///
-/// @param c
+/// @param[in]  n  Number to convert.
 ///
 /// @return the hex character.
-static unsigned nr2hex(unsigned c)
+static inline unsigned nr2hex(unsigned n)
+  FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT
 {
-  if ((c & 0xf) <= 9) {
-    return (c & 0xf) + '0';
+  if ((n & 0xf) <= 9) {
+    return (n & 0xf) + '0';
  }
-  return (c & 0xf) - 10 + 'a';
+  return (n & 0xf) - 10 + 'a';
 }

 /// Return number of display cells occupied by byte "b".