mirror of
https://github.com/neovim/neovim.git
synced 2025-10-17 23:31:51 +00:00
refactor: delete duplicate utf8-functionality
Also remove the British National Replacement Character Set. We keep DEC Special Graphics and ASCII, despite them not being Unicode, because some old software such as calcurse still relies on this functionality. References: - https://github.com/neovim/neovim/pull/31934#discussion_r1911046426 - https://en.wikipedia.org/wiki/DEC_Special_Graphics - https://vt100.net/docs/vt220-rm/chapter2.html#S2.4.3
This commit is contained in:
@@ -74,3 +74,5 @@ typedef struct {
|
||||
} CharBoundsOff;
|
||||
|
||||
typedef utf8proc_int32_t GraphemeState;
|
||||
|
||||
enum { UNICODE_INVALID = 0xFFFD, };
|
||||
|
@@ -634,40 +634,13 @@ static void eat_bytes(TermKey *tk, size_t count)
|
||||
tk->buffcount -= count;
|
||||
}
|
||||
|
||||
// TODO(dundargoc): we should be able to replace this with utf_char2bytes from mbyte.c
|
||||
int fill_utf8(int codepoint, char *str)
|
||||
{
|
||||
int nbytes = utf_char2len(codepoint);
|
||||
|
||||
int nbytes = utf_char2bytes(codepoint, str);
|
||||
str[nbytes] = 0;
|
||||
|
||||
// This is easier done backwards
|
||||
int b = nbytes;
|
||||
while (b > 1) {
|
||||
b--;
|
||||
str[b] = (char)0x80 | (codepoint & 0x3f);
|
||||
codepoint >>= 6;
|
||||
}
|
||||
|
||||
switch (nbytes) {
|
||||
case 1:
|
||||
str[0] = (codepoint & 0x7f); break;
|
||||
case 2:
|
||||
str[0] = (char)0xc0 | (codepoint & 0x1f); break;
|
||||
case 3:
|
||||
str[0] = (char)0xe0 | (codepoint & 0x0f); break;
|
||||
case 4:
|
||||
str[0] = (char)0xf0 | (codepoint & 0x07); break;
|
||||
case 5:
|
||||
str[0] = (char)0xf8 | (codepoint & 0x03); break;
|
||||
case 6:
|
||||
str[0] = (char)0xfc | (codepoint & 0x01); break;
|
||||
}
|
||||
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
#define UTF8_INVALID 0xFFFD
|
||||
static TermKeyResult parse_utf8(const unsigned char *bytes, size_t len, int *cp, size_t *nbytep)
|
||||
{
|
||||
unsigned nbytes;
|
||||
@@ -681,7 +654,7 @@ static TermKeyResult parse_utf8(const unsigned char *bytes, size_t len, int *cp,
|
||||
return TERMKEY_RES_KEY;
|
||||
} else if (b0 < 0xc0) {
|
||||
// Starts with a continuation byte - that's not right
|
||||
*cp = UTF8_INVALID;
|
||||
*cp = UNICODE_INVALID;
|
||||
*nbytep = 1;
|
||||
return TERMKEY_RES_KEY;
|
||||
} else if (b0 < 0xe0) {
|
||||
@@ -700,7 +673,7 @@ static TermKeyResult parse_utf8(const unsigned char *bytes, size_t len, int *cp,
|
||||
nbytes = 6;
|
||||
*cp = b0 & 0x01;
|
||||
} else {
|
||||
*cp = UTF8_INVALID;
|
||||
*cp = UNICODE_INVALID;
|
||||
*nbytep = 1;
|
||||
return TERMKEY_RES_KEY;
|
||||
}
|
||||
@@ -714,7 +687,7 @@ static TermKeyResult parse_utf8(const unsigned char *bytes, size_t len, int *cp,
|
||||
|
||||
cb = bytes[b];
|
||||
if (cb < 0x80 || cb >= 0xc0) {
|
||||
*cp = UTF8_INVALID;
|
||||
*cp = UNICODE_INVALID;
|
||||
*nbytep = b;
|
||||
return TERMKEY_RES_KEY;
|
||||
}
|
||||
@@ -725,14 +698,14 @@ static TermKeyResult parse_utf8(const unsigned char *bytes, size_t len, int *cp,
|
||||
|
||||
// Check for overlong sequences
|
||||
if ((int)nbytes > utf_char2len(*cp)) {
|
||||
*cp = UTF8_INVALID;
|
||||
*cp = UNICODE_INVALID;
|
||||
}
|
||||
|
||||
// Check for UTF-16 surrogates or invalid *cps
|
||||
if ((*cp >= 0xD800 && *cp <= 0xDFFF)
|
||||
|| *cp == 0xFFFE
|
||||
|| *cp == 0xFFFF) {
|
||||
*cp = UTF8_INVALID;
|
||||
*cp = UNICODE_INVALID;
|
||||
}
|
||||
|
||||
*nbytep = nbytes;
|
||||
@@ -962,9 +935,9 @@ static TermKeyResult peekkey_simple(TermKey *tk, TermKeyKey *key, int force, siz
|
||||
if (res == TERMKEY_RES_AGAIN && force) {
|
||||
// There weren't enough bytes for a complete UTF-8 sequence but caller
|
||||
// demands an answer. About the best thing we can do here is eat as many
|
||||
// bytes as we have, and emit a UTF8_INVALID. If the remaining bytes
|
||||
// bytes as we have, and emit a UNICODE_INVALID. If the remaining bytes
|
||||
// arrive later, they'll be invalid too.
|
||||
codepoint = UTF8_INVALID;
|
||||
codepoint = UNICODE_INVALID;
|
||||
*nbytep = tk->buffcount;
|
||||
res = TERMKEY_RES_KEY;
|
||||
}
|
||||
|
@@ -210,6 +210,7 @@ static void decode_table(VTermEncoding *enc, void *data, uint32_t cp[], int *cpi
|
||||
}
|
||||
}
|
||||
|
||||
// https://en.wikipedia.org/wiki/DEC_Special_Graphics
|
||||
static const struct StaticTableEncoding encoding_DECdrawing = {
|
||||
{ .decode = &decode_table },
|
||||
{
|
||||
@@ -247,13 +248,6 @@ static const struct StaticTableEncoding encoding_DECdrawing = {
|
||||
}
|
||||
};
|
||||
|
||||
static const struct StaticTableEncoding encoding_uk = {
|
||||
{ .decode = &decode_table },
|
||||
{
|
||||
[0x23] = 0x00a3, // £
|
||||
}
|
||||
};
|
||||
|
||||
static struct {
|
||||
VTermEncodingType type;
|
||||
char designation;
|
||||
@@ -262,7 +256,6 @@ static struct {
|
||||
encodings[] = {
|
||||
{ ENC_UTF8, 'u', &encoding_utf8 },
|
||||
{ ENC_SINGLE_94, '0', (VTermEncoding *)&encoding_DECdrawing },
|
||||
{ ENC_SINGLE_94, 'A', (VTermEncoding *)&encoding_uk },
|
||||
{ ENC_SINGLE_94, 'B', &encoding_usascii },
|
||||
{ 0 },
|
||||
};
|
||||
|
Reference in New Issue
Block a user