feat(mbyte): support extended grapheme clusters including more emoji

Use the grapheme break algorithm from utf8proc to support grapheme
clusters from recent unicode versions.

Handle variant selector VS16 turning some codepoints into double-width
emoji. This means we need to use ptr2cells rather than char2cells when
possible.
This commit is contained in:
bfredl
2024-08-08 10:42:08 +02:00
parent 4353996d0f
commit cfdf68a7ac
34 changed files with 657 additions and 221 deletions

View File

@@ -2118,7 +2118,7 @@ static int command_line_handle_key(CommandLineState *s)
s->do_abbr = false; // don't do abbreviation now
ccline.special_char = NUL;
// may need to remove ^ when composing char was typed
if (utf_iscomposing(s->c) && !cmd_silent) {
if (utf_iscomposing_first(s->c) && !cmd_silent) {
if (ui_has(kUICmdline)) {
// TODO(bfredl): why not make unputcmdline also work with true?
unputcmdline();
@@ -3585,7 +3585,9 @@ void put_on_cmdline(const char *str, int len, bool redraw)
// backup to the character before it. There could be two of them.
int i = 0;
int c = utf_ptr2char(ccline.cmdbuff + ccline.cmdpos);
while (ccline.cmdpos > 0 && utf_iscomposing(c)) {
// TODO(bfredl): this can be corrected/simplified as utf_head_off implements the
// correct grapheme cluster breaks
while (ccline.cmdpos > 0 && utf_iscomposing_legacy(c)) {
i = utf_head_off(ccline.cmdbuff, ccline.cmdbuff + ccline.cmdpos - 1) + 1;
ccline.cmdpos -= i;
len += i;