From e40c5cb06d1ce8aeb2612b95805a6152d9a43aaa Mon Sep 17 00:00:00 2001 From: zeertzjq Date: Fri, 6 Feb 2026 07:28:26 +0800 Subject: [PATCH] fix(vterm): handle split UTF-8 after ASCII properly (#37721) Problem: libvterm doesn't handle split UTF-8 sequence after ASCII. Solution: Only use one UTF-8 encoding state per vterm state. --- src/nvim/channel.c | 15 --------------- src/nvim/vterm/state.c | 3 +++ test/unit/vterm_spec.lua | 9 +++++++++ 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/src/nvim/channel.c b/src/nvim/channel.c index 12bd9a2573..3ffefd1298 100644 --- a/src/nvim/channel.c +++ b/src/nvim/channel.c @@ -660,21 +660,6 @@ static size_t on_channel_output(RStream *stream, Channel *chan, const char *buf, bool eof, CallbackReader *reader) { if (chan->term) { - if (count) { - const char *p = buf; - const char *end = buf + count; - while (p < end) { - // Don't pass incomplete UTF-8 sequences to libvterm. #16245 - // Composing chars can be passed separately, so utf_ptr2len_len() is enough. - int clen = utf_ptr2len_len(p, (int)(end - p)); - if (clen > end - p) { - count = (size_t)(p - buf); - break; - } - p += clen; - } - } - terminal_receive(chan->term, buf, count); } diff --git a/src/nvim/vterm/state.c b/src/nvim/vterm/state.c index c0bfb803ba..13ca49541c 100644 --- a/src/nvim/vterm/state.c +++ b/src/nvim/vterm/state.c @@ -321,6 +321,9 @@ static int on_text(const char bytes[], size_t len, void *user) : state->vt->mode.utf8 ? &state->encoding_utf8 : &state->encoding[state-> gr_set]; + if (encoding->enc == state->encoding_utf8.enc) { + encoding = &state->encoding_utf8; // Only use one UTF-8 encoding state. + } (*encoding->enc->decode)(encoding->enc, encoding->data, codepoints, &npoints, state->gsingle_set ? 1 : (int)maxpoints, diff --git a/test/unit/vterm_spec.lua b/test/unit/vterm_spec.lua index 988ddb7503..c5bd6a755a 100644 --- a/test/unit/vterm_spec.lua +++ b/test/unit/vterm_spec.lua @@ -1827,11 +1827,20 @@ putglyph 1f3f4,200d,2620,fe0f 2 0,4]]) expect('putglyph 2592 1 0,1') vterm.vterm_set_utf8(vt, true) + + -- Mixed US-ASCII and UTF-8 -- U+0108 == c4 88 reset(state, nil) push('\x1b(B', vt) push('AB\xc4\x88D', vt) expect('putglyph 41 1 0,0\nputglyph 42 1 0,1\nputglyph 108 1 0,2\nputglyph 44 1 0,3') + + -- Split UTF-8 after US-ASCII + reset(state, nil) + push('AB\xc4', vt) + expect('putglyph 41 1 0,0\nputglyph 42 1 0,1') + push('\x88D', vt) + expect('putglyph 108 1 0,2\nputglyph 44 1 0,3') end) itp('15state_mode', function()