From e40c5cb06d1ce8aeb2612b95805a6152d9a43aaa Mon Sep 17 00:00:00 2001
From: zeertzjq <zeertzjq@outlook.com>
Date: Fri, 6 Feb 2026 07:28:26 +0800
Subject: [PATCH] fix(vterm): handle split UTF-8 after ASCII properly (#37721)

Problem:  libvterm doesn't properly handle a split UTF-8 sequence after ASCII.
Solution: Only use one UTF-8 encoding state per vterm state.
---
 src/nvim/channel.c       | 15 ---------------
 src/nvim/vterm/state.c   |  3 +++
 test/unit/vterm_spec.lua |  9 +++++++++
 3 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/src/nvim/channel.c b/src/nvim/channel.c
index 12bd9a2573..3ffefd1298 100644
--- a/src/nvim/channel.c
+++ b/src/nvim/channel.c
@@ -660,21 +660,6 @@ static size_t on_channel_output(RStream *stream, Channel *chan, const char *buf,
                                 bool eof, CallbackReader *reader)
 {
   if (chan->term) {
-    if (count) {
-      const char *p = buf;
-      const char *end = buf + count;
-      while (p < end) {
-        // Don't pass incomplete UTF-8 sequences to libvterm. #16245
-        // Composing chars can be passed separately, so utf_ptr2len_len() is enough.
-        int clen = utf_ptr2len_len(p, (int)(end - p));
-        if (clen > end - p) {
-          count = (size_t)(p - buf);
-          break;
-        }
-        p += clen;
-      }
-    }
-
     terminal_receive(chan->term, buf, count);
   }
 
diff --git a/src/nvim/vterm/state.c b/src/nvim/vterm/state.c
index c0bfb803ba..13ca49541c 100644
--- a/src/nvim/vterm/state.c
+++ b/src/nvim/vterm/state.c
@@ -321,6 +321,9 @@ static int on_text(const char bytes[], size_t len, void *user)
                                                 : state->vt->mode.utf8 ? &state->encoding_utf8
                                                                        : &state->encoding[state->
                                                                                           gr_set];
+  if (encoding->enc == state->encoding_utf8.enc) {
+    encoding = &state->encoding_utf8;  // Only use one UTF-8 encoding state.
+  }
 
   (*encoding->enc->decode)(encoding->enc, encoding->data,
                            codepoints, &npoints, state->gsingle_set ? 1 : (int)maxpoints,
diff --git a/test/unit/vterm_spec.lua b/test/unit/vterm_spec.lua
index 988ddb7503..c5bd6a755a 100644
--- a/test/unit/vterm_spec.lua
+++ b/test/unit/vterm_spec.lua
@@ -1827,11 +1827,20 @@ putglyph 1f3f4,200d,2620,fe0f 2 0,4]])
     expect('putglyph 2592 1 0,1')
 
     vterm.vterm_set_utf8(vt, true)
+
+    -- Mixed US-ASCII and UTF-8
     -- U+0108 == c4 88
     reset(state, nil)
     push('\x1b(B', vt)
     push('AB\xc4\x88D', vt)
     expect('putglyph 41 1 0,0\nputglyph 42 1 0,1\nputglyph 108 1 0,2\nputglyph 44 1 0,3')
+
+    -- Split UTF-8 after US-ASCII
+    reset(state, nil)
+    push('AB\xc4', vt)
+    expect('putglyph 41 1 0,0\nputglyph 42 1 0,1')
+    push('\x88D', vt)
+    expect('putglyph 108 1 0,2\nputglyph 44 1 0,3')
   end)
 
   itp('15state_mode', function()