mirror of
https://github.com/neovim/neovim.git
synced 2025-09-14 07:18:17 +00:00
shell: handle split-up UTF-8 sequences
This commit is contained in:
@@ -422,7 +422,7 @@ static void out_data_ring(char *output, size_t size)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (output == NULL && size == SIZE_MAX) { // Print mode
|
if (output == NULL && size == SIZE_MAX) { // Print mode
|
||||||
out_data_append_to_screen(last_skipped, last_skipped_len, true);
|
out_data_append_to_screen(last_skipped, &last_skipped_len, true);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -450,30 +450,40 @@ static void out_data_ring(char *output, size_t size)
|
|||||||
/// @param output Data to append to screen lines.
|
/// @param output Data to append to screen lines.
|
||||||
/// @param remaining Size of data.
|
/// @param[in,out] count Size of data. If a trailing incomplete UTF-8
///                      sequence is held back, set to the bytes consumed.
|
||||||
/// @param new_line If true, next data output will be on a new line.
|
/// @param eof If false, a trailing incomplete UTF-8 sequence is not output
///            and is left unconsumed.
|
||||||
static void out_data_append_to_screen(char *output, size_t remaining,
|
static void out_data_append_to_screen(char *output, size_t *count,
|
||||||
bool new_line)
|
bool eof)
|
||||||
{
|
{
|
||||||
char *p = output, *end = output + remaining;
|
char *p = output, *end = output + *count;
|
||||||
while (p < end) {
|
while (p < end) {
|
||||||
if (*p == '\n' || *p == '\r' || *p == TAB || *p == BELL) {
|
if (*p == '\n' || *p == '\r' || *p == TAB || *p == BELL) {
|
||||||
msg_putchar_attr((uint8_t)(*p), 0);
|
msg_putchar_attr((uint8_t)(*p), 0);
|
||||||
p++;
|
p++;
|
||||||
} else {
|
} else {
|
||||||
|
// Note: this is not 100% precise:
|
||||||
|
// 1. we don't check if received continuation bytes are already invalid
|
||||||
|
// and we thus do some buffering that could be avoided
|
||||||
|
// 2. we don't compose chars over buffer boundaries, even if we see an
|
||||||
|
// incomplete UTF-8 sequence that could be composing with the last
|
||||||
|
// complete sequence.
|
||||||
|
// This will be corrected when we switch to a vterm-based implementation.
|
||||||
int i = *p ? mb_ptr2len_len((char_u *)p, (int)(end-p)) : 1;
|
int i = *p ? mb_ptr2len_len((char_u *)p, (int)(end-p)) : 1;
|
||||||
|
if (!eof && i == 1 && utf8len_tab_zero[*(uint8_t *)p] > (end-p)) {
|
||||||
|
*count = (size_t)(p - output);
|
||||||
|
goto end;
|
||||||
|
}
|
||||||
|
|
||||||
(void)msg_outtrans_len_attr((char_u *)p, i, 0);
|
(void)msg_outtrans_len_attr((char_u *)p, i, 0);
|
||||||
p += i;
|
p += i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
end:
|
||||||
ui_flush();
|
ui_flush();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void out_data_cb(Stream *stream, RBuffer *buf, size_t count, void *data,
|
static void out_data_cb(Stream *stream, RBuffer *buf, size_t count, void *data,
|
||||||
bool eof)
|
bool eof)
|
||||||
{
|
{
|
||||||
// We always output the whole buffer, so the buffer can never
|
|
||||||
// wrap around.
|
|
||||||
size_t cnt;
|
size_t cnt;
|
||||||
char *ptr = rbuffer_read_ptr(buf, &cnt);
|
char *ptr = rbuffer_read_ptr(buf, &cnt);
|
||||||
|
|
||||||
@@ -482,12 +492,16 @@ static void out_data_cb(Stream *stream, RBuffer *buf, size_t count, void *data,
|
|||||||
// Save the skipped output. If it is the final chunk, we display it later.
|
// Save the skipped output. If it is the final chunk, we display it later.
|
||||||
out_data_ring(ptr, cnt);
|
out_data_ring(ptr, cnt);
|
||||||
} else {
|
} else {
|
||||||
out_data_append_to_screen(ptr, cnt, eof);
|
out_data_append_to_screen(ptr, &cnt, eof);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cnt) {
|
if (cnt) {
|
||||||
rbuffer_consumed(buf, cnt);
|
rbuffer_consumed(buf, cnt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Move remaining data to start of buffer, so the buffer can never
|
||||||
|
// wrap around.
|
||||||
|
rbuffer_reset(buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parses a command string into a sequence of words, taking quotes into
|
/// Parses a command string into a sequence of words, taking quotes into
|
||||||
|
@@ -4,6 +4,13 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
static void wait(void)
|
||||||
|
{
|
||||||
|
fflush(stdout);
|
||||||
|
usleep(10*1000);
|
||||||
|
}
|
||||||
|
|
||||||
static void help(void)
|
static void help(void)
|
||||||
{
|
{
|
||||||
@@ -61,6 +68,22 @@ int main(int argc, char **argv)
|
|||||||
for (uint8_t i = 0; i < number; i++) {
|
for (uint8_t i = 0; i < number; i++) {
|
||||||
printf("%d: %s\n", (int) i, argv[3]);
|
printf("%d: %s\n", (int) i, argv[3]);
|
||||||
}
|
}
|
||||||
|
} else if (strcmp(argv[1], "UTF-8") == 0) {
|
||||||
|
// test split-up UTF-8 sequence
|
||||||
|
printf("\xc3"); wait();
|
||||||
|
printf("\xa5\n"); wait();
|
||||||
|
|
||||||
|
// split up a 2+2 byte grapheme cluster in all possible ways
|
||||||
|
printf("ref: \xc3\xa5\xcc\xb2\n"); wait();
|
||||||
|
|
||||||
|
printf("1: \xc3"); wait();
|
||||||
|
printf("\xa5\xcc\xb2\n"); wait();
|
||||||
|
|
||||||
|
printf("2: \xc3\xa5"); wait();
|
||||||
|
printf("\xcc\xb2\n"); wait();
|
||||||
|
|
||||||
|
printf("3: \xc3\xa5\xcc"); wait();
|
||||||
|
printf("\xb2\n"); wait();
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "Unknown first argument\n");
|
fprintf(stderr, "Unknown first argument\n");
|
||||||
return 3;
|
return 3;
|
||||||
|
@@ -9,6 +9,7 @@ local feed_command = helpers.feed_command
|
|||||||
local iswin = helpers.iswin
|
local iswin = helpers.iswin
|
||||||
local clear = helpers.clear
|
local clear = helpers.clear
|
||||||
local command = helpers.command
|
local command = helpers.command
|
||||||
|
local nvim_dir = helpers.nvim_dir
|
||||||
|
|
||||||
describe("shell command :!", function()
|
describe("shell command :!", function()
|
||||||
if helpers.pending_win32(pending) then return end
|
if helpers.pending_win32(pending) then return end
|
||||||
@@ -195,5 +196,29 @@ describe("shell command :!", function()
|
|||||||
eq(true, screen.bell)
|
eq(true, screen.bell)
|
||||||
end)
|
end)
|
||||||
end)
|
end)
|
||||||
|
|
||||||
|
it('handles multibyte sequences split over buffer boundaries', function()
|
||||||
|
command('cd '..nvim_dir)
|
||||||
|
local cmd
|
||||||
|
if iswin() then
|
||||||
|
cmd = '!shell-test UTF-8 '
|
||||||
|
else
|
||||||
|
cmd = '!./shell-test UTF-8'
|
||||||
|
end
|
||||||
|
feed_command(cmd)
|
||||||
|
-- Note: only the first example of split composed char works
|
||||||
|
screen:expect([[
|
||||||
|
{1:~ }|
|
||||||
|
{1:~ }|
|
||||||
|
:]]..cmd..[[ |
|
||||||
|
å |
|
||||||
|
ref: å̲ |
|
||||||
|
1: å̲ |
|
||||||
|
2: å ̲ |
|
||||||
|
3: å ̲ |
|
||||||
|
|
|
||||||
|
{3:Press ENTER or type command to continue}^ |
|
||||||
|
]])
|
||||||
|
end)
|
||||||
end)
|
end)
|
||||||
end)
|
end)
|
||||||
|
Reference in New Issue
Block a user