shell: handle split-up UTF-8 sequences

This commit is contained in:
Björn Linse
2018-02-08 15:11:56 +01:00
parent 01cdeff626
commit f75c4b39ec
3 changed files with 69 additions and 7 deletions

View File

@@ -422,7 +422,7 @@ static void out_data_ring(char *output, size_t size)
} }
if (output == NULL && size == SIZE_MAX) { // Print mode if (output == NULL && size == SIZE_MAX) { // Print mode
out_data_append_to_screen(last_skipped, last_skipped_len, true); out_data_append_to_screen(last_skipped, &last_skipped_len, true);
return; return;
} }
@@ -450,30 +450,40 @@ static void out_data_ring(char *output, size_t size)
/// Outputs a chunk of data through msg_putchar_attr() and
/// msg_outtrans_len_attr(), then flushes the UI.
///
/// NOTE(review): the code lines below are a two-column (old | new) diff
/// rendering; this comment describes the right-hand (new) signature.
///
/// @param output Data to append to screen lines.
/// @param[in,out] count On entry, the size of the data in bytes. When `eof`
///        is false and the remaining data starts with what looks like a
///        truncated UTF-8 sequence, it is set to the number of bytes that
///        were processed before that sequence; otherwise it is not modified.
/// @param eof If true, nothing is held back: all of `output` is printed even
///        if the final sequence looks incomplete.
static void out_data_append_to_screen(char *output, size_t remaining, static void out_data_append_to_screen(char *output, size_t *count,
bool new_line) bool eof)
{ {
char *p = output, *end = output + remaining; char *p = output, *end = output + *count;
while (p < end) { while (p < end) {
// Newline, carriage return, tab and bell are emitted one byte at a time.
if (*p == '\n' || *p == '\r' || *p == TAB || *p == BELL) { if (*p == '\n' || *p == '\r' || *p == TAB || *p == BELL) {
msg_putchar_attr((uint8_t)(*p), 0); msg_putchar_attr((uint8_t)(*p), 0);
p++; p++;
} else { } else {
// Note: this is not 100% precise:
// 1. we don't check if received continuation bytes are already invalid
// and we thus do some buffering that could be avoided
// 2. we don't compose chars over buffer boundaries, even if we see an
// incomplete UTF-8 sequence that could be composing with the last
// complete sequence.
// This will be corrected when we switch to vterm based implementation
// A NUL byte is treated as a sequence of length 1 without calling
// mb_ptr2len_len().
int i = *p ? mb_ptr2len_len((char_u *)p, (int)(end-p)) : 1; int i = *p ? mb_ptr2len_len((char_u *)p, (int)(end-p)) : 1;
// More data may follow (!eof) and the table entry for this lead byte
// (presumably the expected sequence length -- confirm against
// utf8len_tab_zero) exceeds the bytes left: stop here and report via
// *count how many bytes were actually processed.
if (!eof && i == 1 && utf8len_tab_zero[*(uint8_t *)p] > (end-p)) {
*count = (size_t)(p - output);
goto end;
}
(void)msg_outtrans_len_attr((char_u *)p, i, 0); (void)msg_outtrans_len_attr((char_u *)p, i, 0);
p += i; p += i;
} }
} }
// Reached both on normal completion and on the early exit above.
end:
ui_flush(); ui_flush();
} }
static void out_data_cb(Stream *stream, RBuffer *buf, size_t count, void *data, static void out_data_cb(Stream *stream, RBuffer *buf, size_t count, void *data,
bool eof) bool eof)
{ {
// We always output the whole buffer, so the buffer can never
// wrap around.
size_t cnt; size_t cnt;
char *ptr = rbuffer_read_ptr(buf, &cnt); char *ptr = rbuffer_read_ptr(buf, &cnt);
@@ -482,12 +492,16 @@ static void out_data_cb(Stream *stream, RBuffer *buf, size_t count, void *data,
// Save the skipped output. If it is the final chunk, we display it later. // Save the skipped output. If it is the final chunk, we display it later.
out_data_ring(ptr, cnt); out_data_ring(ptr, cnt);
} else { } else {
out_data_append_to_screen(ptr, cnt, eof); out_data_append_to_screen(ptr, &cnt, eof);
} }
if (cnt) { if (cnt) {
rbuffer_consumed(buf, cnt); rbuffer_consumed(buf, cnt);
} }
// Move remaining data to start of buffer, so the buffer can never
// wrap around.
rbuffer_reset(buf);
} }
/// Parses a command string into a sequence of words, taking quotes into /// Parses a command string into a sequence of words, taking quotes into

View File

@@ -4,6 +4,13 @@
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include <stdint.h> #include <stdint.h>
#include <unistd.h>
static void wait(void)
{
fflush(stdout);
usleep(10*1000);
}
static void help(void) static void help(void)
{ {
@@ -61,6 +68,22 @@ int main(int argc, char **argv)
for (uint8_t i = 0; i < number; i++) { for (uint8_t i = 0; i < number; i++) {
printf("%d: %s\n", (int) i, argv[3]); printf("%d: %s\n", (int) i, argv[3]);
} }
} else if (strcmp(argv[1], "UTF-8") == 0) {
// test split-up UTF-8 sequence
printf("\xc3"); wait();
printf("\xa5\n"); wait();
// split up a 2+2 byte grapheme cluster in all possible ways
printf("ref: \xc3\xa5\xcc\xb2\n"); wait();
printf("1: \xc3"); wait();
printf("\xa5\xcc\xb2\n"); wait();
printf("2: \xc3\xa5"); wait();
printf("\xcc\xb2\n"); wait();
printf("3: \xc3\xa5\xcc"); wait();
printf("\xb2\n"); wait();
} else { } else {
fprintf(stderr, "Unknown first argument\n"); fprintf(stderr, "Unknown first argument\n");
return 3; return 3;

View File

@@ -9,6 +9,7 @@ local feed_command = helpers.feed_command
local iswin = helpers.iswin local iswin = helpers.iswin
local clear = helpers.clear local clear = helpers.clear
local command = helpers.command local command = helpers.command
local nvim_dir = helpers.nvim_dir
describe("shell command :!", function() describe("shell command :!", function()
if helpers.pending_win32(pending) then return end if helpers.pending_win32(pending) then return end
@@ -195,5 +196,29 @@ describe("shell command :!", function()
eq(true, screen.bell) eq(true, screen.bell)
end) end)
end) end)
it('handles multibyte sequences split over buffer boundaries', function()
  -- Change into nvim_dir first; the non-Windows command uses a relative path.
  command('cd '..nvim_dir)

  -- Both branches yield a non-empty string, so the and/or selection is safe.
  local cmd = iswin() and '!shell-test UTF-8 ' or '!./shell-test UTF-8'
  feed_command(cmd)

  -- Only case "1:" shows the base character and the combining mark as one
  -- composed cell; cases "2:" and "3:" currently render them separately.
  local expected_screen = [[
{1:~ }|
{1:~ }|
:]]..cmd..[[ |
å |
ref: å̲ |
1: å̲ |
2: å ̲ |
3: å ̲ |
|
{3:Press ENTER or type command to continue}^ |
]]
  screen:expect(expected_screen)
end)
end) end)
end) end)