shell: handle split-up UTF-8 sequences

This commit is contained in:
Björn Linse
2018-02-08 15:11:56 +01:00
parent 01cdeff626
commit f75c4b39ec
3 changed files with 69 additions and 7 deletions

View File

@@ -422,7 +422,7 @@ static void out_data_ring(char *output, size_t size)
}
if (output == NULL && size == SIZE_MAX) { // Print mode
out_data_append_to_screen(last_skipped, last_skipped_len, true);
out_data_append_to_screen(last_skipped, &last_skipped_len, true);
return;
}
@@ -450,30 +450,40 @@ static void out_data_ring(char *output, size_t size)
/// @param output Data to append to screen lines.
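/// @param[in,out] count Size of data on entry. On return, the number of bytes
///                      consumed; this is smaller than the input size when the
///                      data ends in an incomplete UTF-8 sequence and `eof` is
///                      false.
/// @param eof If true, a trailing incomplete UTF-8 sequence is output as-is
///            instead of being left unconsumed.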
static void out_data_append_to_screen(char *output, size_t remaining,
bool new_line)
static void out_data_append_to_screen(char *output, size_t *count,
bool eof)
{
char *p = output, *end = output + remaining;
char *p = output, *end = output + *count;
while (p < end) {
if (*p == '\n' || *p == '\r' || *p == TAB || *p == BELL) {
msg_putchar_attr((uint8_t)(*p), 0);
p++;
} else {
// Note: this is not 100% precise:
// 1. we don't check if received continuation bytes are already invalid
// and we thus do some buffering that could be avoided
// 2. we don't compose chars over buffer boundaries, even if we see an
// incomplete UTF-8 sequence that could be composing with the last
// complete sequence.
// This will be corrected when we switch to vterm based implementation
int i = *p ? mb_ptr2len_len((char_u *)p, (int)(end-p)) : 1;
if (!eof && i == 1 && utf8len_tab_zero[*(uint8_t *)p] > (end-p)) {
*count = (size_t)(p - output);
goto end;
}
(void)msg_outtrans_len_attr((char_u *)p, i, 0);
p += i;
}
}
end:
ui_flush();
}
static void out_data_cb(Stream *stream, RBuffer *buf, size_t count, void *data,
bool eof)
{
// We always output the whole buffer, so the buffer can never
// wrap around.
size_t cnt;
char *ptr = rbuffer_read_ptr(buf, &cnt);
@@ -482,12 +492,16 @@ static void out_data_cb(Stream *stream, RBuffer *buf, size_t count, void *data,
// Save the skipped output. If it is the final chunk, we display it later.
out_data_ring(ptr, cnt);
} else {
out_data_append_to_screen(ptr, cnt, eof);
out_data_append_to_screen(ptr, &cnt, eof);
}
if (cnt) {
rbuffer_consumed(buf, cnt);
}
// Move remaining data to start of buffer, so the buffer can never
// wrap around.
rbuffer_reset(buf);
}
/// Parses a command string into a sequence of words, taking quotes into

View File

@@ -4,6 +4,13 @@
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
/// Flushes everything buffered on stdout, then sleeps for 10 milliseconds.
///
/// Called between partial writes in the "UTF-8" test mode; presumably the
/// pause lets the reader receive each piece as a separate chunk.
static void wait(void)
{
  const int delay_us = 10 * 1000;  // 10 ms, expressed in microseconds

  fflush(stdout);
  usleep(delay_us);
}
static void help(void)
{
@@ -61,6 +68,22 @@ int main(int argc, char **argv)
for (uint8_t i = 0; i < number; i++) {
printf("%d: %s\n", (int) i, argv[3]);
}
} else if (strcmp(argv[1], "UTF-8") == 0) {
// test split-up UTF-8 sequence
printf("\xc3"); wait();
printf("\xa5\n"); wait();
// split up a 2+2 byte grapheme cluster in all possible ways
printf("ref: \xc3\xa5\xcc\xb2\n"); wait();
printf("1: \xc3"); wait();
printf("\xa5\xcc\xb2\n"); wait();
printf("2: \xc3\xa5"); wait();
printf("\xcc\xb2\n"); wait();
printf("3: \xc3\xa5\xcc"); wait();
printf("\xb2\n"); wait();
} else {
fprintf(stderr, "Unknown first argument\n");
return 3;

View File

@@ -9,6 +9,7 @@ local feed_command = helpers.feed_command
local iswin = helpers.iswin
local clear = helpers.clear
local command = helpers.command
local nvim_dir = helpers.nvim_dir
describe("shell command :!", function()
if helpers.pending_win32(pending) then return end
@@ -195,5 +196,29 @@ describe("shell command :!", function()
eq(true, screen.bell)
end)
end)
it('handles multibyte sequences split over buffer boundaries', function()
  -- shell-test is invoked relative to nvim_dir, so switch there first.
  command('cd '..nvim_dir)

  -- Both alternatives are non-empty strings, so the and/or idiom is safe.
  local cmd = iswin() and '!shell-test UTF-8 ' or '!./shell-test UTF-8'
  feed_command(cmd)

  -- Note: only the first example of a split composed char works; in cases
  -- 2 and 3 the combining char ends up in a cell of its own.
  local expected = [[
{1:~ }|
{1:~ }|
:]]..cmd..[[ |
å |
ref: å̲ |
1: å̲ |
2: å ̲ |
3: å ̲ |
|
{3:Press ENTER or type command to continue}^ |
]]
  screen:expect(expected)
end)
end)
end)