Files
neovim/src/nvim/vterm/parser.c
Gabriel Ford b28bbee539 fix(terminal): skip setting string_initial to false on no-op (#34176)
Problem:

Currently, undefined behavior can occur when `string_fragment()` is
called with `OSC_COMMAND`. This is because when the state changes to
`OSC_COMMAND`, `string_initial` is set to true. Then, in some cases,
`string_initial` is set back to false directly afterwards, before
the on_osc callback is called. This leads to `term_settermprop()` never
initializing the title.

Solution:

In all of the no-op cases in `string_fragment()` currently, we continue
to the end of the function where `vt->parser.string_initial` is set to
false. This change returns in the no-op cases instead since in these
cases the string has not yet been terminated and sent to the callback.

Note:

This change also adds a test with a byte sequence from the file
in #34028 that caused nvim to crash. This byte sequence is the shortest
sequence I could trim down from that file that would still trigger the
crash. There are also two other tests I added which validate that
setting the title with OSC-0 and OSC-2 still works.

Fixes: #34028
2025-05-29 13:29:16 -05:00

412 lines
11 KiB
C

#include <assert.h>
#include <stdio.h>
#include <string.h>
#include "nvim/vterm/parser.h"
#include "nvim/vterm/vterm.h"
#include "nvim/vterm/vterm_internal_defs.h"
#ifdef INCLUDE_GENERATED_DECLARATIONS
# include "vterm/parser.c.generated.h"
#endif
#undef DEBUG_PARSER
// Reports whether `c` falls in the intermediate-byte range 0x20..0x2f
// (inclusive on both ends).
static bool is_intermed(uint8_t c)
{
  if (c < 0x20) {
    return false;
  }
  return c <= 0x2f;
}
// Hands a control byte to the parser's `control` callback.
//
// The byte is logged as unhandled when no callback table or no `control`
// entry is registered, or when the callback returns zero.
static void do_control(VTerm *vt, uint8_t control)
{
  bool handled = vt->parser.callbacks
                 && vt->parser.callbacks->control
                 && (*vt->parser.callbacks->control)(control, vt->parser.cbdata);

  if (!handled) {
    DEBUG_LOG("libvterm: Unhandled control 0x%02x\n", control);
  }
}
// Dispatches a completed CSI sequence to the parser's `csi` callback.
//
// The leader and intermediate strings are passed as NULL when their recorded
// length is zero. The sequence is logged as unhandled when no callback table
// or no `csi` entry is registered, or when the callback returns zero.
//
// @param vt       terminal whose parser holds the collected CSI state
// @param command  final byte of the CSI sequence
static void do_csi(VTerm *vt, char command)
{
#ifdef DEBUG_PARSER
  // Dump the parsed sequence: the leader, then every argument (a newline
  // follows each argument whose CSI_ARG_HAS_MORE flag is clear), then the
  // intermediate bytes once at the end.
  printf("Parsed CSI args as:\n");
  printf(" leader: %s\n", vt->parser.v.csi.leader);
  for (int argi = 0; argi < vt->parser.v.csi.argi; argi++) {
    // Cast so the argument always matches the %lu conversion.
    printf(" %lu", (unsigned long)CSI_ARG(vt->parser.v.csi.args[argi]));
    if (!CSI_ARG_HAS_MORE(vt->parser.v.csi.args[argi])) {
      printf("\n");
    }
  }
  printf(" intermed: %s\n", vt->parser.intermed);
#endif
  if (vt->parser.callbacks && vt->parser.callbacks->csi) {
    if ((*vt->parser.callbacks->csi)(vt->parser.v.csi.leaderlen ? vt->parser.v.csi.leader : NULL,
                                     vt->parser.v.csi.args,
                                     vt->parser.v.csi.argi,
                                     vt->parser.intermedlen ? vt->parser.intermed : NULL,
                                     command,
                                     vt->parser.cbdata)) {
      return;
    }
  }
  DEBUG_LOG("libvterm: Unhandled CSI %c\n", command);
}
// Dispatches a completed escape sequence to the parser's `escape` callback.
//
// The callback receives the collected intermediate bytes followed by
// `command`, as a NUL-terminated string together with its length (excluding
// the terminator). The sequence is logged as unhandled when no callback table
// or no `escape` entry is registered, or when the callback returns zero.
static void do_escape(VTerm *vt, char command)
{
  char seq[INTERMED_MAX + 1];
  size_t n = (size_t)vt->parser.intermedlen;

  // Intermediates first, then the final byte and the terminator.
  strncpy(seq, vt->parser.intermed, n);  // NOLINT(runtime/printf)
  seq[n] = command;
  n++;
  seq[n] = 0;

  bool handled = vt->parser.callbacks
                 && vt->parser.callbacks->escape
                 && (*vt->parser.callbacks->escape)(seq, n, vt->parser.cbdata);

  if (!handled) {
    DEBUG_LOG("libvterm: Unhandled escape ESC 0x%02x\n", command);
  }
}
// Delivers one piece of a string body (OSC, DCS, APC, PM or SOS) to the
// callback matching the current parser state.
//
// The fragment is flagged `initial` according to `vt->parser.string_initial`
// and `final` according to the `final` argument. After a string state has been
// processed (whether or not a callback was registered) `string_initial` is
// cleared. In every other state the function does nothing at all, so
// `string_initial` keeps its value.
//
// @param vt     terminal whose parser state selects the callback
// @param str    start of the fragment bytes
// @param len    number of bytes in the fragment
// @param final  whether this fragment ends the string
static void string_fragment(VTerm *vt, const char *str, size_t len, bool final)
{
  const VTermParserCallbacks *cbs = vt->parser.callbacks;
  void *user = vt->parser.cbdata;

  VTermStringFragment frag = {
    .str = str,
    .len = len,
    .initial = vt->parser.string_initial,
    .final = final,
  };

  switch (vt->parser.state) {
  case NORMAL:
  case CSI_LEADER:
  case CSI_ARGS:
  case CSI_INTERMED:
  case OSC_COMMAND:
  case DCS_COMMAND:
    // Not inside a string body: nothing to deliver, and string_initial must
    // not be consumed.
    return;

  case OSC:
    if (cbs != NULL && cbs->osc != NULL) {
      cbs->osc(vt->parser.v.osc.command, frag, user);
    }
    break;

  case DCS_VTERM:
    if (cbs != NULL && cbs->dcs != NULL) {
      cbs->dcs(vt->parser.v.dcs.command, (size_t)vt->parser.v.dcs.commandlen, frag, user);
    }
    break;

  case APC:
    if (cbs != NULL && cbs->apc != NULL) {
      cbs->apc(frag, user);
    }
    break;

  case PM:
    if (cbs != NULL && cbs->pm != NULL) {
      cbs->pm(frag, user);
    }
    break;

  case SOS:
    if (cbs != NULL && cbs->sos != NULL) {
      cbs->sos(frag, user);
    }
    break;
  }

  // Any later fragment of the same string is a continuation.
  vt->parser.string_initial = false;
}
// Feeds `len` bytes from `bytes` through the parser state machine.
//
// Each byte either advances the parser state or is dispatched through
// do_control(), do_escape(), do_csi(), string_fragment() or the `text`
// callback. Parser state lives in `vt->parser`, so a sequence or string body
// may span several calls; string bodies are then delivered as several
// fragments.
//
// @param vt     terminal whose parser state is advanced
// @param bytes  input buffer
// @param len    number of bytes in `bytes`
// @return `len` — the whole buffer is always reported as consumed
size_t vterm_input_write(VTerm *vt, const char *bytes, size_t len)
{
size_t pos = 0;
// Start (within `bytes`) of the string fragment currently being collected, or
// NULL when no fragment is open.
const char *string_start;
// If the previous call ended inside a string body, that body continues at the
// start of this buffer.
switch (vt->parser.state) {
case NORMAL:
case CSI_LEADER:
case CSI_ARGS:
case CSI_INTERMED:
case OSC_COMMAND:
case DCS_COMMAND:
string_start = NULL;
break;
case OSC:
case DCS_VTERM:
case APC:
case PM:
case SOS:
string_start = bytes;
break;
}
// Switches to state `st` and closes any open fragment.
#define ENTER_STATE(st) do { vt->parser.state = st; string_start = NULL; } while (0)
#define ENTER_NORMAL_STATE() ENTER_STATE(NORMAL)
// NOTE(review): depends on the declaration order of the state enum —
// presumably OSC_COMMAND and every enumerator after it are string-related
// states; confirm against the enum definition.
#define IS_STRING_STATE() (vt->parser.state >= OSC_COMMAND)
for (; pos < len; pos++) {
uint8_t c = (uint8_t)bytes[pos];
// Raw C1 bytes are only interpreted when UTF-8 mode is off; an ESC-hoisted
// C1 (below) overrides this.
bool c1_allowed = !vt->mode.utf8;
if (c == 0x00 || c == 0x7f) { // NUL, DEL
if (IS_STRING_STATE()) {
// Hand over what was collected so far (excluding this byte) and resume the
// fragment after it.
string_fragment(vt, string_start, (size_t)(bytes + pos - string_start), false);
string_start = bytes + pos + 1;
}
if (vt->parser.emit_nul) {
do_control(vt, c);
}
continue;
}
// C0 controls that drive the state machine. BEL inside a string state hits
// the empty branch of this chain and is then handled as a string terminator
// by the state switch further down.
if (c == 0x18 || c == 0x1a) { // CAN, SUB
vt->parser.in_esc = false;
ENTER_NORMAL_STATE();
if (vt->parser.emit_nul) {
do_control(vt, c);
}
continue;
} else if (c == 0x1b) { // ESC
vt->parser.intermedlen = 0;
// Outside a string state ESC abandons the sequence in progress. Inside one
// the state is kept, so a following `\` (ESC \ == ST) can still terminate
// the string.
if (!IS_STRING_STATE()) {
vt->parser.state = NORMAL;
}
vt->parser.in_esc = true;
continue;
} else if (c == 0x07 // BEL, can stand for ST in OSC or DCS state
&& IS_STRING_STATE()) {} else if (c < 0x20) { // other C0
if (vt->parser.state == SOS) {
continue; // All other C0s permitted in SOS
}
// Flush the fragment collected before the control byte, dispatch the
// control, then resume the fragment after it.
if (IS_STRING_STATE()) {
string_fragment(vt, string_start, (size_t)(bytes + pos - string_start), false);
}
do_control(vt, c);
if (IS_STRING_STATE()) {
string_start = bytes + pos + 1;
}
continue;
}
// Length of the open fragment up to, but excluding, the current byte. It is
// only consumed when a string is terminated in the string-state case below.
size_t string_len = (size_t)(bytes + pos - string_start);
if (vt->parser.in_esc) {
// Hoist an ESC letter into a C1 if we're not in a string mode
// Always accept ESC \ == ST even in string mode
if (!vt->parser.intermedlen
&& c >= 0x40 && c < 0x60
&& ((!IS_STRING_STATE() || c == 0x5c))) {
// 0x40..0x5f becomes the corresponding C1 byte 0x80..0x9f.
c += 0x40;
c1_allowed = true;
// Presumably drops the ESC byte preceding `c` from the fragment.
if (string_len) {
assert(string_len > 0);
string_len -= 1;
}
vt->parser.in_esc = false;
} else {
// ESC followed by anything else ends any string in progress; in_esc stays
// set, so the NORMAL case below treats the byte as part of an escape
// sequence.
string_start = NULL;
vt->parser.state = NORMAL;
}
}
switch (vt->parser.state) {
case CSI_LEADER:
// Extract leader bytes 0x3c to 0x3f
if (c >= 0x3c && c <= 0x3f) {
// Leader bytes beyond the buffer capacity are silently dropped.
if (vt->parser.v.csi.leaderlen < CSI_LEADER_MAX - 1) {
vt->parser.v.csi.leader[vt->parser.v.csi.leaderlen++] = (char)c;
}
break;
}
// First non-leader byte: terminate the leader and start the argument list,
// then process this same byte as an argument byte.
vt->parser.v.csi.leader[vt->parser.v.csi.leaderlen] = 0;
vt->parser.v.csi.argi = 0;
vt->parser.v.csi.args[0] = CSI_ARG_MISSING;
vt->parser.state = CSI_ARGS;
FALLTHROUGH;
case CSI_ARGS:
// Numerical value of argument
if (c >= '0' && c <= '9') {
if (vt->parser.v.csi.args[vt->parser.v.csi.argi] == CSI_ARG_MISSING) {
vt->parser.v.csi.args[vt->parser.v.csi.argi] = 0;
}
// NOTE(review): no overflow guard on the accumulated value is visible here.
vt->parser.v.csi.args[vt->parser.v.csi.argi] *= 10;
vt->parser.v.csi.args[vt->parser.v.csi.argi] += c - '0';
break;
}
// ':' separates like ';' but additionally flags the argument just finished
// with CSI_ARG_FLAG_MORE.
if (c == ':') {
vt->parser.v.csi.args[vt->parser.v.csi.argi] |= CSI_ARG_FLAG_MORE;
c = ';';
}
if (c == ';') {
// NOTE(review): argi is advanced without a visible bound check against
// the capacity of args[] — confirm the array size / add a limit.
vt->parser.v.csi.argi++;
vt->parser.v.csi.args[vt->parser.v.csi.argi] = CSI_ARG_MISSING;
break;
}
// Any other byte ends the argument list; after this increment argi is the
// argument count that do_csi() passes to the callback.
vt->parser.v.csi.argi++;
vt->parser.intermedlen = 0;
vt->parser.state = CSI_INTERMED;
FALLTHROUGH;
case CSI_INTERMED:
if (is_intermed(c)) {
// Intermediate bytes beyond the buffer capacity are silently dropped.
if (vt->parser.intermedlen < INTERMED_MAX - 1) {
vt->parser.intermed[vt->parser.intermedlen++] = (char)c;
}
break;
} else if (c == 0x1b) {
// ESC in CSI cancels
} else if (c >= 0x40 && c <= 0x7e) {
// Final byte: terminate the intermediate string and dispatch.
vt->parser.intermed[vt->parser.intermedlen] = 0;
do_csi(vt, (char)c);
}
// else was invalid CSI
ENTER_NORMAL_STATE();
break;
case OSC_COMMAND:
// Numerical value of command
if (c >= '0' && c <= '9') {
// -1 marks "no digit seen yet" (set when the OSC introducer is parsed).
if (vt->parser.v.osc.command == -1) {
vt->parser.v.osc.command = 0;
} else {
vt->parser.v.osc.command *= 10;
}
vt->parser.v.osc.command += c - '0';
break;
}
// ';' ends the command number; the string body starts after it.
if (c == ';') {
vt->parser.state = OSC;
string_start = bytes + pos + 1;
break;
}
// No ';' separator: the current byte is the first byte of the string body,
// so process it right away as a string-state byte.
string_start = bytes + pos;
string_len = 0;
vt->parser.state = OSC;
goto string_state;
case DCS_COMMAND:
// Collect command bytes (bytes beyond the buffer capacity are dropped).
if (vt->parser.v.dcs.commandlen < CSI_LEADER_MAX) {
vt->parser.v.dcs.command[vt->parser.v.dcs.commandlen++] = (char)c;
}
// A byte in 0x40..0x7e ends the command; the string body starts after it.
if (c >= 0x40 && c <= 0x7e) {
string_start = bytes + pos + 1;
vt->parser.state = DCS_VTERM;
}
break;
string_state:
case OSC:
case DCS_VTERM:
case APC:
case PM:
case SOS:
// BEL, or ST (0x9c) when C1 is allowed, terminates the string: deliver the
// final fragment and leave the string state. Every other byte simply stays
// part of the open fragment.
if (c == 0x07 || (c1_allowed && c == 0x9c)) {
string_fragment(vt, string_start, string_len, true);
ENTER_NORMAL_STATE();
}
break;
case NORMAL:
if (vt->parser.in_esc) {
// Inside an escape sequence: collect intermediates until a byte in
// 0x30..0x7e completes it.
if (is_intermed(c)) {
if (vt->parser.intermedlen < INTERMED_MAX - 1) {
vt->parser.intermed[vt->parser.intermedlen++] = (char)c;
}
} else if (c >= 0x30 && c < 0x7f) {
do_escape(vt, (char)c);
vt->parser.in_esc = 0;
ENTER_NORMAL_STATE();
} else {
DEBUG_LOG("TODO: Unhandled byte %02x in Escape\n", c);
}
break;
}
// C1 controls (raw, or hoisted from ESC + letter above) introduce the
// multi-byte sequences; any other C1 is dispatched as a plain control.
if (c1_allowed && c >= 0x80 && c < 0xa0) {
switch (c) {
case 0x90: // DCS
vt->parser.string_initial = true;
vt->parser.v.dcs.commandlen = 0;
ENTER_STATE(DCS_COMMAND);
break;
case 0x98: // SOS
vt->parser.string_initial = true;
ENTER_STATE(SOS);
// ENTER_STATE cleared string_start; the body starts at the next byte.
string_start = bytes + pos + 1;
break;
case 0x9b: // CSI
vt->parser.v.csi.leaderlen = 0;
ENTER_STATE(CSI_LEADER);
break;
case 0x9d: // OSC
vt->parser.v.osc.command = -1;
vt->parser.string_initial = true;
ENTER_STATE(OSC_COMMAND);
break;
case 0x9e: // PM
vt->parser.string_initial = true;
ENTER_STATE(PM);
string_start = bytes + pos + 1;
break;
case 0x9f: // APC
vt->parser.string_initial = true;
ENTER_STATE(APC);
string_start = bytes + pos + 1;
break;
default:
do_control(vt, c);
break;
}
} else {
// Plain text: offer the rest of the buffer to the text callback, whose
// return value is taken as the number of bytes it consumed.
size_t eaten = 0;
if (vt->parser.callbacks && vt->parser.callbacks->text) {
eaten = (size_t)(*vt->parser.callbacks->text)(bytes + pos, len - pos, vt->parser.cbdata);
}
if (!eaten) {
DEBUG_LOG("libvterm: Text callback did not consume any input\n");
// force it to make progress
eaten = 1;
}
pos += (eaten - 1); // we'll ++ it again in a moment
}
break;
}
}
// The buffer ended inside a string body: deliver what was collected as a
// non-final fragment. If the last byte seen was an ESC (in_esc still set), it
// is left out of the fragment.
if (string_start) {
size_t string_len = (size_t)(bytes + pos - string_start);
if (string_len > 0) {
if (vt->parser.in_esc) {
string_len -= 1;
}
string_fragment(vt, string_start, string_len, false);
}
}
return len;
}
// Registers the parser-level callback table and the opaque pointer that is
// handed back to every callback as its user-data argument.
//
// @param vt         terminal whose parser callbacks are replaced
// @param callbacks  callback table to use
// @param user       pointer stored as the callbacks' user data
void vterm_parser_set_callbacks(VTerm *vt, const VTermParserCallbacks *callbacks, void *user)
{
  vt->parser.cbdata = user;
  vt->parser.callbacks = callbacks;
}