input: paste encoding replaces unsafe control characters with spaces (#10746)

For a hardcoded set of control characters, replace them with spaces when
encoding pasted text. This prevents unsafe control characters from being
pasted, which could trick a user into executing commands unexpectedly.

This happens regardless of bracketed paste mode, because certain
characters processed by the kernel pty line discipline can break
bracketed paste (source from zsh:

https://zsh-workers.zsh.narkive.com/Kd3evJ7t/bracketed-paste-mode-in-xterm-and-urxvt).

This behavior is based on xterm's behavior
(button.c, lines 2578-2595, at revision caac5c35a2),
including the list of characters. Note that, as a comment in the code
says, we should be sourcing some of these from a tcgetattr call instead
of hardcoding them, but this is a good start.
This commit is contained in:
Mitchell Hashimoto
2026-02-16 10:16:27 -08:00
committed by GitHub
2 changed files with 85 additions and 0 deletions

View File

@@ -39,10 +39,57 @@ pub fn encode(
[]const u8 => Error![3][]const u8,
else => unreachable,
} {
// These are the set of byte values that are always replaced by
// a space (per xterm's behavior) for any text insertion method e.g.
// a paste, drag and drop, etc. These are copied directly from xterm's
// source.
const strip: []const u8 = &.{
0x00, // NUL
0x08, // BS
0x05, // ENQ
0x04, // EOT
0x1B, // ESC
0x7F, // DEL
// These can be overridden by the running terminal program
// via tcsetattr, so they aren't totally safe to hardcode like
// this. In practice, I haven't seen modern programs change these
// and it's a much bigger architectural change to pass these through
// so for now they're hardcoded.
0x03, // VINTR (Ctrl+C)
0x1C, // VQUIT (Ctrl+\)
0x15, // VKILL (Ctrl+U)
0x1A, // VSUSP (Ctrl+Z)
0x11, // VSTART (Ctrl+Q)
0x13, // VSTOP (Ctrl+S)
0x17, // VWERASE (Ctrl+W)
0x16, // VLNEXT (Ctrl+V)
0x12, // VREPRINT (Ctrl+R)
0x0F, // VDISCARD (Ctrl+O)
};
const mutable = @TypeOf(data) == []u8;
var result: [3][]const u8 = .{ "", data, "" };
// If we have any of the strip values, then we need to replace them
// with spaces. This is what xterm does and it does it regardless
// of bracketed paste mode. This is a security measure to prevent pastes
// from containing bytes that could be used to inject commands.
if (std.mem.indexOfAny(u8, data, strip) != null) {
if (comptime !mutable) return Error.MutableRequired;
var offset: usize = 0;
while (std.mem.indexOfAny(
u8,
data[offset..],
strip,
)) |idx| {
offset += idx;
data[offset] = ' ';
offset += 1;
}
}
// Bracketed paste mode (mode 2004) wraps pasted data in
// fenceposts so that the terminal can ignore things like newlines.
if (opts.bracketed) {
@@ -143,3 +190,39 @@ test "encode unbracketed windows-stye newline" {
try testing.expectEqualStrings("hello\r\rworld", result[1]);
try testing.expectEqualStrings("", result[2]);
}
// A read-only slice cannot be rewritten in place, so encoding input that
// contains an unsafe byte (NUL here) must fail with MutableRequired.
test "encode strip unsafe bytes const" {
    const input: []const u8 = "hello\x00world";
    const outcome = encode(input, .{ .bracketed = true });
    try std.testing.expectError(Error.MutableRequired, outcome);
}
// With bracketed paste enabled, the ESC and NUL bytes inside the payload are
// overwritten with spaces, and the payload is still wrapped in the
// bracketed-paste start/end sequences.
test "encode strip unsafe bytes mutable bracketed" {
    const alloc = std.testing.allocator;
    const buf = try alloc.dupe(u8, "hel\x1blo\x00world");
    defer alloc.free(buf);

    const encoded = encode(buf, .{ .bracketed = true });
    try std.testing.expectEqualStrings("\x1b[200~", encoded[0]);
    try std.testing.expectEqualStrings("hel lo world", encoded[1]);
    try std.testing.expectEqualStrings("\x1b[201~", encoded[2]);
}
// Unsafe bytes are replaced even when bracketed paste is off: the 0x03
// (Ctrl+C) byte becomes a space and no prefix or suffix is emitted.
test "encode strip unsafe bytes mutable unbracketed" {
    const alloc = std.testing.allocator;
    const buf = try alloc.dupe(u8, "hel\x03lo");
    defer alloc.free(buf);

    const encoded = encode(buf, .{ .bracketed = false });
    try std.testing.expectEqualStrings("", encoded[0]);
    try std.testing.expectEqualStrings("hel lo", encoded[1]);
    try std.testing.expectEqualStrings("", encoded[2]);
}
// Every unsafe byte is replaced one-for-one, so the payload keeps its
// length: three consecutive unsafe bytes (NUL, BS, DEL) must come back as
// exactly three spaces, not a single collapsed space.
test "encode strip multiple unsafe bytes" {
    const testing = std.testing;
    const data: []u8 = try testing.allocator.dupe(u8, "\x00\x08\x7f");
    defer testing.allocator.free(data);
    const result = encode(data, .{ .bracketed = true });
    // Written as a repetition so the expected byte count is explicit and
    // cannot be lost to whitespace normalization.
    try testing.expectEqualStrings(" " ** 3, result[1]);
}

View File

@@ -40,6 +40,8 @@ extend-ignore-re = [
"kHOM\\d*",
# Ignore "typos" in sprite font draw fn names
"draw[0-9A-F]+(_[0-9A-F]+)?\\(",
# Ignore test data in src/input/paste.zig
"\"hel\\\\x",
]
[default.extend-words]