From d4dcecb071a113f5a27ab2948c721384210114fc Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Sat, 4 Oct 2025 13:24:34 -0700 Subject: [PATCH] Move paste encoding to the input package, test, optimize away one alloc This moves our paste logic to `src/input` in preparation for exposing this as part of libghostty-vt. This yields an immediate benefit of unit tests for paste encoding. Additionally, we were able to remove one allocation on every unbracketed paste path unless the input specifically contains a newline. Unlikely to be noticable, but nice. NOTE: This also includes one change in behavior: we no longer encode `\r\n` and a single `\r`, but as a duplicate `\r\r`. This matches xterm behavior and I don't think will result in any issues since duplicate carriage returns should do nothing in well-behaved terminals. --- src/Surface.zig | 76 +++++++------------- src/input.zig | 1 + src/input/paste.zig | 146 ++++++++++++++++++++++++++++++++++++++ src/lib_vt.zig | 13 ++++ src/terminal/main.zig | 3 - src/terminal/modes.zig | 2 +- src/terminal/sanitize.zig | 14 ---- 7 files changed, 186 insertions(+), 69 deletions(-) create mode 100644 src/input/paste.zig delete mode 100644 src/terminal/sanitize.zig diff --git a/src/Surface.zig b/src/Surface.zig index 018c4206b..5aabb2b80 100644 --- a/src/Surface.zig +++ b/src/Surface.zig @@ -5270,13 +5270,10 @@ fn completeClipboardPaste( ) !void { if (data.len == 0) return; - const critical: struct { - bracketed: bool, - } = critical: { + const encode_opts: input.paste.Options = encode_opts: { self.renderer_state.mutex.lock(); defer self.renderer_state.mutex.unlock(); - - const bracketed = self.io.terminal.modes.get(.bracketed_paste); + const opts: input.paste.Options = .fromTerminal(&self.io.terminal); // If we have paste protection enabled, we detect unsafe pastes and return // an error. The error approach allows apprt to attempt to complete the paste @@ -5292,7 +5289,7 @@ fn completeClipboardPaste( // This is set during confirmation usually. if (allow_unsafe) break :unsafe false; - if (bracketed) { + if (opts.bracketed) { // If we're bracketed and the paste contains and ending // bracket then something naughty might be going on and we // never trust it. @@ -5303,7 +5300,7 @@ fn completeClipboardPaste( if (self.config.clipboard_paste_bracketed_safe) break :unsafe false; } - break :unsafe !terminal.isSafePaste(data); + break :unsafe !input.paste.isSafe(data); }; if (unsafe) { @@ -5317,55 +5314,32 @@ fn completeClipboardPaste( log.warn("error scrolling to bottom err={}", .{err}); }; - break :critical .{ - .bracketed = bracketed, - }; + break :encode_opts opts; }; - if (critical.bracketed) { - // If we're bracketd we write the data as-is to the terminal with - // the bracketed paste escape codes around it. - self.io.queueMessage(.{ - .write_stable = "\x1B[200~", - }, .unlocked); + // Encode the data. In most cases this doesn't require any + // copies, so we optimize for that case. + var data_duped: ?[]u8 = null; + const vecs = input.paste.encode(data, encode_opts) catch |err| switch (err) { + error.MutableRequired => vecs: { + const buf: []u8 = try self.alloc.dupe(u8, data); + errdefer self.alloc.free(buf); + data_duped = buf; + break :vecs input.paste.encode(buf, encode_opts); + }, + }; + defer if (data_duped) |v| { + // This code path means the data did require a copy and mutation. + // We must free it. + self.alloc.free(v); + }; + + for (vecs) |vec| if (vec.len > 0) { self.io.queueMessage(try termio.Message.writeReq( self.alloc, - data, + vec, ), .unlocked); - self.io.queueMessage(.{ - .write_stable = "\x1B[201~", - }, .unlocked); - } else { - // If its not bracketed the input bytes are indistinguishable from - // keystrokes, so we must be careful. For example, we must replace - // any newlines with '\r'. - - // We just do a heap allocation here because its easy and I don't think - // worth the optimization of using small messages. - var buf = try self.alloc.alloc(u8, data.len); - defer self.alloc.free(buf); - - // This is super, super suboptimal. We can easily make use of SIMD - // here, but maybe LLVM in release mode is smart enough to figure - // out something clever. Either way, large non-bracketed pastes are - // increasingly rare for modern applications. - var len: usize = 0; - for (data, 0..) |ch, i| { - const dch = switch (ch) { - '\n' => '\r', - '\r' => if (i + 1 < data.len and data[i + 1] == '\n') continue else ch, - else => ch, - }; - - buf[len] = dch; - len += 1; - } - - self.io.queueMessage(try termio.Message.writeReq( - self.alloc, - buf[0..len], - ), .unlocked); - } + }; } fn completeClipboardReadOSC52( diff --git a/src/input.zig b/src/input.zig index caaf80509..06d7dc96a 100644 --- a/src/input.zig +++ b/src/input.zig @@ -9,6 +9,7 @@ pub const command = @import("input/command.zig"); pub const function_keys = @import("input/function_keys.zig"); pub const keycodes = @import("input/keycodes.zig"); pub const kitty = @import("input/kitty.zig"); +pub const paste = @import("input/paste.zig"); pub const ctrlOrSuper = key.ctrlOrSuper; pub const Action = key.Action; diff --git a/src/input/paste.zig b/src/input/paste.zig new file mode 100644 index 000000000..29787c385 --- /dev/null +++ b/src/input/paste.zig @@ -0,0 +1,146 @@ +const std = @import("std"); +const assert = std.debug.assert; +const Terminal = @import("../terminal/Terminal.zig"); + +pub const Options = struct { + /// True if bracketed paste mode is on. + bracketed: bool, + + /// Return the encoding options based on the current terminal state. + pub fn fromTerminal(t: *const Terminal) Options { + return .{ + .bracketed = t.modes.get(.bracketed_paste), + }; + } +}; + +/// Encode the given data for pasting. The resulting value can be written +/// to the pty to perform a paste of the input data. +/// +/// The data can be either a `[]u8` or a `[]const u8`. If the data +/// type is const then `EncodeError` may be returned. If the data type +/// is mutable then this function can't return an error. +/// +/// This slightly complex calling style allows for initially const +/// data to be passed in without an allocation, since it is rare in normal +/// use cases that the data will need to be modified. In the unlikely case +/// data does need to be modified, the caller can make a mutable copy +/// after seeing the error. +/// +/// The data is returned as a set of slices to limit allocations. The caller +/// can combine the slices into a single buffer if desired. +/// +/// WARNING: The input data is not checked for safety. See the `isSafe` +/// function to check if the data is safe to paste. +pub fn encode( + data: anytype, + opts: Options, +) switch (@TypeOf(data)) { + []u8 => [3][]const u8, + []const u8 => Error![3][]const u8, + else => unreachable, +} { + const mutable = @TypeOf(data) == []u8; + + var result: [3][]const u8 = .{ "", data, "" }; + + // Bracketed paste mode (mode 2004) wraps pasted data in + // fenceposts so that the terminal can ignore things like newlines. + if (opts.bracketed) { + result[0] = "\x1b[200~"; + result[2] = "\x1b[201~"; + return result; + } + + // Non-bracketed. We have to replace newline with `\r`. This matches + // the behavior of xterm and other terminals. For `\r\n` this will + // result in `\r\r` which does match xterm. + if (comptime mutable) { + std.mem.replaceScalar(u8, data, '\n', '\r'); + } else if (std.mem.indexOfScalar(u8, data, '\n') != null) { + return Error.MutableRequired; + } + + return result; +} + +pub const Error = error{ + /// Returned if encoding requires a mutable copy of the data. This + /// can only be returned if the input data type is const. + MutableRequired, +}; + +/// Returns true if the data looks safe to paste. Data is considered +/// unsafe if it contains any of the following: +/// +/// - `\n`: Newlines can be used to inject commands. +/// - `\x1b[201~`: This is the end of a bracketed paste. This cane be used +/// to exit a bracketed paste and inject commands. +/// +/// We consider any scenario unsafe regardless of current terminal state. +/// For example, even if bracketed paste mode is not active, we still +/// consider `\x1b[201~` unsafe. The existence of these types of bytes +/// should raise suspicion that the producer of the paste data is +/// acting strangely. +pub fn isSafe(data: []const u8) bool { + return std.mem.indexOf(u8, data, "\n") == null and + std.mem.indexOf(u8, data, "\x1b[201~") == null; +} + +test isSafe { + const testing = std.testing; + try testing.expect(isSafe("hello")); + try testing.expect(!isSafe("hello\n")); + try testing.expect(!isSafe("hello\nworld")); + try testing.expect(!isSafe("he\x1b[201~llo")); +} + +test "encode bracketed" { + const testing = std.testing; + const result = try encode( + @as([]const u8, "hello"), + .{ .bracketed = true }, + ); + try testing.expectEqualStrings("\x1b[200~", result[0]); + try testing.expectEqualStrings("hello", result[1]); + try testing.expectEqualStrings("\x1b[201~", result[2]); +} + +test "encode unbracketed no newlines" { + const testing = std.testing; + const result = try encode( + @as([]const u8, "hello"), + .{ .bracketed = false }, + ); + try testing.expectEqualStrings("", result[0]); + try testing.expectEqualStrings("hello", result[1]); + try testing.expectEqualStrings("", result[2]); +} + +test "encode unbracketed newlines const" { + const testing = std.testing; + try testing.expectError(Error.MutableRequired, encode( + @as([]const u8, "hello\nworld"), + .{ .bracketed = false }, + )); +} + +test "encode unbracketed newlines" { + const testing = std.testing; + const data: []u8 = try testing.allocator.dupe(u8, "hello\nworld"); + defer testing.allocator.free(data); + const result = encode(data, .{ .bracketed = false }); + try testing.expectEqualStrings("", result[0]); + try testing.expectEqualStrings("hello\rworld", result[1]); + try testing.expectEqualStrings("", result[2]); +} + +test "encode unbracketed windows-stye newline" { + const testing = std.testing; + const data: []u8 = try testing.allocator.dupe(u8, "hello\r\nworld"); + defer testing.allocator.free(data); + const result = encode(data, .{ .bracketed = false }); + try testing.expectEqualStrings("", result[0]); + try testing.expectEqualStrings("hello\r\rworld", result[1]); + try testing.expectEqualStrings("", result[2]); +} diff --git a/src/lib_vt.zig b/src/lib_vt.zig index 8c49b4900..4b064dc0d 100644 --- a/src/lib_vt.zig +++ b/src/lib_vt.zig @@ -66,6 +66,18 @@ pub const EraseLine = terminal.EraseLine; pub const TabClear = terminal.TabClear; pub const Attribute = terminal.Attribute; +/// Terminal-specific input encoding is also part of libghostty-vt. +pub const input = struct { + // We have to be careful to only import targeted files within + // the input package because the full package brings in too many + // other dependencies. + const paste = @import("input/paste.zig"); + pub const PasteError = paste.Error; + pub const PasteOptions = paste.Options; + pub const isSafePaste = paste.isSafe; + pub const encodePaste = paste.encode; +}; + comptime { // If we're building the C library (vs. the Zig module) then // we want to reference the C API so that it gets exported. @@ -84,6 +96,7 @@ comptime { test { _ = terminal; _ = @import("lib/main.zig"); + @import("std").testing.refAllDecls(input); if (comptime terminal.options.c_abi) { _ = terminal.c_api; } diff --git a/src/terminal/main.zig b/src/terminal/main.zig index 6875ba89d..4498a5def 100644 --- a/src/terminal/main.zig +++ b/src/terminal/main.zig @@ -1,7 +1,6 @@ const builtin = @import("builtin"); const charsets = @import("charsets.zig"); -const sanitize = @import("sanitize.zig"); const stream = @import("stream.zig"); const ansi = @import("ansi.zig"); const csi = @import("csi.zig"); @@ -59,8 +58,6 @@ pub const EraseLine = csi.EraseLine; pub const TabClear = csi.TabClear; pub const Attribute = sgr.Attribute; -pub const isSafePaste = sanitize.isSafePaste; - pub const Options = @import("build_options.zig").Options; pub const options = @import("terminal_options"); diff --git a/src/terminal/modes.zig b/src/terminal/modes.zig index 9a74db73c..13b7c1eac 100644 --- a/src/terminal/modes.zig +++ b/src/terminal/modes.zig @@ -43,7 +43,7 @@ pub const ModeState = struct { } /// Get the value of a mode. - pub fn get(self: *ModeState, mode: Mode) bool { + pub fn get(self: *const ModeState, mode: Mode) bool { switch (mode) { inline else => |mode_comptime| { const entry = comptime entryForMode(mode_comptime); diff --git a/src/terminal/sanitize.zig b/src/terminal/sanitize.zig deleted file mode 100644 index f96e8a00e..000000000 --- a/src/terminal/sanitize.zig +++ /dev/null @@ -1,14 +0,0 @@ -const std = @import("std"); - -/// Returns true if the data looks safe to paste. -pub fn isSafePaste(data: []const u8) bool { - return std.mem.indexOf(u8, data, "\n") == null and - std.mem.indexOf(u8, data, "\x1b[201~") == null; -} - -test isSafePaste { - const testing = std.testing; - try testing.expect(isSafePaste("hello")); - try testing.expect(!isSafePaste("hello\n")); - try testing.expect(!isSafePaste("hello\nworld")); -}