From a162fa8f55589386d3d81c8bce445fdc72ceb69f Mon Sep 17 00:00:00 2001 From: benodiwal Date: Thu, 6 Nov 2025 13:13:32 +0530 Subject: [PATCH 1/4] feat: add clipboard-codepoint-map configuration parsing --- src/config/ClipboardCodepointMap.zig | 143 ++++++++++++++++++ src/config/Config.zig | 212 +++++++++++++++++++++++++++ 2 files changed, 355 insertions(+) create mode 100644 src/config/ClipboardCodepointMap.zig diff --git a/src/config/ClipboardCodepointMap.zig b/src/config/ClipboardCodepointMap.zig new file mode 100644 index 000000000..dd6a172c6 --- /dev/null +++ b/src/config/ClipboardCodepointMap.zig @@ -0,0 +1,143 @@ +/// ClipboardCodepointMap is a map of codepoints to replacement values +/// for clipboard operations. When copying text to clipboard, matching +/// codepoints will be replaced with their mapped values. +const ClipboardCodepointMap = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const Allocator = std.mem.Allocator; + +pub const Replacement = union(enum) { + /// Replace with a single codepoint + codepoint: u21, + /// Replace with a UTF-8 string + string: []const u8, +}; + +pub const Entry = struct { + /// Unicode codepoint range. Asserts range[0] <= range[1]. + range: [2]u21, + + /// The replacement value for this range. + replacement: Replacement, +}; + +/// The list of entries. We use a multiarraylist for cache-friendly lookups. +/// +/// Note: we do a linear search because we expect to always have very +/// few entries, so the overhead of a binary search is not worth it. +list: std.MultiArrayList(Entry) = .{}, + +pub fn deinit(self: *ClipboardCodepointMap, alloc: Allocator) void { + self.list.deinit(alloc); +} + +/// Deep copy of the struct. The given allocator is expected to +/// be an arena allocator of some sort since the struct itself +/// doesn't support fine-grained deallocation of fields. 
+pub fn clone(self: *const ClipboardCodepointMap, alloc: Allocator) !ClipboardCodepointMap { + var list = try self.list.clone(alloc); + for (list.items(.replacement)) |*r| { + switch (r.*) { + .string => |s| r.string = try alloc.dupe(u8, s), + .codepoint => {}, // no allocation needed + } + } + + return .{ .list = list }; +} + +/// Add an entry to the map. +/// +/// For conflicting codepoints, entries added later take priority over +/// entries added earlier. +pub fn add(self: *ClipboardCodepointMap, alloc: Allocator, entry: Entry) !void { + assert(entry.range[0] <= entry.range[1]); + try self.list.append(alloc, entry); +} + +/// Get a replacement for a codepoint. +pub fn get(self: *const ClipboardCodepointMap, cp: u21) ?Replacement { + const items = self.list.items(.range); + for (0..items.len) |forward_i| { + const i = items.len - forward_i - 1; + const range = items[i]; + if (range[0] <= cp and cp <= range[1]) { + const replacements = self.list.items(.replacement); + return replacements[i]; + } + } + + return null; +} + +/// Hash with the given hasher. +pub fn hash(self: *const ClipboardCodepointMap, hasher: anytype) void { + const autoHash = std.hash.autoHash; + autoHash(hasher, self.list.len); + const slice = self.list.slice(); + for (0..slice.len) |i| { + const entry = slice.get(i); + autoHash(hasher, entry.range); + switch (entry.replacement) { + .codepoint => |cp| autoHash(hasher, cp), + .string => |s| autoHash(hasher, s), + } + } +} + +/// Returns a hash code that can be used to uniquely identify this +/// action. 
+pub fn hashcode(self: *const ClipboardCodepointMap) u64 { + var hasher = std.hash.Wyhash.init(0); + self.hash(&hasher); + return hasher.final(); +} + +test "clipboard codepoint map" { + const testing = std.testing; + const alloc = testing.allocator; + + var m: ClipboardCodepointMap = .{}; + defer m.deinit(alloc); + + // Test no matches initially + try testing.expect(m.get(1) == null); + + // Add exact range with codepoint replacement + try m.add(alloc, .{ + .range = .{ 1, 1 }, + .replacement = .{ .codepoint = 65 }, // 'A' + }); + { + const replacement = m.get(1).?; + try testing.expect(replacement == .codepoint); + try testing.expectEqual(@as(u21, 65), replacement.codepoint); + } + + // Later entry takes priority + try m.add(alloc, .{ + .range = .{ 1, 2 }, + .replacement = .{ .string = "B" }, + }); + { + const replacement = m.get(1).?; + try testing.expect(replacement == .string); + try testing.expectEqualStrings("B", replacement.string); + } + + // Non-matching + try testing.expect(m.get(0) == null); + try testing.expect(m.get(3) == null); + + // Test range matching + try m.add(alloc, .{ + .range = .{ 3, 5 }, + .replacement = .{ .string = "range" }, + }); + { + const replacement = m.get(4).?; + try testing.expectEqualStrings("range", replacement.string); + } + try testing.expect(m.get(6) == null); +} \ No newline at end of file diff --git a/src/config/Config.zig b/src/config/Config.zig index 7420075af..6469c333e 100644 --- a/src/config/Config.zig +++ b/src/config/Config.zig @@ -38,6 +38,7 @@ const RepeatableReadableIO = @import("io.zig").RepeatableReadableIO; const RepeatableStringMap = @import("RepeatableStringMap.zig"); pub const Path = @import("path.zig").Path; pub const RepeatablePath = @import("path.zig").RepeatablePath; +const ClipboardCodepointMap = @import("ClipboardCodepointMap.zig"); // We do this instead of importing all of terminal/main.zig to // limit the dependency graph. 
This is important because some things @@ -279,6 +280,30 @@ pub const compatibility = std.StaticStringMap( /// i.e. new windows, tabs, etc. @"font-codepoint-map": RepeatableCodepointMap = .{}, +/// Map specific Unicode codepoints to replacement values when copying text +/// to clipboard. +/// +/// This configuration allows you to replace specific Unicode characters with +/// other characters or strings when copying terminal content to the clipboard. +/// This is useful for converting special terminal symbols to more compatible +/// characters for pasting into other applications. +/// +/// The syntax is similar to `font-codepoint-map`: +/// - Single codepoint: `U+1234=U+ABCD` or `U+1234=replacement_text` +/// - Codepoint range: `U+1234-U+5678=U+ABCD` +/// +/// Examples: +/// - `clipboard-codepoint-map = U+2500=U+002D` (box drawing horizontal → hyphen) +/// - `clipboard-codepoint-map = U+2502=U+007C` (box drawing vertical → pipe) +/// - `clipboard-codepoint-map = U+03A3=SUM` (Greek sigma → "SUM") +/// +/// This configuration can be repeated multiple times to specify multiple +/// mappings. Later entries take priority over earlier ones for overlapping +/// ranges. +/// +/// Note: This only applies to text copying operations, not URL copying. +@"clipboard-codepoint-map": RepeatableClipboardCodepointMap = .{}, + /// Draw fonts with a thicker stroke, if supported. /// This is currently only supported on macOS. @"font-thicken": bool = false, @@ -6868,6 +6893,193 @@ pub const RepeatableCodepointMap = struct { } }; +/// See "clipboard-codepoint-map" for documentation. 
+pub const RepeatableClipboardCodepointMap = struct { + const Self = @This(); + + map: ClipboardCodepointMap = .{}, + + pub fn parseCLI(self: *Self, alloc: Allocator, input_: ?[]const u8) !void { + const input = input_ orelse return error.ValueRequired; + const eql_idx = std.mem.indexOf(u8, input, "=") orelse return error.InvalidValue; + const whitespace = " \t"; + const key = std.mem.trim(u8, input[0..eql_idx], whitespace); + const value = std.mem.trim(u8, input[eql_idx + 1 ..], whitespace); + + // Parse the replacement value - either a codepoint or string + const replacement: ClipboardCodepointMap.Replacement = if (std.mem.startsWith(u8, value, "U+")) blk: { + // Parse as codepoint + const cp_str = value[2..]; // Skip "U+" + const cp = std.fmt.parseInt(u21, cp_str, 16) catch return error.InvalidValue; + break :blk .{ .codepoint = cp }; + } else blk: { + // Parse as UTF-8 string - validate it's valid UTF-8 + if (!std.unicode.utf8ValidateSlice(value)) return error.InvalidValue; + const value_copy = try alloc.dupe(u8, value); + break :blk .{ .string = value_copy }; + }; + + var p: UnicodeRangeParser = .{ .input = key }; + while (try p.next()) |range| { + try self.map.add(alloc, .{ + .range = range, + .replacement = replacement, + }); + } + } + + /// Deep copy of the struct. Required by Config. + pub fn clone(self: *const Self, alloc: Allocator) Allocator.Error!Self { + return .{ .map = try self.map.clone(alloc) }; + } + + /// Compare if two of our values are equal. Required by Config. 
+ pub fn equal(self: Self, other: Self) bool { + const itemsA = self.map.list.slice(); + const itemsB = other.map.list.slice(); + if (itemsA.len != itemsB.len) return false; + for (0..itemsA.len) |i| { + const a = itemsA.get(i); + const b = itemsB.get(i); + if (!std.meta.eql(a.range, b.range)) return false; + switch (a.replacement) { + .codepoint => |cp_a| switch (b.replacement) { + .codepoint => |cp_b| if (cp_a != cp_b) return false, + .string => return false, + }, + .string => |str_a| switch (b.replacement) { + .string => |str_b| if (!std.mem.eql(u8, str_a, str_b)) return false, + .codepoint => return false, + }, + } + } + return true; + } + + /// Used by Formatter + pub fn formatEntry( + self: Self, + formatter: anytype, + ) !void { + if (self.map.list.len == 0) { + try formatter.formatEntry(void, {}); + return; + } + + var buf: [1024]u8 = undefined; + var value_buf: [32]u8 = undefined; + const ranges = self.map.list.items(.range); + const replacements = self.map.list.items(.replacement); + for (ranges, replacements) |range, replacement| { + const value_str = switch (replacement) { + .codepoint => |cp| try std.fmt.bufPrint(&value_buf, "U+{X:0>4}", .{cp}), + .string => |s| s, + }; + + if (range[0] == range[1]) { + try formatter.formatEntry( + []const u8, + std.fmt.bufPrint( + &buf, + "U+{X:0>4}={s}", + .{ range[0], value_str }, + ) catch return error.OutOfMemory, + ); + } else { + try formatter.formatEntry( + []const u8, + std.fmt.bufPrint( + &buf, + "U+{X:0>4}-U+{X:0>4}={s}", + .{ range[0], range[1], value_str }, + ) catch return error.OutOfMemory, + ); + } + } + } + + /// Reuse the same UnicodeRangeParser from RepeatableCodepointMap + const UnicodeRangeParser = RepeatableCodepointMap.UnicodeRangeParser; + + test "parseCLI codepoint replacement" { + const testing = std.testing; + var arena = ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var list: Self = .{}; + try list.parseCLI(alloc, "U+2500=U+002D"); // 
box drawing → hyphen + + try testing.expectEqual(@as(usize, 1), list.map.list.len); + const entry = list.map.list.get(0); + try testing.expectEqual([2]u21{ 0x2500, 0x2500 }, entry.range); + try testing.expect(entry.replacement == .codepoint); + try testing.expectEqual(@as(u21, 0x002D), entry.replacement.codepoint); + } + + test "parseCLI string replacement" { + const testing = std.testing; + var arena = ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var list: Self = .{}; + try list.parseCLI(alloc, "U+03A3=SUM"); // Greek sigma → "SUM" + + try testing.expectEqual(@as(usize, 1), list.map.list.len); + const entry = list.map.list.get(0); + try testing.expectEqual([2]u21{ 0x03A3, 0x03A3 }, entry.range); + try testing.expect(entry.replacement == .string); + try testing.expectEqualStrings("SUM", entry.replacement.string); + } + + test "parseCLI range replacement" { + const testing = std.testing; + var arena = ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var list: Self = .{}; + try list.parseCLI(alloc, "U+2500-U+2503=|"); // box drawing range → pipe + + try testing.expectEqual(@as(usize, 1), list.map.list.len); + const entry = list.map.list.get(0); + try testing.expectEqual([2]u21{ 0x2500, 0x2503 }, entry.range); + try testing.expect(entry.replacement == .string); + try testing.expectEqualStrings("|", entry.replacement.string); + } + + test "formatConfig codepoint" { + const testing = std.testing; + var buf: std.Io.Writer.Allocating = .init(testing.allocator); + defer buf.deinit(); + + var arena = ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var list: Self = .{}; + try list.parseCLI(alloc, "U+2500=U+002D"); + try list.formatEntry(formatterpkg.entryFormatter("a", &buf.writer)); + try std.testing.expectEqualSlices(u8, "a = U+2500=U+002D\n", buf.written()); + } + + test "formatConfig string" { + const 
testing = std.testing; + var buf: std.Io.Writer.Allocating = .init(testing.allocator); + defer buf.deinit(); + + var arena = ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var list: Self = .{}; + try list.parseCLI(alloc, "U+03A3=SUM"); + try list.formatEntry(formatterpkg.entryFormatter("a", &buf.writer)); + try std.testing.expectEqualSlices(u8, "a = U+03A3=SUM\n", buf.written()); + } +}; + pub const FontStyle = union(enum) { const Self = @This(); From 11274cd9e5edeb8df0997ace341735e6dae47dbd Mon Sep 17 00:00:00 2001 From: benodiwal Date: Thu, 6 Nov 2025 13:43:22 +0530 Subject: [PATCH 2/4] feat: integrate clipboard-codepoint-map with clipboard pipeline --- src/Surface.zig | 84 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 82 insertions(+), 2 deletions(-) diff --git a/src/Surface.zig b/src/Surface.zig index 346cbb8bf..8f875becf 100644 --- a/src/Surface.zig +++ b/src/Surface.zig @@ -260,6 +260,7 @@ const DerivedConfig = struct { clipboard_trim_trailing_spaces: bool, clipboard_paste_protection: bool, clipboard_paste_bracketed_safe: bool, + clipboard_codepoint_map: configpkg.Config.RepeatableClipboardCodepointMap, copy_on_select: configpkg.CopyOnSelect, right_click_action: configpkg.RightClickAction, confirm_close_surface: configpkg.ConfirmCloseSurface, @@ -334,6 +335,7 @@ const DerivedConfig = struct { .clipboard_trim_trailing_spaces = config.@"clipboard-trim-trailing-spaces", .clipboard_paste_protection = config.@"clipboard-paste-protection", .clipboard_paste_bracketed_safe = config.@"clipboard-paste-bracketed-safe", + .clipboard_codepoint_map = try config.@"clipboard-codepoint-map".clone(alloc), .copy_on_select = config.@"copy-on-select", .right_click_action = config.@"right-click-action", .confirm_close_surface = config.@"confirm-close-surface", @@ -1954,6 +1956,54 @@ fn clipboardWrite(self: *const Surface, data: []const u8, loc: apprt.Clipboard) }; } +/// Apply clipboard codepoint mappings 
to transform text content. +/// Returns the transformed text, which may be the same as input if no mappings apply. +fn applyClipboardCodepointMappings( + alloc: Allocator, + input_text: []const u8, + mappings: *const configpkg.Config.RepeatableClipboardCodepointMap, +) ![]const u8 { + // If no mappings configured, return input unchanged + if (mappings.map.list.len == 0) { + return try alloc.dupe(u8, input_text); + } + + // We'll build the output in this list + var output: std.ArrayList(u8) = .empty; + defer output.deinit(alloc); + + // UTF-8 decode and process each codepoint + var iter = std.unicode.Utf8Iterator{ .bytes = input_text, .i = 0 }; + while (iter.nextCodepoint()) |codepoint| { + if (mappings.map.get(codepoint)) |replacement| { + switch (replacement) { + .codepoint => |cp| { + // Encode the replacement codepoint to UTF-8 + var utf8_buf: [4]u8 = undefined; + const len = std.unicode.utf8Encode(cp, &utf8_buf) catch { + // If encoding fails, use original codepoint + const orig_len = std.unicode.utf8Encode(codepoint, &utf8_buf) catch continue; + try output.appendSlice(alloc, utf8_buf[0..orig_len]); + continue; + }; + try output.appendSlice(alloc, utf8_buf[0..len]); + }, + .string => |s| { + // Append the replacement string directly + try output.appendSlice(alloc, s); + }, + } + } else { + // No mapping found, keep original codepoint + var utf8_buf: [4]u8 = undefined; + const len = std.unicode.utf8Encode(codepoint, &utf8_buf) catch continue; + try output.appendSlice(alloc, utf8_buf[0..len]); + } + } + + return try output.toOwnedSlice(alloc); +} + fn copySelectionToClipboards( self: *Surface, sel: terminal.Selection, @@ -1984,9 +2034,19 @@ fn copySelectionToClipboards( var formatter: ScreenFormatter = .init(&self.io.terminal.screen, opts); formatter.content = .{ .selection = sel }; try formatter.format(&aw.writer); + + // Apply clipboard codepoint mappings + const original_text = try aw.toOwnedSlice(); + const transformed_text = try 
applyClipboardCodepointMappings( + alloc, + original_text, + &self.config.clipboard_codepoint_map, + ); + const transformed_text_z = try alloc.dupeZ(u8, transformed_text); + try contents.append(alloc, .{ .mime = "text/plain", - .data = try aw.toOwnedSliceSentinel(0), + .data = transformed_text_z, }); }, @@ -1998,6 +2058,9 @@ fn copySelectionToClipboards( }); formatter.content = .{ .selection = sel }; try formatter.format(&aw.writer); + + // Note: We don't apply codepoint mappings to VT format since it contains + // escape sequences that should be preserved as-is try contents.append(alloc, .{ .mime = "text/plain", .data = try aw.toOwnedSliceSentinel(0), @@ -2012,6 +2075,9 @@ fn copySelectionToClipboards( }); formatter.content = .{ .selection = sel }; try formatter.format(&aw.writer); + + // Note: We don't apply codepoint mappings to HTML format since HTML + // has its own character encoding and entity system try contents.append(alloc, .{ .mime = "text/html", .data = try aw.toOwnedSliceSentinel(0), @@ -2019,15 +2085,27 @@ fn copySelectionToClipboards( }, .mixed => { + // First, generate plain text with codepoint mappings applied var formatter: ScreenFormatter = .init(&self.io.terminal.screen, opts); formatter.content = .{ .selection = sel }; try formatter.format(&aw.writer); + + // Apply clipboard codepoint mappings to plain text + const original_text = try aw.toOwnedSlice(); + const transformed_text = try applyClipboardCodepointMappings( + alloc, + original_text, + &self.config.clipboard_codepoint_map, + ); + const transformed_text_z = try alloc.dupeZ(u8, transformed_text); + try contents.append(alloc, .{ .mime = "text/plain", - .data = try aw.toOwnedSliceSentinel(0), + .data = transformed_text_z, }); assert(aw.written().len == 0); + // Second, generate HTML without codepoint mappings formatter = .init(&self.io.terminal.screen, opts: { var copy = opts; copy.emit = .html; @@ -2042,6 +2120,8 @@ fn copySelectionToClipboards( }); formatter.content = .{ .selection = sel 
}; try formatter.format(&aw.writer); + + // Note: We don't apply codepoint mappings to HTML format try contents.append(alloc, .{ .mime = "text/html", .data = try aw.toOwnedSliceSentinel(0), From 422fa8d3048435c688fa7fc1186bec967d88db51 Mon Sep 17 00:00:00 2001 From: benodiwal Date: Thu, 6 Nov 2025 13:58:03 +0530 Subject: [PATCH 3/4] refactor: remove unused hash methods from ClipboardCodepointMap --- src/config/ClipboardCodepointMap.zig | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/src/config/ClipboardCodepointMap.zig b/src/config/ClipboardCodepointMap.zig index dd6a172c6..a0e8fed36 100644 --- a/src/config/ClipboardCodepointMap.zig +++ b/src/config/ClipboardCodepointMap.zig @@ -71,28 +71,6 @@ pub fn get(self: *const ClipboardCodepointMap, cp: u21) ?Replacement { return null; } -/// Hash with the given hasher. -pub fn hash(self: *const ClipboardCodepointMap, hasher: anytype) void { - const autoHash = std.hash.autoHash; - autoHash(hasher, self.list.len); - const slice = self.list.slice(); - for (0..slice.len) |i| { - const entry = slice.get(i); - autoHash(hasher, entry.range); - switch (entry.replacement) { - .codepoint => |cp| autoHash(hasher, cp), - .string => |s| autoHash(hasher, s), - } - } -} - -/// Returns a hash code that can be used to uniquely identify this -/// action. 
-pub fn hashcode(self: *const ClipboardCodepointMap) u64 { - var hasher = std.hash.Wyhash.init(0); - self.hash(&hasher); - return hasher.final(); -} test "clipboard codepoint map" { const testing = std.testing; From 43d81600ded98b241495ea071d55634a272847f3 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Fri, 7 Nov 2025 14:45:16 -0800 Subject: [PATCH 4/4] terminal: add codepoint mapping to the formatter itself --- src/Surface.zig | 73 +---- src/config/ClipboardCodepointMap.zig | 91 +----- src/terminal/formatter.zig | 449 ++++++++++++++++++++++++++- 3 files changed, 457 insertions(+), 156 deletions(-) diff --git a/src/Surface.zig b/src/Surface.zig index 8f875becf..a44563ad4 100644 --- a/src/Surface.zig +++ b/src/Surface.zig @@ -1956,54 +1956,6 @@ fn clipboardWrite(self: *const Surface, data: []const u8, loc: apprt.Clipboard) }; } -/// Apply clipboard codepoint mappings to transform text content. -/// Returns the transformed text, which may be the same as input if no mappings apply. 
-fn applyClipboardCodepointMappings( - alloc: Allocator, - input_text: []const u8, - mappings: *const configpkg.Config.RepeatableClipboardCodepointMap, -) ![]const u8 { - // If no mappings configured, return input unchanged - if (mappings.map.list.len == 0) { - return try alloc.dupe(u8, input_text); - } - - // We'll build the output in this list - var output: std.ArrayList(u8) = .empty; - defer output.deinit(alloc); - - // UTF-8 decode and process each codepoint - var iter = std.unicode.Utf8Iterator{ .bytes = input_text, .i = 0 }; - while (iter.nextCodepoint()) |codepoint| { - if (mappings.map.get(codepoint)) |replacement| { - switch (replacement) { - .codepoint => |cp| { - // Encode the replacement codepoint to UTF-8 - var utf8_buf: [4]u8 = undefined; - const len = std.unicode.utf8Encode(cp, &utf8_buf) catch { - // If encoding fails, use original codepoint - const orig_len = std.unicode.utf8Encode(codepoint, &utf8_buf) catch continue; - try output.appendSlice(alloc, utf8_buf[0..orig_len]); - continue; - }; - try output.appendSlice(alloc, utf8_buf[0..len]); - }, - .string => |s| { - // Append the replacement string directly - try output.appendSlice(alloc, s); - }, - } - } else { - // No mapping found, keep original codepoint - var utf8_buf: [4]u8 = undefined; - const len = std.unicode.utf8Encode(codepoint, &utf8_buf) catch continue; - try output.appendSlice(alloc, utf8_buf[0..len]); - } - } - - return try output.toOwnedSlice(alloc); -} - fn copySelectionToClipboards( self: *Surface, sel: terminal.Selection, @@ -2021,6 +1973,7 @@ fn copySelectionToClipboards( .emit = .plain, // We'll override this below .unwrap = true, .trim = self.config.clipboard_trim_trailing_spaces, + .codepoint_map = self.config.clipboard_codepoint_map.map.list, .background = self.io.terminal.colors.background.get(), .foreground = self.io.terminal.colors.foreground.get(), .palette = &self.io.terminal.colors.palette.current, @@ -2034,19 +1987,9 @@ fn copySelectionToClipboards( var formatter: 
ScreenFormatter = .init(&self.io.terminal.screen, opts); formatter.content = .{ .selection = sel }; try formatter.format(&aw.writer); - - // Apply clipboard codepoint mappings - const original_text = try aw.toOwnedSlice(); - const transformed_text = try applyClipboardCodepointMappings( - alloc, - original_text, - &self.config.clipboard_codepoint_map, - ); - const transformed_text_z = try alloc.dupeZ(u8, transformed_text); - try contents.append(alloc, .{ .mime = "text/plain", - .data = transformed_text_z, + .data = try aw.toOwnedSliceSentinel(0), }); }, @@ -2089,19 +2032,9 @@ fn copySelectionToClipboards( var formatter: ScreenFormatter = .init(&self.io.terminal.screen, opts); formatter.content = .{ .selection = sel }; try formatter.format(&aw.writer); - - // Apply clipboard codepoint mappings to plain text - const original_text = try aw.toOwnedSlice(); - const transformed_text = try applyClipboardCodepointMappings( - alloc, - original_text, - &self.config.clipboard_codepoint_map, - ); - const transformed_text_z = try alloc.dupeZ(u8, transformed_text); - try contents.append(alloc, .{ .mime = "text/plain", - .data = transformed_text_z, + .data = try aw.toOwnedSliceSentinel(0), }); assert(aw.written().len == 0); diff --git a/src/config/ClipboardCodepointMap.zig b/src/config/ClipboardCodepointMap.zig index a0e8fed36..354db10d9 100644 --- a/src/config/ClipboardCodepointMap.zig +++ b/src/config/ClipboardCodepointMap.zig @@ -7,20 +7,9 @@ const std = @import("std"); const assert = std.debug.assert; const Allocator = std.mem.Allocator; -pub const Replacement = union(enum) { - /// Replace with a single codepoint - codepoint: u21, - /// Replace with a UTF-8 string - string: []const u8, -}; - -pub const Entry = struct { - /// Unicode codepoint range. Asserts range[0] <= range[1]. - range: [2]u21, - - /// The replacement value for this range. - replacement: Replacement, -}; +// To ease our usage later, we map it directly to formatter entries. 
+pub const Entry = @import("../terminal/formatter.zig").CodepointMap; +pub const Replacement = Entry.Replacement; /// The list of entries. We use a multiarraylist for cache-friendly lookups. /// @@ -37,12 +26,10 @@ pub fn deinit(self: *ClipboardCodepointMap, alloc: Allocator) void { /// doesn't support fine-grained deallocation of fields. pub fn clone(self: *const ClipboardCodepointMap, alloc: Allocator) !ClipboardCodepointMap { var list = try self.list.clone(alloc); - for (list.items(.replacement)) |*r| { - switch (r.*) { - .string => |s| r.string = try alloc.dupe(u8, s), - .codepoint => {}, // no allocation needed - } - } + for (list.items(.replacement)) |*r| switch (r.*) { + .string => |s| r.string = try alloc.dupe(u8, s), + .codepoint => {}, // no allocation needed + }; return .{ .list = list }; } @@ -55,67 +42,3 @@ pub fn add(self: *ClipboardCodepointMap, alloc: Allocator, entry: Entry) !void { assert(entry.range[0] <= entry.range[1]); try self.list.append(alloc, entry); } - -/// Get a replacement for a codepoint. 
-pub fn get(self: *const ClipboardCodepointMap, cp: u21) ?Replacement { - const items = self.list.items(.range); - for (0..items.len) |forward_i| { - const i = items.len - forward_i - 1; - const range = items[i]; - if (range[0] <= cp and cp <= range[1]) { - const replacements = self.list.items(.replacement); - return replacements[i]; - } - } - - return null; -} - - -test "clipboard codepoint map" { - const testing = std.testing; - const alloc = testing.allocator; - - var m: ClipboardCodepointMap = .{}; - defer m.deinit(alloc); - - // Test no matches initially - try testing.expect(m.get(1) == null); - - // Add exact range with codepoint replacement - try m.add(alloc, .{ - .range = .{ 1, 1 }, - .replacement = .{ .codepoint = 65 }, // 'A' - }); - { - const replacement = m.get(1).?; - try testing.expect(replacement == .codepoint); - try testing.expectEqual(@as(u21, 65), replacement.codepoint); - } - - // Later entry takes priority - try m.add(alloc, .{ - .range = .{ 1, 2 }, - .replacement = .{ .string = "B" }, - }); - { - const replacement = m.get(1).?; - try testing.expect(replacement == .string); - try testing.expectEqualStrings("B", replacement.string); - } - - // Non-matching - try testing.expect(m.get(0) == null); - try testing.expect(m.get(3) == null); - - // Test range matching - try m.add(alloc, .{ - .range = .{ 3, 5 }, - .replacement = .{ .string = "range" }, - }); - { - const replacement = m.get(4).?; - try testing.expectEqualStrings("range", replacement.string); - } - try testing.expect(m.get(6) == null); -} \ No newline at end of file diff --git a/src/terminal/formatter.zig b/src/terminal/formatter.zig index ddb6d5334..46cc971c8 100644 --- a/src/terminal/formatter.zig +++ b/src/terminal/formatter.zig @@ -59,6 +59,24 @@ pub const Format = enum { } }; +pub const CodepointMap = struct { + /// Unicode codepoint range to replace. + /// Asserts: range[0] <= range[1] + range: [2]u21, + + /// Replacement value for this range. 
+ replacement: Replacement, + + pub const Replacement = union(enum) { + /// A single replacement codepoint. + codepoint: u21, + + /// A UTF-8 encoded string to replace with. Asserts the + /// UTF-8 encoding (must be valid). + string: []const u8, + }; +}; + /// Common encoding options regardless of what exact formatter is used. pub const Options = struct { /// The format to emit. @@ -74,6 +92,10 @@ pub const Options = struct { /// is currently only space characters (0x20). trim: bool = true, + /// Replace matching Unicode codepoints with some other values. + /// This will use the last matching range found in the list. + codepoint_map: ?std.MultiArrayList(CodepointMap) = .{}, + /// Set a background and foreground color to use for the "screen". /// For styled formats, this will emit the proper sequences or styles. background: ?color.RGB = null, @@ -1241,14 +1263,58 @@ pub const PageFormatter = struct { writer: *std.Io.Writer, cell: *const Cell, ) !void { - try self.writeCodepoint(writer, cell.content.codepoint); + try self.writeCodepointWithReplacement(writer, cell.content.codepoint); if (comptime tag == .codepoint_grapheme) { for (self.page.lookupGrapheme(cell).?) |cp| { - try self.writeCodepoint(writer, cp); + try self.writeCodepointWithReplacement(writer, cp); } } } + fn writeCodepointWithReplacement( + self: PageFormatter, + writer: *std.Io.Writer, + codepoint: u21, + ) !void { + // Search for our replacement + const r_: ?CodepointMap.Replacement = replacement: { + const map = self.opts.codepoint_map orelse break :replacement null; + const items = map.items(.range); + for (0..items.len) |forward_i| { + const i = items.len - forward_i - 1; + const range = items[i]; + if (range[0] <= codepoint and codepoint <= range[1]) { + const replacements = map.items(.replacement); + break :replacement replacements[i]; + } + } + + break :replacement null; + }; + + // If no replacement, write it directly. 
+ const r = r_ orelse return try self.writeCodepoint( + writer, + codepoint, + ); + + switch (r) { + .codepoint => |v| try self.writeCodepoint( + writer, + v, + ), + + .string => |s| { + const view = std.unicode.Utf8View.init(s) catch unreachable; + var it = view.iterator(); + while (it.nextCodepoint()) |cp| try self.writeCodepoint( + writer, + cp, + ); + }, + } + } + fn writeCodepoint( self: PageFormatter, writer: *std.Io.Writer, @@ -5302,3 +5368,382 @@ test "Page VT style reset properly closes styles" { // The reset should properly close the bold style try testing.expectEqualStrings("\x1b[0m\x1b[1mbold\x1b[0mnormal", output); } + +test "Page codepoint_map single replacement" { + const testing = std.testing; + const alloc = testing.allocator; + + var builder: std.Io.Writer.Allocating = .init(alloc); + defer builder.deinit(); + + var t = try Terminal.init(alloc, .{ + .cols = 80, + .rows = 24, + }); + defer t.deinit(alloc); + + var s = t.vtStream(); + defer s.deinit(); + + try s.nextSlice("hello world"); + + const pages = &t.screen.pages; + const page = &pages.pages.last.?.data; + + // Replace 'o' with 'x' + var map: std.MultiArrayList(CodepointMap) = .{}; + defer map.deinit(alloc); + try map.append(alloc, .{ + .range = .{ 'o', 'o' }, + .replacement = .{ .codepoint = 'x' }, + }); + + var opts: Options = .plain; + opts.codepoint_map = map; + var formatter: PageFormatter = .init(page, opts); + + var point_map: std.ArrayList(Coordinate) = .empty; + defer point_map.deinit(alloc); + formatter.point_map = .{ .alloc = alloc, .map = &point_map }; + + try formatter.format(&builder.writer); + const output = builder.writer.buffered(); + try testing.expectEqualStrings("hellx wxrld", output); + + // Verify point map - each output byte should map to original cell position + try testing.expectEqual(output.len, point_map.items.len); + // "hello world" -> "hellx wxrld" + // h e l l o w o r l d + // 0 1 2 3 4 5 6 7 8 9 10 + try testing.expectEqual(Coordinate{ .x = 0, .y = 0 }, 
point_map.items[0]); // h
    try testing.expectEqual(Coordinate{ .x = 1, .y = 0 }, point_map.items[1]); // e
    try testing.expectEqual(Coordinate{ .x = 2, .y = 0 }, point_map.items[2]); // l
    try testing.expectEqual(Coordinate{ .x = 3, .y = 0 }, point_map.items[3]); // l
    try testing.expectEqual(Coordinate{ .x = 4, .y = 0 }, point_map.items[4]); // x (was o)
    try testing.expectEqual(Coordinate{ .x = 5, .y = 0 }, point_map.items[5]); // space
    try testing.expectEqual(Coordinate{ .x = 6, .y = 0 }, point_map.items[6]); // w
    try testing.expectEqual(Coordinate{ .x = 7, .y = 0 }, point_map.items[7]); // x (was o)
    try testing.expectEqual(Coordinate{ .x = 8, .y = 0 }, point_map.items[8]); // r
    try testing.expectEqual(Coordinate{ .x = 9, .y = 0 }, point_map.items[9]); // l
    try testing.expectEqual(Coordinate{ .x = 10, .y = 0 }, point_map.items[10]); // d
}

// Two entries cover the same codepoint; the one appended last must be the
// one that shows up in the formatted output.
test "Page codepoint_map conflicting replacement prefers last" {
    const gpa = std.testing.allocator;

    var out = std.Io.Writer.Allocating.init(gpa);
    defer out.deinit();

    var term = try Terminal.init(gpa, .{ .cols = 80, .rows = 24 });
    defer term.deinit(gpa);

    var stream = term.vtStream();
    defer stream.deinit();

    try stream.nextSlice("hello");

    const page = &term.screen.pages.pages.last.?.data;

    // Competing rules for 'o': first 'x', then 'y'.
    var entries: std.MultiArrayList(CodepointMap) = .{};
    defer entries.deinit(gpa);
    try entries.append(gpa, .{
        .range = .{ 'o', 'o' },
        .replacement = .{ .codepoint = 'x' },
    });
    try entries.append(gpa, .{
        .range = .{ 'o', 'o' },
        .replacement = .{ .codepoint = 'y' },
    });

    var options: Options = .plain;
    options.codepoint_map = entries;
    var page_fmt = PageFormatter.init(page, options);

    try page_fmt.format(&out.writer);
    try std.testing.expectEqualStrings("helly", out.writer.buffered());
}

// A single codepoint is replaced by a multi-byte string; every byte of the
// replacement must be attributed to the cell the original codepoint was in.
test "Page codepoint_map replace with string" {
    const gpa = std.testing.allocator;

    var out = std.Io.Writer.Allocating.init(gpa);
    defer out.deinit();

    var term = try Terminal.init(gpa, .{ .cols = 80, .rows = 24 });
    defer term.deinit(gpa);

    var stream = term.vtStream();
    defer stream.deinit();

    try stream.nextSlice("hello");

    const page = &term.screen.pages.pages.last.?.data;

    // 'o' -> "XYZ"
    var entries: std.MultiArrayList(CodepointMap) = .{};
    defer entries.deinit(gpa);
    try entries.append(gpa, .{
        .range = .{ 'o', 'o' },
        .replacement = .{ .string = "XYZ" },
    });

    var options: Options = .plain;
    options.codepoint_map = entries;
    var page_fmt = PageFormatter.init(page, options);

    var cells: std.ArrayList(Coordinate) = .empty;
    defer cells.deinit(gpa);
    page_fmt.point_map = .{ .alloc = gpa, .map = &cells };

    try page_fmt.format(&out.writer);
    const rendered = out.writer.buffered();
    try std.testing.expectEqualStrings("hellXYZ", rendered);

    // One point-map entry per output byte.
    try std.testing.expectEqual(rendered.len, cells.items.len);

    // "hell" keeps its own cells: byte i came from column i.
    for (0..4) |i| try std.testing.expectEqual(
        Coordinate{ .x = @intCast(i), .y = 0 },
        cells.items[i],
    );

    // 'X', 'Y' and 'Z' all point back at column 4, where 'o' was.
    for (4..7) |i| try std.testing.expectEqual(
        Coordinate{ .x = 4, .y = 0 },
        cells.items[i],
    );
}

// One entry whose range spans several codepoints rewrites every codepoint
// inside that range and leaves the rest alone.
test "Page codepoint_map range replacement" {
    const gpa = std.testing.allocator;

    var out = std.Io.Writer.Allocating.init(gpa);
    defer out.deinit();

    var term = try Terminal.init(gpa, .{ .cols = 80, .rows = 24 });
    defer term.deinit(gpa);

    var stream = term.vtStream();
    defer stream.deinit();

    try stream.nextSlice("abcdefg");

    const page = &term.screen.pages.pages.last.?.data;

    // 'b'..'e' (inclusive) -> 'X'
    var entries: std.MultiArrayList(CodepointMap) = .{};
    defer entries.deinit(gpa);
    try entries.append(gpa, .{
        .range = .{ 'b', 'e' },
        .replacement = .{ .codepoint = 'X' },
    });

    var options: Options = .plain;
    options.codepoint_map = entries;
    var page_fmt = PageFormatter.init(page, options);

    try page_fmt.format(&out.writer);
    try std.testing.expectEqualStrings("aXXXXfg", out.writer.buffered());
}

// Two disjoint ranges are applied independently in the same pass.
test "Page codepoint_map multiple ranges" {
    const gpa = std.testing.allocator;

    var out = std.Io.Writer.Allocating.init(gpa);
    defer out.deinit();

    var term = try Terminal.init(gpa, .{ .cols = 80, .rows = 24 });
    defer term.deinit(gpa);

    var stream = term.vtStream();
    defer stream.deinit();

    try stream.nextSlice("hello world");

    const page = &term.screen.pages.pages.last.?.data;

    // 'a'..'m' -> 'A', 'n'..'z' -> 'Z'
    var entries: std.MultiArrayList(CodepointMap) = .{};
    defer entries.deinit(gpa);
    try entries.append(gpa, .{
        .range = .{ 'a', 'm' },
        .replacement = .{ .codepoint = 'A' },
    });
    try entries.append(gpa, .{
        .range = .{ 'n', 'z' },
        .replacement = .{ .codepoint = 'Z' },
    });

    var options: Options = .plain;
    options.codepoint_map = entries;
    var page_fmt = PageFormatter.init(page, options);

    try page_fmt.format(&out.writer);

    // input:  h e l l o _ w o r l d
    // output: A A A A Z _ Z Z Z A A   (the space is outside both ranges)
    try std.testing.expectEqualStrings("AAAAZ ZZZAA", out.writer.buffered());
}

// A non-ASCII codepoint is replaced by a non-ASCII string, and the point map
// is checked byte-by-byte against the source cells.
test "Page codepoint_map unicode replacement" {
    const gpa = std.testing.allocator;

    var out = std.Io.Writer.Allocating.init(gpa);
    defer out.deinit();

    var term = try Terminal.init(gpa, .{ .cols = 80, .rows = 24 });
    defer term.deinit(gpa);

    var stream = term.vtStream();
    defer stream.deinit();

    try stream.nextSlice("hello ⚡ world");

    const page = &term.screen.pages.pages.last.?.data;

    // Lightning bolt -> fire emoji
    var entries: std.MultiArrayList(CodepointMap) = .{};
    defer entries.deinit(gpa);
    try entries.append(gpa, .{
        .range = .{ '⚡', '⚡' },
        .replacement = .{ .string = "🔥" },
    });

    var options: Options = .plain;
    options.codepoint_map = entries;
    var page_fmt = PageFormatter.init(page, options);

    var cells: std.ArrayList(Coordinate) = .empty;
    defer cells.deinit(gpa);
    page_fmt.point_map = .{ .alloc = gpa, .map = &cells };

    try page_fmt.format(&out.writer);
    const rendered = out.writer.buffered();
    try std.testing.expectEqualStrings("hello 🔥 world", rendered);

    // One point-map entry per output byte.
    try std.testing.expectEqual(rendered.len, cells.items.len);

    // Cell layout asserted below (⚡ is wide and takes cells 6-7):
    //   h  e  l  l  o  _  ⚡    _  w  o   r   l   d
    //   0  1  2  3  4  5  6-7  8  9  10  11  12  13
    const fire_at = "hello ".len; // byte offset of the replacement
    const fire_len = "🔥".len; // 4 UTF-8 bytes
    const tail_at = fire_at + fire_len; // byte offset of " world"

    // "hello " : byte i came from column i.
    for (cells.items[0..fire_at], 0..) |cell, col| try std.testing.expectEqual(
        Coordinate{ .x = @intCast(col), .y = 0 },
        cell,
    );

    // Every byte of the fire emoji maps to column 6, where ⚡ started.
    for (cells.items[fire_at..tail_at]) |cell| try std.testing.expectEqual(
        Coordinate{ .x = 6, .y = 0 },
        cell,
    );

    // " world" resumes at column 8, after the wide character's second cell.
    for (cells.items[tail_at..][0.." world".len], 8..) |cell, col| try std.testing.expectEqual(
        Coordinate{ .x = @intCast(col), .y = 0 },
        cell,
    );
}

// Replacement must also apply when the formatter emits styling (VT output),
// with the style sequences left intact around the rewritten text.
test "Page codepoint_map with styled formats" {
    const gpa = std.testing.allocator;

    var out = std.Io.Writer.Allocating.init(gpa);
    defer out.deinit();

    var term = try Terminal.init(gpa, .{ .cols = 10, .rows = 24 });
    defer term.deinit(gpa);

    var stream = term.vtStream();
    defer stream.deinit();

    try stream.nextSlice("\x1b[31mred text\x1b[0m");

    const page = &term.screen.pages.pages.last.?.data;

    // 'e' -> 'X' inside the styled run
    var entries: std.MultiArrayList(CodepointMap) = .{};
    defer entries.deinit(gpa);
    try entries.append(gpa, .{
        .range = .{ 'e', 'e' },
        .replacement = .{ .codepoint = 'X' },
    });

    var options: Options = .vt;
    options.codepoint_map = entries;
    var page_fmt = PageFormatter.init(page, options);

    try page_fmt.format(&out.writer);

    // "red text" becomes "rXd tXxt"; the expected output carries the colour
    // as \x1b[38;5;1m (palette colour 1) between two \x1b[0m resets.
    try std.testing.expectEqualStrings(
        "\x1b[0m\x1b[38;5;1mrXd tXxt\x1b[0m",
        out.writer.buffered(),
    );
}

// A map with no entries must leave the text untouched.
test "Page codepoint_map empty map" {
    const gpa = std.testing.allocator;

    var out = std.Io.Writer.Allocating.init(gpa);
    defer out.deinit();

    var term = try Terminal.init(gpa, .{ .cols = 80, .rows = 24 });
    defer term.deinit(gpa);

    var stream = term.vtStream();
    defer stream.deinit();

    try stream.nextSlice("hello world");

    const page = &term.screen.pages.pages.last.?.data;

    // Intentionally left empty.
    var entries: std.MultiArrayList(CodepointMap) = .{};
    defer entries.deinit(gpa);

    var options: Options = .plain;
    options.codepoint_map = entries;
    var page_fmt = PageFormatter.init(page, options);

    try page_fmt.format(&out.writer);
    try std.testing.expectEqualStrings("hello world", out.writer.buffered());
}