Add configurable word boundary characters for text selection (#9335)

## Summary

This PR adds a new `selection-word-chars` configuration option that
allows users to customize which characters mark word boundaries during
text selection operations (double-click, word selection, etc.).

## Motivation

This has been on my wishlist for a while. Inspired by #9069 which added
semicolon as a hardcoded word boundary, this PR takes the concept
further by making word boundaries fully configurable. Different
workflows and use cases benefit from different boundary characters - SQL
developers might want semicolons as boundaries, while others working
with file paths or URLs might prefer different settings.

This approach is similar to zsh's `WORDCHARS` environment variable,
giving users fine-grained control over text selection behavior.

## Changes

- **New config option**: `selection-word-chars` with default value `` `
\t'"│`|:;,()[]{}<>$` ``
- **Runtime UTF-8 parsing**: Boundary characters are parsed from UTF-8
string to u21 codepoints
- **Updated function signatures**: `selectWord()` and
`selectWordBetween()` now accept boundary characters as parameters
- **All call sites updated**: Surface.zig, embedded.zig, and all test
cases updated

## Usage

Users can now customize word boundaries in their config:

```ini
# Remove semicolon from boundaries (treat as part of words)
selection-word-chars = " \t'\"│`|:,()[]{}<>$"

# The default set written out explicitly. Periods are not boundaries,
# so dotted names such as hostnames are selected as a single word.
selection-word-chars = " \t'\"│`|:;,()[]{}<>$"
```

## Implementation Details

- Boundary characters are stored in `DerivedConfig` and passed through
to selection functions
- UTF-8 parsing happens at runtime with graceful fallback for invalid
input
- Null character (U+0000) is always included as a boundary automatically
- Multi-byte UTF-8 characters are fully supported

## AI Assistance Disclosure

With gratitude for the team and respect for the [Contributing
Guidelines](https://github.com/ghostty-org/ghostty/blob/main/CONTRIBUTING.md),
I want to disclose that this PR was written with AI assistance (Claude
Code). I have reviewed all the code, and to the extent of my
understanding, I'm prepared to answer any questions about the changes.

## Related

- Inspired by #9069
This commit is contained in:
Mitchell Hashimoto
2026-01-20 09:42:21 -08:00
committed by GitHub
4 changed files with 216 additions and 56 deletions

View File

@@ -316,6 +316,7 @@ const DerivedConfig = struct {
macos_option_as_alt: ?input.OptionAsAlt,
selection_clear_on_copy: bool,
selection_clear_on_typing: bool,
selection_word_chars: []const u21,
vt_kam_allowed: bool,
wait_after_command: bool,
window_padding_top: u32,
@@ -392,6 +393,7 @@ const DerivedConfig = struct {
.macos_option_as_alt = config.@"macos-option-as-alt",
.selection_clear_on_copy = config.@"selection-clear-on-copy",
.selection_clear_on_typing = config.@"selection-clear-on-typing",
.selection_word_chars = try alloc.dupe(u21, config.@"selection-word-chars".codepoints),
.vt_kam_allowed = config.@"vt-kam-allowed",
.wait_after_command = config.@"wait-after-command",
.window_padding_top = config.@"window-padding-y".top_left,
@@ -4180,7 +4182,7 @@ pub fn mouseButtonCallback(
// Ignore any errors, likely regex errors.
}
break :sel self.io.terminal.screens.active.selectWord(pin.*);
break :sel self.io.terminal.screens.active.selectWord(pin.*, self.config.selection_word_chars);
};
if (sel_) |sel| {
try self.io.terminal.screens.active.select(sel);
@@ -4262,7 +4264,10 @@ pub fn mouseButtonCallback(
if (try self.linkAtPos(pos)) |link| {
try self.setSelection(link.selection);
} else {
const sel = screen.selectWord(pin) orelse break :sel;
const sel = screen.selectWord(
pin,
self.config.selection_word_chars,
) orelse break :sel;
try self.setSelection(sel);
}
try self.queueRender();
@@ -4583,7 +4588,10 @@ pub fn mousePressureCallback(
// This should always be set in this state but we don't want
// to handle state inconsistency here.
const pin = self.mouse.left_click_pin orelse break :select;
const sel = self.io.terminal.screens.active.selectWord(pin.*) orelse break :select;
const sel = self.io.terminal.screens.active.selectWord(
pin.*,
self.config.selection_word_chars,
) orelse break :select;
try self.io.terminal.screens.active.select(sel);
try self.queueRender();
}
@@ -4806,7 +4814,11 @@ fn dragLeftClickDouble(
const click_pin = self.mouse.left_click_pin.?.*;
// Get the word closest to our starting click.
const word_start = screen.selectWordBetween(click_pin, drag_pin) orelse {
const word_start = screen.selectWordBetween(
click_pin,
drag_pin,
self.config.selection_word_chars,
) orelse {
try self.setSelection(null);
return;
};
@@ -4815,6 +4827,7 @@ fn dragLeftClickDouble(
const word_current = screen.selectWordBetween(
drag_pin,
click_pin,
self.config.selection_word_chars,
) orelse {
try self.setSelection(null);
return;

View File

@@ -2165,7 +2165,10 @@ pub const CAPI = struct {
if (comptime std.debug.runtime_safety) unreachable;
return false;
};
break :sel surface.io.terminal.screens.active.selectWord(pin) orelse return false;
break :sel surface.io.terminal.screens.active.selectWord(
pin,
surface.config.selection_word_chars,
) orelse return false;
};
// Read the selection

View File

@@ -712,6 +712,32 @@ foreground: Color = .{ .r = 0xFF, .g = 0xFF, .b = 0xFF },
/// on the same selection.
@"selection-clear-on-copy": bool = false,
/// Characters that mark word boundaries during text selection operations such
/// as double-clicking. When selecting a word, the selection will stop at any
/// of these characters.
///
/// This is similar to the `WORDCHARS` environment variable in zsh, except this
/// specifies the boundary characters rather than the word characters. The
/// default includes common delimiters and punctuation that typically separate
/// words in code and prose.
///
/// Each character in this string becomes a word boundary. Multi-byte UTF-8
/// characters are supported, but only single codepoints can be specified.
/// Multi-codepoint sequences (e.g. emoji) are not supported.
///
/// The null character (U+0000) is always treated as a boundary and does not
/// need to be included in this configuration.
///
/// Default: ` \t'"│`|:;,()[]{}<>$`
///
/// To add or remove specific characters, you can set this to a custom value.
/// For example, to treat semicolons as part of words:
///
/// selection-word-chars = " \t'\"│`|:,()[]{}<>$"
///
/// Available since: 1.3.0
@"selection-word-chars": SelectionWordChars = .{},
/// The minimum contrast ratio between the foreground and background colors.
/// The contrast ratio is a value between 1 and 21. A value of 1 allows for no
/// contrast (e.g. black on black). This value is the contrast ratio as defined
@@ -5763,6 +5789,113 @@ pub const RepeatableString = struct {
}
};
/// SelectionWordChars stores the parsed codepoints for word boundary
/// characters used during text selection. The string is parsed once
/// during configuration and stored as u21 codepoints for efficient
/// lookup during selection operations.
pub const SelectionWordChars = struct {
    const Self = @This();

    /// Default boundary characters: ` \t'"│`|:;,()[]{}<>$`
    const default_codepoints = [_]u21{
        0, // null
        ' ', // space
        '\t', // tab
        '\'', // single quote
        '"', // double quote
        '│', // U+2502 box drawing
        '`', // backtick
        '|', // pipe
        ':', // colon
        ';', // semicolon
        ',', // comma
        '(', // left paren
        ')', // right paren
        '[', // left bracket
        ']', // right bracket
        '{', // left brace
        '}', // right brace
        '<', // less than
        '>', // greater than
        '$', // dollar
    };

    /// The parsed codepoints. Values produced by the default and by
    /// `parseCLI` always hold null (U+0000) at index 0.
    codepoints: []const u21 = &default_codepoints,

    /// Parse a UTF-8 string of boundary characters into codepoints.
    ///
    /// Returns `error.ValueRequired` when no value is given. Allocation
    /// failures are propagated. If the value is not valid UTF-8 the
    /// result contains only the null boundary rather than an error.
    ///
    /// The resulting slice is allocated with `alloc`; the previously
    /// stored slice is not freed here (the test below uses an arena;
    /// presumably callers do the same, verify against the config loader).
    pub fn parseCLI(self: *Self, alloc: Allocator, input: ?[]const u8) !void {
        const value = input orelse return error.ValueRequired;

        // Parse UTF-8 string into codepoints
        var list: std.ArrayList(u21) = .empty;
        defer list.deinit(alloc);

        // Always include null as first boundary
        try list.append(alloc, 0);

        // Parse the UTF-8 string
        const utf8_view = std.unicode.Utf8View.init(value) catch {
            // Invalid UTF-8, just use null boundary
            self.codepoints = try list.toOwnedSlice(alloc);
            return;
        };

        var utf8_it = utf8_view.iterator();
        while (utf8_it.nextCodepoint()) |codepoint| {
            try list.append(alloc, codepoint);
        }

        self.codepoints = try list.toOwnedSlice(alloc);
    }

    /// Deep copy of the struct. Required by Config.
    pub fn clone(self: *const Self, alloc: Allocator) Allocator.Error!Self {
        const copy = try alloc.dupe(u21, self.codepoints);
        return .{ .codepoints = copy };
    }

    /// Compare if two values are equal. Required by Config.
    pub fn equal(self: Self, other: Self) bool {
        return std.mem.eql(u21, self.codepoints, other.codepoints);
    }

    /// Used by Formatter.
    ///
    /// Re-encodes the codepoints as a UTF-8 string and hands it to the
    /// formatter. The implicit leading null is omitted. Codepoints that
    /// cannot be encoded are skipped, and output is truncated once the
    /// 4096-byte scratch buffer is full.
    pub fn formatEntry(self: Self, formatter: formatterpkg.EntryFormatter) !void {
        // Convert codepoints back to UTF-8 string for display
        var buf: [4096]u8 = undefined;
        var pos: usize = 0;

        // Skip the implicit null at index 0. `codepoints` is a public
        // field, so do not assume the slice is non-empty or that its
        // first element really is the null: slicing `[1..]` on an empty
        // slice would be out of bounds, and dropping a non-null first
        // element would silently lose a user character.
        const visible: []const u21 = if (self.codepoints.len > 0 and self.codepoints[0] == 0)
            self.codepoints[1..]
        else
            self.codepoints;

        for (visible) |codepoint| {
            var utf8_buf: [4]u8 = undefined;
            const len = std.unicode.utf8Encode(codepoint, &utf8_buf) catch continue;
            if (pos + len > buf.len) break;
            @memcpy(buf[pos..][0..len], utf8_buf[0..len]);
            pos += len;
        }

        try formatter.formatEntry([]const u8, buf[0..pos]);
    }

    test "parseCLI" {
        const testing = std.testing;
        var arena = ArenaAllocator.init(testing.allocator);
        defer arena.deinit();
        const alloc = arena.allocator();

        var chars: Self = .{};
        try chars.parseCLI(alloc, " \t;,");

        // Should have null + 4 characters
        try testing.expectEqual(@as(usize, 5), chars.codepoints.len);
        try testing.expectEqual(@as(u21, 0), chars.codepoints[0]);
        try testing.expectEqual(@as(u21, ' '), chars.codepoints[1]);
        try testing.expectEqual(@as(u21, '\t'), chars.codepoints[2]);
        try testing.expectEqual(@as(u21, ';'), chars.codepoints[3]);
        try testing.expectEqual(@as(u21, ','), chars.codepoints[4]);
    }

    test "parseCLI multi-byte" {
        const testing = std.testing;
        var arena = ArenaAllocator.init(testing.allocator);
        defer arena.deinit();
        const alloc = arena.allocator();

        var chars: Self = .{};
        try chars.parseCLI(alloc, "│é");

        // Should have null + 2 multi-byte characters
        try testing.expectEqual(@as(usize, 3), chars.codepoints.len);
        try testing.expectEqual(@as(u21, 0), chars.codepoints[0]);
        try testing.expectEqual(@as(u21, '│'), chars.codepoints[1]);
        try testing.expectEqual(@as(u21, 'é'), chars.codepoints[2]);
    }

    test "parseCLI invalid UTF-8 and missing value" {
        const testing = std.testing;
        var arena = ArenaAllocator.init(testing.allocator);
        defer arena.deinit();
        const alloc = arena.allocator();

        var chars: Self = .{};

        // Invalid UTF-8 falls back to only the null boundary
        try chars.parseCLI(alloc, "\xff");
        try testing.expectEqual(@as(usize, 1), chars.codepoints.len);
        try testing.expectEqual(@as(u21, 0), chars.codepoints[0]);

        // A value is required
        try testing.expectError(error.ValueRequired, chars.parseCLI(alloc, null));
    }

    test "clone and equal" {
        const testing = std.testing;
        var arena = ArenaAllocator.init(testing.allocator);
        defer arena.deinit();
        const alloc = arena.allocator();

        var chars: Self = .{};
        try chars.parseCLI(alloc, ";,");

        const copy = try chars.clone(alloc);
        try testing.expect(chars.equal(copy));

        // The clone owns its own storage
        try testing.expect(chars.codepoints.ptr != copy.codepoints.ptr);

        const defaults: Self = .{};
        try testing.expect(!chars.equal(defaults));
    }
};
/// FontVariation is a repeatable configuration value that sets a single
/// font variation value. Font variations are configurations for what
/// are often called "variable fonts." The font files usually end in

View File

@@ -2617,11 +2617,15 @@ pub fn selectAll(self: *Screen) ?Selection {
/// end_pt (inclusive). Because it selects "nearest" to start point, start
/// point can be before or after end point.
///
/// The boundary_codepoints parameter should be a slice of u21 codepoints that
/// mark word boundaries, passed through to selectWord.
///
/// TODO: test this
pub fn selectWordBetween(
self: *Screen,
start: Pin,
end: Pin,
boundary_codepoints: []const u21,
) ?Selection {
const dir: PageList.Direction = if (start.before(end)) .right_down else .left_up;
var it = start.cellIterator(dir, end);
@@ -2633,7 +2637,7 @@ pub fn selectWordBetween(
}
// If we found a word, then return it
if (self.selectWord(pin)) |sel| return sel;
if (self.selectWord(pin, boundary_codepoints)) |sel| return sel;
}
return null;
@@ -2645,33 +2649,16 @@ pub fn selectWordBetween(
///
/// This will return null if a selection is impossible. The only scenario
/// this happens is if the point pt is outside of the written screen space.
pub fn selectWord(self: *Screen, pin: Pin) ?Selection {
///
/// The boundary_codepoints parameter should be a slice of u21 codepoints that
/// mark word boundaries. This is expected to be pre-parsed from the config.
pub fn selectWord(
self: *Screen,
pin: Pin,
boundary_codepoints: []const u21,
) ?Selection {
_ = self;
// Boundary characters for selection purposes
const boundary = &[_]u32{
0,
' ',
'\t',
'\'',
'"',
'│',
'`',
'|',
':',
';',
',',
'(',
')',
'[',
']',
'{',
'}',
'<',
'>',
'$',
};
// If our cell is empty we can't select a word, because we can't select
// areas where the screen is not yet written.
const start_cell = pin.rowAndCell().cell;
@@ -2679,9 +2666,9 @@ pub fn selectWord(self: *Screen, pin: Pin) ?Selection {
// Determine if we are a boundary or not to determine what our boundary is.
const expect_boundary = std.mem.indexOfAny(
u32,
boundary,
&[_]u32{start_cell.content.codepoint},
u21,
boundary_codepoints,
&[_]u21{start_cell.content.codepoint},
) != null;
// Go forwards to find our end boundary
@@ -2697,9 +2684,9 @@ pub fn selectWord(self: *Screen, pin: Pin) ?Selection {
// If we do not match our expected set, we hit a boundary
const this_boundary = std.mem.indexOfAny(
u32,
boundary,
&[_]u32{cell.content.codepoint},
u21,
boundary_codepoints,
&[_]u21{cell.content.codepoint},
) != null;
if (this_boundary != expect_boundary) break :end prev;
@@ -2734,9 +2721,9 @@ pub fn selectWord(self: *Screen, pin: Pin) ?Selection {
// If we do not match our expected set, we hit a boundary
const this_boundary = std.mem.indexOfAny(
u32,
boundary,
&[_]u32{cell.content.codepoint},
u21,
boundary_codepoints,
&[_]u21{cell.content.codepoint},
) != null;
if (this_boundary != expect_boundary) break :start prev;
@@ -7699,6 +7686,12 @@ test "Screen: selectWord" {
defer s.deinit();
try s.testWriteString("ABC DEF\n 123\n456");
// Default boundary codepoints for word selection
const boundary_codepoints = &[_]u21{
0, ' ', '\t', '\'', '"', '│', '`', '|', ':', ';',
',', '(', ')', '[', ']', '{', '}', '<', '>', '$',
};
// Outside of active area
// try testing.expect(s.selectWord(.{ .x = 9, .y = 0 }) == null);
// try testing.expect(s.selectWord(.{ .x = 0, .y = 5 }) == null);
@@ -7708,7 +7701,7 @@ test "Screen: selectWord" {
var sel = s.selectWord(s.pages.pin(.{ .active = .{
.x = 0,
.y = 0,
} }).?).?;
} }).?, boundary_codepoints).?;
defer sel.deinit(&s);
try testing.expectEqual(point.Point{ .screen = .{
.x = 0,
@@ -7725,7 +7718,7 @@ test "Screen: selectWord" {
var sel = s.selectWord(s.pages.pin(.{ .active = .{
.x = 2,
.y = 0,
} }).?).?;
} }).?, boundary_codepoints).?;
defer sel.deinit(&s);
try testing.expectEqual(point.Point{ .screen = .{
.x = 0,
@@ -7742,7 +7735,7 @@ test "Screen: selectWord" {
var sel = s.selectWord(s.pages.pin(.{ .active = .{
.x = 1,
.y = 0,
} }).?).?;
} }).?, boundary_codepoints).?;
defer sel.deinit(&s);
try testing.expectEqual(point.Point{ .screen = .{
.x = 0,
@@ -7759,7 +7752,7 @@ test "Screen: selectWord" {
var sel = s.selectWord(s.pages.pin(.{ .active = .{
.x = 3,
.y = 0,
} }).?).?;
} }).?, boundary_codepoints).?;
defer sel.deinit(&s);
try testing.expectEqual(point.Point{ .screen = .{
.x = 3,
@@ -7776,7 +7769,7 @@ test "Screen: selectWord" {
var sel = s.selectWord(s.pages.pin(.{ .active = .{
.x = 0,
.y = 1,
} }).?).?;
} }).?, boundary_codepoints).?;
defer sel.deinit(&s);
try testing.expectEqual(point.Point{ .screen = .{
.x = 0,
@@ -7793,7 +7786,7 @@ test "Screen: selectWord" {
var sel = s.selectWord(s.pages.pin(.{ .active = .{
.x = 1,
.y = 2,
} }).?).?;
} }).?, boundary_codepoints).?;
defer sel.deinit(&s);
try testing.expectEqual(point.Point{ .screen = .{
.x = 0,
@@ -7814,6 +7807,12 @@ test "Screen: selectWord across soft-wrap" {
defer s.deinit();
try s.testWriteString(" 1234012\n 123");
// Default boundary codepoints for word selection
const boundary_codepoints = &[_]u21{
0, ' ', '\t', '\'', '"', '│', '`', '|', ':', ';',
',', '(', ')', '[', ']', '{', '}', '<', '>', '$',
};
{
const contents = try s.dumpStringAlloc(alloc, .{ .screen = .{} });
defer alloc.free(contents);
@@ -7825,7 +7824,7 @@ test "Screen: selectWord across soft-wrap" {
var sel = s.selectWord(s.pages.pin(.{ .active = .{
.x = 1,
.y = 0,
} }).?).?;
} }).?, boundary_codepoints).?;
defer sel.deinit(&s);
try testing.expectEqual(point.Point{ .screen = .{
.x = 1,
@@ -7842,7 +7841,7 @@ test "Screen: selectWord across soft-wrap" {
var sel = s.selectWord(s.pages.pin(.{ .active = .{
.x = 1,
.y = 1,
} }).?).?;
} }).?, boundary_codepoints).?;
defer sel.deinit(&s);
try testing.expectEqual(point.Point{ .screen = .{
.x = 1,
@@ -7859,7 +7858,7 @@ test "Screen: selectWord across soft-wrap" {
var sel = s.selectWord(s.pages.pin(.{ .active = .{
.x = 3,
.y = 0,
} }).?).?;
} }).?, boundary_codepoints).?;
defer sel.deinit(&s);
try testing.expectEqual(point.Point{ .screen = .{
.x = 1,
@@ -7880,12 +7879,18 @@ test "Screen: selectWord whitespace across soft-wrap" {
defer s.deinit();
try s.testWriteString("1 1\n 123");
// Default boundary codepoints for word selection
const boundary_codepoints = &[_]u21{
0, ' ', '\t', '\'', '"', '│', '`', '|', ':', ';',
',', '(', ')', '[', ']', '{', '}', '<', '>', '$',
};
// Going forward
{
var sel = s.selectWord(s.pages.pin(.{ .active = .{
.x = 1,
.y = 0,
} }).?).?;
} }).?, boundary_codepoints).?;
defer sel.deinit(&s);
try testing.expectEqual(point.Point{ .screen = .{
.x = 1,
@@ -7902,7 +7907,7 @@ test "Screen: selectWord whitespace across soft-wrap" {
var sel = s.selectWord(s.pages.pin(.{ .active = .{
.x = 1,
.y = 1,
} }).?).?;
} }).?, boundary_codepoints).?;
defer sel.deinit(&s);
try testing.expectEqual(point.Point{ .screen = .{
.x = 1,
@@ -7919,7 +7924,7 @@ test "Screen: selectWord whitespace across soft-wrap" {
var sel = s.selectWord(s.pages.pin(.{ .active = .{
.x = 3,
.y = 0,
} }).?).?;
} }).?, boundary_codepoints).?;
defer sel.deinit(&s);
try testing.expectEqual(point.Point{ .screen = .{
.x = 1,
@@ -7936,6 +7941,12 @@ test "Screen: selectWord with character boundary" {
const testing = std.testing;
const alloc = testing.allocator;
// Default boundary codepoints for word selection
const boundary_codepoints = &[_]u21{
0, ' ', '\t', '\'', '"', '│', '`', '|', ':', ';',
',', '(', ')', '[', ']', '{', '}', '<', '>', '$',
};
const cases = [_][]const u8{
" 'abc' \n123",
" \"abc\" \n123",
@@ -7966,7 +7977,7 @@ test "Screen: selectWord with character boundary" {
var sel = s.selectWord(s.pages.pin(.{ .active = .{
.x = 2,
.y = 0,
} }).?).?;
} }).?, boundary_codepoints).?;
defer sel.deinit(&s);
try testing.expectEqual(point.Point{ .screen = .{
.x = 2,
@@ -7983,7 +7994,7 @@ test "Screen: selectWord with character boundary" {
var sel = s.selectWord(s.pages.pin(.{ .active = .{
.x = 4,
.y = 0,
} }).?).?;
} }).?, boundary_codepoints).?;
defer sel.deinit(&s);
try testing.expectEqual(point.Point{ .screen = .{
.x = 2,
@@ -8000,7 +8011,7 @@ test "Screen: selectWord with character boundary" {
var sel = s.selectWord(s.pages.pin(.{ .active = .{
.x = 3,
.y = 0,
} }).?).?;
} }).?, boundary_codepoints).?;
defer sel.deinit(&s);
try testing.expectEqual(point.Point{ .screen = .{
.x = 2,
@@ -8019,7 +8030,7 @@ test "Screen: selectWord with character boundary" {
var sel = s.selectWord(s.pages.pin(.{ .active = .{
.x = 1,
.y = 0,
} }).?).?;
} }).?, boundary_codepoints).?;
defer sel.deinit(&s);
try testing.expectEqual(point.Point{ .screen = .{
.x = 0,