libghostty: add utf-8 grapheme cell getter to C API

Add a render-state row-cells getter that encodes the current cell's
full grapheme cluster directly as UTF-8 into a caller-provided
GhosttyBuffer. The getter writes the base codepoint first, followed by
any extra grapheme codepoints, and follows the existing buffer-writer
convention where len is bytes written on success or required capacity
on GHOSTTY_OUT_OF_SPACE.

Previously C consumers could query grapheme codepoints, but bindings
that needed UTF-8 text had to reconstruct and encode the cluster
themselves. That duplicated terminal internals in downstream bindings
and made users pay for awkward cross-language struct handling. By
owning the UTF-8/grapheme behavior in libghostty, bindings can use one
stable C API and optionally wrap it with small binding-local helpers.
This commit is contained in:
Mitchell Hashimoto
2026-05-28 09:29:57 -07:00
parent 54ac5fd21e
commit 3cf01e8445
7 changed files with 138 additions and 0 deletions

View File

@@ -614,6 +614,19 @@ typedef enum GHOSTTY_ENUM_TYPED {
* GhosttyCell for renderers that only need to know whether fetching the
* full style is necessary. */
GHOSTTY_RENDER_STATE_ROW_CELLS_DATA_HAS_STYLING = 8,
/**
* Encode the current cell's full grapheme cluster as UTF-8 into a
* caller-provided buffer (GhosttyBuffer).
*
* The base codepoint is encoded first, followed by any extra grapheme
* codepoints. Returns GHOSTTY_SUCCESS with len=0 when the cell has no text.
*
* If ptr is NULL or cap is too small for a non-empty cell, returns
* GHOSTTY_OUT_OF_SPACE without writing any bytes and sets len to the required
* buffer size in bytes.
*/
GHOSTTY_RENDER_STATE_ROW_CELLS_DATA_GRAPHEMES_UTF8 = 9,
GHOSTTY_RENDER_STATE_ROW_CELLS_DATA_MAX_VALUE = GHOSTTY_ENUM_MAX_VALUE,
} GhosttyRenderStateRowCellsData;

View File

@@ -227,6 +227,23 @@ typedef struct {
size_t len;
} GhosttyString;
/**
* A caller-provided byte buffer.
*
* APIs that write to this type use `len` for the number of bytes written on
* GHOSTTY_SUCCESS and the required byte capacity on GHOSTTY_OUT_OF_SPACE.
*
* Typical usage is a two-call pattern: first call with ptr = NULL and
* cap = 0 to obtain the required size in `len`, then call again with a
* buffer of at least that many bytes.
*/
typedef struct {
/** Destination buffer for bytes. May be NULL when cap is 0 to query required size. */
uint8_t* ptr;
/** Capacity of ptr in bytes. */
size_t cap;
/** Bytes written on success, or required byte capacity on GHOSTTY_OUT_OF_SPACE. */
size_t len;
} GhosttyBuffer;
/**
* A surface-space position in pixels.
*

View File

@@ -5,6 +5,7 @@ const types = @import("types.zig");
const unionpkg = @import("union.zig");
pub const allocator = @import("allocator.zig");
pub const Buffer = types.Buffer;
pub const Enum = enumpkg.Enum;
pub const checkGhosttyHEnum = enumpkg.checkGhosttyHEnum;
pub const String = types.String;

View File

@@ -11,3 +11,9 @@ pub const String = extern struct {
};
}
};
/// A caller-provided byte buffer; the Zig counterpart of the C
/// `GhosttyBuffer` struct (same fields, same order).
///
/// Writers report through `len`: the number of bytes written on success,
/// or the required capacity in bytes when they return out-of-space.
pub const Buffer = extern struct {
/// Destination for written bytes. Null means no buffer was supplied.
ptr: ?[*]u8 = null,
/// Capacity of `ptr` in bytes.
cap: usize = 0,
/// Output: bytes written on success, or required capacity on out-of-space.
len: usize = 0,
};

View File

@@ -467,6 +467,7 @@ pub const RowCellsData = enum(c_int) {
fg_color = 6,
selected = 7,
has_styling = 8,
graphemes_utf8 = 9,
/// Output type expected for querying the data of the given kind.
pub fn OutType(comptime self: RowCellsData) type {
@@ -478,6 +479,7 @@ pub const RowCellsData = enum(c_int) {
.graphemes_buf => u32,
.bg_color, .fg_color => colorpkg.RGB.C,
.selected, .has_styling => bool,
.graphemes_utf8 => lib.Buffer,
};
}
};
@@ -493,6 +495,7 @@ pub fn row_cells_get(
return .invalid_value;
};
}
if (out == null) return .invalid_value;
return switch (data) {
.invalid => .invalid_value,
@@ -573,11 +576,44 @@ fn rowCellsGetTyped(
else
false,
.has_styling => out.* = cell.hasStyling(),
.graphemes_utf8 => return rowCellsGetGraphemesUtf8(cell, if (cell.hasGrapheme()) cells.graphemes[x] else &.{}, out),
}
return .success;
}
/// Encode a cell's full grapheme cluster as UTF-8 into a caller-provided
/// buffer.
///
/// The base codepoint of `cell` is written first, followed by every
/// codepoint in `extra`, in order.
///
/// `out.len` is always overwritten by this call:
///   - `.success`: number of bytes written (0 when the cell has no text).
///   - `.out_of_space`: required capacity in bytes. Returned when `out.ptr`
///     is null or `out.cap` is smaller than the required size; nothing is
///     written to the buffer in this case.
///
/// Returns `.invalid_value` when a codepoint cannot be encoded as UTF-8.
fn rowCellsGetGraphemesUtf8(
    cell: page.Cell,
    extra: []const u21,
    out: *lib.Buffer,
) Result {
    out.len = 0;
    if (!cell.hasText()) return .success;

    const base = cell.codepoint();

    // Sizing pass. The accumulator must be explicitly `usize`:
    // `utf8CodepointSequenceLength` returns a `u3`, and letting that type be
    // inferred would cap the running total at 7 bytes, overflowing on any
    // cluster longer than that (e.g. two 4-byte emoji joined by a ZWJ).
    var needed: usize = std.unicode.utf8CodepointSequenceLength(base) catch
        return .invalid_value;
    for (extra) |cp| {
        needed += std.unicode.utf8CodepointSequenceLength(cp) catch
            return .invalid_value;
    }

    // Report the required size before checking capacity so that a size
    // query (null ptr / zero cap) gets its answer through `len`.
    out.len = needed;
    const ptr = out.ptr orelse return .out_of_space;
    if (out.cap < needed) return .out_of_space;

    // Encoding pass. `utf8Encode` also rejects surrogate halves, which the
    // sizing pass does not check; such a failure surfaces here.
    const buf = ptr[0..out.cap];
    var i: usize = 0;
    i += std.unicode.utf8Encode(base, buf[i..]) catch
        return .invalid_value;
    for (extra) |cp| {
        i += std.unicode.utf8Encode(cp, buf[i..]) catch
            return .invalid_value;
    }

    out.len = i;
    return .success;
}
/// C: GhosttyRenderStateRowData
pub const RowData = enum(c_int) {
invalid = 0,
@@ -1256,6 +1292,69 @@ test "render: row cells get has_styling" {
try testing.expect(has_styling);
}
// Exercises the `.graphemes_utf8` row-cells getter end to end: size query,
// undersized buffer, successful encode, and a cell with no text.
test "render: row cells get graphemes utf8" {
var terminal: terminal_c.Terminal = null;
try testing.expectEqual(Result.success, terminal_c.new(
&lib.alloc.test_allocator,
&terminal,
.{ .cols = 10, .rows = 3, .max_scrollback = 10_000 },
));
defer terminal_c.free(terminal);
// 'e' followed by U+0301 (combining acute accent): one base codepoint plus
// one extra grapheme codepoint, 3 bytes of UTF-8 in total.
const input = "e\u{301}";
terminal_c.vt_write(terminal, input, input.len);
var state: RenderState = null;
try testing.expectEqual(Result.success, new(
&lib.alloc.test_allocator,
&state,
));
defer free(state);
try testing.expectEqual(Result.success, update(state, terminal));
var it: RowIterator = null;
try testing.expectEqual(Result.success, row_iterator_new(
&lib.alloc.test_allocator,
&it,
));
defer row_iterator_free(it);
var cells: RowCells = null;
try testing.expectEqual(Result.success, row_cells_new(
&lib.alloc.test_allocator,
&cells,
));
defer row_cells_free(cells);
// Advance to the first row and select its first cell (index 0).
try testing.expectEqual(Result.success, get(state, .row_iterator, @ptrCast(&it)));
try testing.expect(row_iterator_next(it));
try testing.expectEqual(Result.success, row_get(it, .cells, @ptrCast(&cells)));
try testing.expectEqual(Result.success, row_cells_select(cells, 0));
// Size query: a default Buffer has a null ptr and zero cap, so the getter
// must report out-of-space and put the required byte count in `len`.
var text: lib.Buffer = .{};
try testing.expectEqual(Result.out_of_space, row_cells_get(cells, .graphemes_utf8, @ptrCast(&text)));
try testing.expectEqual(@as(usize, input.len), text.len);
// Undersized buffer: still out-of-space with the required size reported,
// and the sentinel bytes must be left untouched (no partial write).
var small = [_]u8{ 'x', 'x' };
text = .{ .ptr = &small, .cap = small.len };
try testing.expectEqual(Result.out_of_space, row_cells_get(cells, .graphemes_utf8, @ptrCast(&text)));
try testing.expectEqual(@as(usize, input.len), text.len);
try testing.expectEqualSlices(u8, &.{ 'x', 'x' }, &small);
// Large enough buffer: the encoded bytes must round-trip to the input.
var buf: [8]u8 = undefined;
text = .{ .ptr = &buf, .cap = buf.len };
try testing.expectEqual(Result.success, row_cells_get(cells, .graphemes_utf8, @ptrCast(&text)));
try testing.expectEqual(input.len, text.len);
try testing.expectEqualStrings(input, buf[0..text.len]);
// The cell at index 1 was never written to: success with zero bytes.
try testing.expectEqual(Result.success, row_cells_select(cells, 1));
text = .{ .ptr = &buf, .cap = buf.len };
try testing.expectEqual(Result.success, row_cells_get(cells, .graphemes_utf8, @ptrCast(&text)));
try testing.expectEqual(@as(usize, 0), text.len);
}
test "render: row iterator next" {
var terminal: terminal_c.Terminal = null;
try testing.expectEqual(Result.success, terminal_c.new(

View File

@@ -36,6 +36,7 @@ pub const Codepoints = extern struct {
pub const structs: std.StaticStringMap(StructInfo) = structs: {
@setEvalBranchQuota(10_000);
break :structs .initComptime(.{
.{ "GhosttyBuffer", StructInfo.init(lib.Buffer) },
.{ "GhosttyCodepoints", StructInfo.init(Codepoints) },
.{ "GhosttyColorRgb", StructInfo.init(color.RGB.C) },
.{ "GhosttyDeviceAttributes", StructInfo.init(terminal.DeviceAttributes) },

View File

@@ -14,6 +14,7 @@ pub const calling_conv: std.builtin.CallingConvention = .c;
/// Forwarded decls from lib that are used.
pub const alloc = lib.allocator;
pub const Buffer = lib.Buffer;
pub const Enum = lib.Enum;
pub const TaggedUnion = lib.TaggedUnion;
pub const Struct = lib.Struct;