font/opentype: add glyf table entry validation

We want to have this for the glyph protocol so that we can validate passed glyf data in libghostty without having to link freetype or anything like that.
2026-05-23 21:30:19 +00:00 · 2026-04-21 18:28:21 -04:00
parent 6e0b0311e4
commit d778be20dd
3 changed files with 438 additions and 0 deletions
--- a/src/font/opentype.zig
+++ b/src/font/opentype.zig
@@ -5,12 +5,14 @@ const os2 = @import("opentype/os2.zig");
 const post = @import("opentype/post.zig");
 const hhea = @import("opentype/hhea.zig");
 const head = @import("opentype/head.zig");
+const glyf = @import("opentype/glyf.zig");

 pub const SVG = svg.SVG;
 pub const OS2 = os2.OS2;
 pub const Post = post.Post;
 pub const Hhea = hhea.Hhea;
 pub const Head = head.Head;
+pub const Glyf = glyf.Glyf;

 test {
    @import("std").testing.refAllDecls(@This());
--- a/src/font/opentype/glyf.zig
+++ b/src/font/opentype/glyf.zig
@@ -0,0 +1,434 @@
+const std = @import("std");
+const sfnt = @import("sfnt.zig");
+
+/// Glyph Data Table
+///
+/// This takes a little bit of a different form than other tables that we
+/// have parsers for. Due to the fact that this table contains arrays of
+/// arbitrary length, we store a pointer (slice) to the underlying data,
+/// and then have functions for getting and interpreting specific parts.
+///
+/// References:
+/// - https://learn.microsoft.com/en-us/typography/opentype/spec/glyf
+///
+/// Field names are in camelCase to match names in spec.
+pub const Glyf = struct {
+    /// The raw bytes of the `glyf` table. Not owned by this struct.
+    data: []const u8,
+
+    /// A single glyph entry within the table.
+    ///
+    /// https://learn.microsoft.com/en-us/typography/opentype/spec/glyf#table-organization
+    pub const Entry = struct {
+        header: Header,
+
+        /// We store a reference to the original bytes so that we can
+        /// validate or iterate the contours or components of the glyph.
+        ///
+        /// This data starts immediately after the header.
+        data: []const u8,
+
+        /// The header that's always present at
+        /// the start of any glyph in the table.
+        ///
+        /// Depending on the number of contours, the data that
+        /// comes afterwards must be interpreted differently.
+        ///
+        /// References:
+        /// - https://learn.microsoft.com/en-us/typography/opentype/spec/glyf#glyph-headers
+        pub const Header = extern struct {
+            /// If the number of contours is greater than
+            /// or equal to zero, this is a simple glyph.
+            ///
+            /// If negative, this is a composite glyph — the
+            /// value -1 should be used for composite glyphs.
+            numberOfContours: sfnt.int16 align(1),
+
+            /// Minimum x for coordinate data.
+            xMin: sfnt.int16 align(1),
+
+            /// Minimum y for coordinate data.
+            yMin: sfnt.int16 align(1),
+
+            /// Maximum x for coordinate data.
+            xMax: sfnt.int16 align(1),
+
+            /// Maximum y for coordinate data.
+            yMax: sfnt.int16 align(1),
+        };
+
+        pub const Type = enum {
+            /// A glyph made of standard contours.
+            simple,
+            /// A glyph made of references to other glyphs.
+            composite,
+        };
+
+        /// Initialize an entry from the provided data.
+        ///
+        /// This DOES NOT COPY the data, it only stores a pointer to it.
+        ///
+        /// The lifetime of this struct, then, is the same as the
+        /// lifetime of the data that is used to initialize it.
+        ///
+        /// Returns `error.EndOfStream` if `data` is too
+        /// short to contain a complete glyph header.
+        pub fn init(data: []const u8) error{EndOfStream}!Entry {
+            var fbs = std.io.fixedBufferStream(data);
+            const reader = fbs.reader();
+            const header = try reader.readStructEndian(Header, .big);
+            return .{ .header = header, .data = data[fbs.pos..] };
+        }
+
+        /// Identifies what type (simple or composite) of entry this is.
+        pub fn entryType(self: Entry) Type {
+            return if (self.header.numberOfContours >= 0)
+                .simple
+            else
+                .composite;
+        }
+
+        /// Errors that can be returned from `Entry.size()`.
+        pub const SizeError = error{
+            /// The entry's data wasn't large enough, ran
+            /// out of bytes before we were done reading.
+            EndOfStream,
+
+            /// The entry contains hinting instructions,
+            /// which we don't currently support.
+            InstructionsNotSupported,
+
+            /// The entry is a composite glyph,
+            /// which we don't currently support.
+            CompositeNotSupported,
+
+            /// The elements of the end points array
+            /// must strictly monotonically increase.
+            ///
+            /// This error means the provided entry violated that.
+            EndPointsOutOfOrder,
+
+            /// This entry defines points past the index determined
+            /// by the final element of the endPtsOfContours array.
+            TooManyPoints,
+        };
+
+        /// Determines the size (in bytes) of this entry.
+        ///
+        /// If the entry is valid, returns the number of bytes
+        /// taken up by this entry, including its header.
+        ///
+        /// NOTE: Currently produces errors when given composite glyphs
+        ///       or any glyphs that have hinting instructions included.
+        pub fn size(self: Entry) SizeError!usize {
+            var fbs = std.io.fixedBufferStream(self.data);
+            const reader = fbs.reader();
+            switch (self.entryType()) {
+                // https://learn.microsoft.com/en-us/typography/opentype/spec/glyf#simple-glyph-description
+                .simple => {
+                    // Non-negative, since `entryType` returned `.simple`.
+                    const num_contours: usize = @intCast(self.header.numberOfContours);
+
+                    // uint16 endPtsOfContours[numberOfContours]
+                    //
+                    // Array of point indices for the last point
+                    // of each contour, in increasing numeric order.
+                    //
+                    // Starts at -1 so that a first end point of 0 is accepted.
+                    var max_point_index: isize = -1;
+                    for (0..num_contours) |_| {
+                        const index = try reader.readInt(sfnt.uint16, .big);
+                        // The endpoints are supposed to monotonically increase.
+                        if (index <= max_point_index) return error.EndPointsOutOfOrder;
+                        max_point_index = index;
+                    }
+
+                    // The total number of points is one more than the
+                    // final end point index (or zero with no contours).
+                    const num_points: usize = @intCast(max_point_index + 1);
+
+                    // uint16 instructionLength
+                    //
+                    // Total number of bytes for instructions.
+                    //
+                    // If instructionLength is zero, no instructions
+                    // are present for this glyph, and this field is
+                    // followed directly by the flags field.
+                    const instructions_length = try reader.readInt(sfnt.uint16, .big);
+
+                    // Since we don't have code that validates instruction
+                    // byte code, we just reject all glyphs that contain any.
+                    //
+                    // In the future we could change this to just ignore the
+                    // instructions, or even validate them, but for now this
+                    // is fine, since we only need this function at all to
+                    // validate glyf entries from the glyph protocol, which
+                    // explicitly forbids instructions anyway.
+                    if (instructions_length > 0) return error.InstructionsNotSupported;
+
+                    // uint8 flags[variable]
+                    //
+                    // Array of flag elements.
+                    //
+                    // ---
+                    //
+                    // We do additional accounting here to figure out how many
+                    // bytes the next two fields (the [x|y]Coordinates arrays)
+                    // should take, so that we can just try to throw out that
+                    // many bytes in order to validate them. This is because
+                    // the length of each one depends on the flags.
+                    //
+                    // We're using `i` here to count the number of logical
+                    // entries we have, which should reach the number of
+                    // points defined by the final endpoint (from earlier).
+                    var i: usize = 0;
+                    var x_coords_len: usize = 0;
+                    var y_coords_len: usize = 0;
+                    while (i < num_points) : (i += 1) {
+                        const flag = try reader.readByte();
+
+                        // 0x08 REPEAT_FLAG
+                        //
+                        // Bit 3: If set, the next byte (read as unsigned)
+                        // specifies the number of additional times this flag
+                        // byte is to be repeated in the logical flags array
+                        // — that is, the number of additional logical flag
+                        // entries inserted after this entry.
+                        const repeat: usize = if (flag & 0x08 != 0)
+                            try reader.readByte()
+                        else
+                            0;
+
+                        // If the repeat count pushes our logical point
+                        // number beyond the max point index which we
+                        // figured out earlier from the end points, then
+                        // there's an issue with this entry, error out.
+                        //
+                        // `i < num_points` holds here, so this can't underflow.
+                        if (repeat >= num_points - i) return error.TooManyPoints;
+
+                        // 0x02 X_SHORT_VECTOR
+                        //
+                        // Bit 1: If set, the corresponding x-coordinate
+                        // is 1 byte long, and the sign is determined by
+                        // the X_IS_SAME_OR_POSITIVE_X_SHORT_VECTOR flag.
+                        //
+                        // If not set, its interpretation depends on the
+                        // X_IS_SAME_OR_POSITIVE_X_SHORT_VECTOR flag:
+                        //
+                        // If that other flag is set, the x-coordinate is the
+                        // same as the previous x-coordinate, and no element
+                        // is added to the xCoordinates array.
+                        //
+                        // If both flags are not set, the corresponding
+                        // element in the xCoordinates array is two bytes
+                        // and interpreted as a signed integer.
+                        const x_size: usize =
+                            if (flag & 0x02 != 0) 1 else
+                            // 0x10 X_IS_SAME_OR_POSITIVE_X_SHORT_VECTOR
+                            if (flag & 0x10 != 0) 0 else 2;
+
+                        // 0x04 Y_SHORT_VECTOR
+                        //
+                        // See X_SHORT_VECTOR logic above for explanation.
+                        const y_size: usize =
+                            if (flag & 0x04 != 0) 1 else
+                            // 0x20 Y_IS_SAME_OR_POSITIVE_Y_SHORT_VECTOR
+                            if (flag & 0x20 != 0) 0 else 2;
+
+                        // This flag byte describes `1 + repeat` logical
+                        // points, and every one of them contributes its own
+                        // element (of the same size) to the coordinate arrays,
+                        // so the repeats must be counted here as well.
+                        const point_count = repeat + 1;
+                        x_coords_len += point_count * x_size;
+                        y_coords_len += point_count * y_size;
+
+                        // Skip over the logical entries covered by the repeat.
+                        i += repeat;
+                    }
+
+                    // uint8 or int16 xCoordinates[variable]
+                    //
+                    // Contour point x-coordinates.
+                    //
+                    // ---
+                    //
+                    // We determined the length of this section (in bytes)
+                    // above while processing the flags, so that we can just
+                    // skip that many bytes to validate this field.
+                    try reader.skipBytes(x_coords_len, .{});
+
+                    // uint8 or int16 yCoordinates[variable]
+                    //
+                    // Contour point y-coordinates.
+                    //
+                    // ---
+                    //
+                    // We determined the length of this section (in bytes)
+                    // above while processing the flags, so that we can just
+                    // skip that many bytes to validate this field.
+                    try reader.skipBytes(y_coords_len, .{});
+                },
+
+                .composite => {
+                    // We don't have code for validating composite glyphs,
+                    // mainly because we don't need it, since we only use
+                    // this function for the glyph protocol which explicitly
+                    // forbids composite glyphs anyway.
+                    //
+                    // So we return an error for composite glyphs.
+                    return error.CompositeNotSupported;
+                },
+            }
+
+            // No issues found, the glyf entry is valid, return its length.
+            return @sizeOf(Header) + fbs.pos;
+        }
+    };
+
+    /// Initialize the table from the provided data.
+    ///
+    /// This DOES NOT COPY the data, it only stores a pointer to it.
+    ///
+    /// The lifetime of this struct, then, is the same as the
+    /// lifetime of the data that is used to initialize it.
+    pub fn init(data: []const u8) Glyf {
+        return .{ .data = data };
+    }
+
+    /// Retrieve the entry at the provided byte offset into the table.
+    ///
+    /// Returns `error.EndOfStream` if the offset lies beyond the end of
+    /// the table, or if there aren't enough bytes left at that offset
+    /// for a complete glyph header.
+    pub fn entry(self: Glyf, index: usize) error{EndOfStream}!Entry {
+        // Slicing past the end would be a safety panic (or undefined
+        // behavior in unsafe builds), so report it as an error instead.
+        if (index > self.data.len) return error.EndOfStream;
+        return try Entry.init(self.data[index..]);
+    }
+};
+
+/// TESTING ONLY
+///
+/// Looks up the glyph with the given index in the provided font, using
+/// the `head` table's `indexToLocFormat` to interpret the `loca` table.
+///
+/// Returns a tuple of the entry length implied by the `loca` table (the
+/// distance from this glyph's offset to the next one's) and the entry.
+///
+/// The required tables are unwrapped without checking
+/// and the `loca` lookups are not bounds checked.
+pub fn getGlyph(font: sfnt.SFNT, index: usize) !struct { usize, Glyf.Entry } {
+    comptime if (!@import("builtin").is_test)
+        @compileError("This function is for testing only! It doesn't check bounds or anything!");
+
+    const Head = @import("head.zig").Head;
+
+    const glyf_table = Glyf.init(font.getTable("glyf").?);
+    const head_table = try Head.init(font.getTable("head").?);
+    const loca_bytes = font.getTable("loca").?;
+
+    // Byte offsets into the glyf table of this
+    // glyph and of the glyph that follows it.
+    const bounds: [2]usize = switch (head_table.indexToLocFormat) {
+        // 16-bit entries, multiplied by 2 to get the byte offset.
+        0 => short: {
+            const offsets = std.mem.bytesAsSlice(u16, loca_bytes);
+            break :short [2]usize{
+                @as(usize, std.mem.bigToNative(u16, offsets[index])) * 2,
+                @as(usize, std.mem.bigToNative(u16, offsets[index + 1])) * 2,
+            };
+        },
+        // 32-bit entries, used directly as the byte offset.
+        1 => long: {
+            const offsets = std.mem.bytesAsSlice(u32, loca_bytes);
+            break :long [2]usize{
+                @as(usize, std.mem.bigToNative(u32, offsets[index])),
+                @as(usize, std.mem.bigToNative(u32, offsets[index + 1])),
+            };
+        },
+        else => unreachable,
+    };
+
+    return .{ bounds[1] - bounds[0], try glyf_table.entry(bounds[0]) };
+}
+
+test "glyf" {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+    // Cozette because it doesn't have any hinting.
+    const font_data = @import("../embedded.zig").cozette;
+
+    const font = try sfnt.SFNT.init(font_data, alloc);
+    defer font.deinit(alloc);
+
+    // The glyphs we check, in order:
+    //
+    // -   1: `\0` (nul). Cozette doesn't actually include
+    //        a glyph for notdef, but does include this one.
+    // -  66: "A"
+    // - 265: "Ĩ"
+    const glyph_indices = [_]usize{ 1, 66, 265 };
+
+    for (glyph_indices) |glyph_index| {
+        const loca_len, const glyph = try getGlyph(font, glyph_index);
+        try testing.expect(glyph.entryType() == .simple);
+
+        // It is legal for there to be extra data between two entries, just
+        // as long as the next entry starts after the previous one ends, so
+        // it's okay for the parsed size of the entry to be less than the
+        // size determined from the difference between subsequent loca
+        // offsets.
+        const parsed_len = try glyph.size();
+        try testing.expect(loca_len >= parsed_len);
+    }
+}
+
+test "glyf: reject glyphs with instructions and composite glyphs" {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+    const font_data = @import("../embedded.zig").jetbrains_mono;
+
+    const font = try sfnt.SFNT.init(font_data, alloc);
+    defer font.deinit(alloc);
+
+    // Glyph 0 (notdef) is a simple glyph, but `size()`
+    // is expected to reject it for carrying instructions.
+    {
+        const loca_len, const glyph = try getGlyph(font, 0);
+        try testing.expectEqual(100, loca_len);
+        try testing.expect(glyph.entryType() == .simple);
+        try testing.expectError(
+            error.InstructionsNotSupported,
+            glyph.size(),
+        );
+    }
+
+    // Glyph "Á" is at index 2, and is a composite glyph.
+    {
+        const loca_len, const glyph = try getGlyph(font, 2);
+        try testing.expectEqual(24, loca_len);
+        try testing.expect(glyph.entryType() == .composite);
+        try testing.expectError(
+            error.CompositeNotSupported,
+            glyph.size(),
+        );
+    }
+}
+
+test "glyf: reject truncated" {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+    // Cozette because it doesn't have any hinting.
+    const font_data = @import("../embedded.zig").cozette;
+
+    const font = try sfnt.SFNT.init(font_data, alloc);
+    defer font.deinit(alloc);
+
+    _, const glyph = try getGlyph(font, 1);
+    try testing.expect(glyph.entryType() == .simple);
+
+    // Build a copy of the entry whose data slice stops one byte short
+    // of the full entry length (which is 228 bytes, header included).
+    const truncated_len = 227 - @sizeOf(Glyf.Entry.Header);
+    const truncated: Glyf.Entry = .{
+        .header = glyph.header,
+        .data = glyph.data[0..truncated_len],
+    };
+    try testing.expectError(error.EndOfStream, truncated.size());
+}
+
+test "glyf: reject endpoints out of order" {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+    // Cozette because it doesn't have any hinting.
+    //
+    // We work on a heap copy of the font so that
+    // we're free to corrupt its bytes below.
+    const font_data = try alloc.dupe(u8, @import("../embedded.zig").cozette[0..]);
+    defer alloc.free(font_data);
+
+    const font = try sfnt.SFNT.init(font_data, alloc);
+    defer font.deinit(alloc);
+
+    _, const glyph = try getGlyph(font, 1);
+    try testing.expect(glyph.entryType() == .simple);
+
+    // The entry's data is backed by the copy we made above,
+    // so casting away the const to modify it is okay here.
+    const bytes: []u8 = @constCast(glyph.data);
+
+    // Overwrite an element in the middle of the end
+    // points array with 0, breaking the ordering.
+    const words = std.mem.bytesAsSlice(u16, bytes);
+    words[3] = 0;
+
+    try testing.expectError(error.EndPointsOutOfOrder, glyph.size());
+}
+
+test "glyf: reject too many points" {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+    // Cozette because it doesn't have any hinting.
+    //
+    // We work on a heap copy of the font so that
+    // we're free to corrupt its bytes below.
+    const font_data = try alloc.dupe(u8, @import("../embedded.zig").cozette[0..]);
+    defer alloc.free(font_data);
+
+    const font = try sfnt.SFNT.init(font_data, alloc);
+    defer font.deinit(alloc);
+
+    _, const glyph = try getGlyph(font, 1);
+    try testing.expect(glyph.entryType() == .simple);
+
+    // The entry's data is backed by the copy we made above,
+    // so casting away the const to modify it is okay here.
+    const bytes: []u8 = @constCast(glyph.data);
+
+    // Turn the final two bytes of the flags array into a flag with
+    // REPEAT_FLAG (0x08) set followed by a large repeat count, so
+    // that the logical flags exceed the correct points count.
+    bytes[107] |= 0x08;
+    bytes[108] = 0xFF;
+
+    try testing.expectError(error.TooManyPoints, glyph.size());
+}
--- a/typos.toml
+++ b/typos.toml
@@ -76,6 +76,8 @@ GIR = "GIR"
 rin = "rin"
 # sprites
 ower = "ower"
+# OpenType table names
+loca = "loca"

 [type.po]
 extend-glob = ["*.po"]