diff --git a/src/font/opentype.zig b/src/font/opentype.zig
index dd02efeb3..0cba92255 100644
--- a/src/font/opentype.zig
+++ b/src/font/opentype.zig
@@ -5,12 +5,14 @@ const os2 = @import("opentype/os2.zig");
 const post = @import("opentype/post.zig");
 const hhea = @import("opentype/hhea.zig");
 const head = @import("opentype/head.zig");
+const glyf = @import("opentype/glyf.zig");
 
 pub const SVG = svg.SVG;
 pub const OS2 = os2.OS2;
 pub const Post = post.Post;
 pub const Hhea = hhea.Hhea;
 pub const Head = head.Head;
+pub const Glyf = glyf.Glyf;
 
 test {
     @import("std").testing.refAllDecls(@This());
diff --git a/src/font/opentype/glyf.zig b/src/font/opentype/glyf.zig
new file mode 100644
index 000000000..4f60e5292
--- /dev/null
+++ b/src/font/opentype/glyf.zig
@@ -0,0 +1,489 @@
+const std = @import("std");
+const sfnt = @import("sfnt.zig");
+
+/// Glyph Data Table
+///
+/// This takes a little bit of a different form than other tables that we
+/// have parsers for. Due to the fact that this table contains arrays of
+/// arbitrary length, we store a pointer (slice) to the underlying data,
+/// and then have functions for getting and interpreting specific parts.
+///
+/// References:
+/// - https://learn.microsoft.com/en-us/typography/opentype/spec/glyf
+///
+/// Field names are in camelCase to match names in spec.
+pub const Glyf = struct {
+    data: []const u8,
+
+    /// https://learn.microsoft.com/en-us/typography/opentype/spec/glyf#table-organization
+    pub const Entry = struct {
+        header: Header,
+
+        /// We store a reference to the original bytes so that we can
+        /// validate or iterate the contours or components of the glyph.
+        ///
+        /// This data starts immediately after the header.
+        data: []const u8,
+
+        /// The header that's always present at
+        /// the start of any glyph in the table.
+        ///
+        /// Depending on the number of contours, the data that
+        /// comes afterwards must be interpreted differently.
+        ///
+        /// References:
+        /// - https://learn.microsoft.com/en-us/typography/opentype/spec/glyf#glyph-headers
+        pub const Header = extern struct {
+            /// If the number of contours is greater than
+            /// or equal to zero, this is a simple glyph.
+            ///
+            /// If negative, this is a composite glyph — the
+            /// value -1 should be used for composite glyphs.
+            numberOfContours: sfnt.int16 align(1),
+
+            /// Minimum x for coordinate data.
+            xMin: sfnt.int16 align(1),
+
+            /// Minimum y for coordinate data.
+            yMin: sfnt.int16 align(1),
+
+            /// Maximum x for coordinate data.
+            xMax: sfnt.int16 align(1),
+
+            /// Maximum y for coordinate data.
+            yMax: sfnt.int16 align(1),
+        };
+
+        pub const Type = enum {
+            /// A glyph made of standard contours.
+            simple,
+            /// A glyph made of references to other glyphs.
+            composite,
+        };
+
+        /// Initialize an entry from the provided data.
+        ///
+        /// This DOES NOT COPY the data, it only stores a pointer to it.
+        ///
+        /// The lifetime of this struct, then, is the same as the
+        /// lifetime of the data that is used to initialize it.
+        pub fn init(data: []const u8) error{EndOfStream}!Entry {
+            var fbs = std.io.fixedBufferStream(data);
+            const reader = fbs.reader();
+            const header = try reader.readStructEndian(Header, .big);
+            return .{ .header = header, .data = data[fbs.pos..] };
+        }
+
+        /// Identifies what type (simple or composite) of entry this is.
+        pub fn entryType(self: Entry) Type {
+            return if (self.header.numberOfContours >= 0)
+                .simple
+            else
+                .composite;
+        }
+
+        /// Errors that can be returned from `Entry.size()`.
+        pub const SizeError = error{
+            /// The entry's data wasn't large enough, ran
+            /// out of bytes before we were done reading.
+            EndOfStream,
+
+            /// The entry contains hinting instructions,
+            /// which we don't currently support.
+            InstructionsNotSupported,
+
+            /// The entry is a composite glyph,
+            /// which we don't currently support.
+            CompositeNotSupported,
+
+            /// The elements of the end points array
+            /// must strictly monotonically increase.
+            ///
+            /// This error means the provided entry violated that.
+            EndPointsOutOfOrder,
+
+            /// This entry defines points past the index determined
+            /// by the final element of the endPtsOfContours array.
+            TooManyPoints,
+        };
+
+        /// Determines the size (in bytes) of this entry.
+        ///
+        /// If the entry is valid, returns the number of bytes
+        /// taken up by this entry, including its header.
+        ///
+        /// NOTE: Currently produces errors when given composite glyphs
+        /// or any glyphs that have hinting instructions included.
+        pub fn size(self: Entry) SizeError!usize {
+            var fbs = std.io.fixedBufferStream(self.data);
+            const reader = fbs.reader();
+            switch (self.entryType()) {
+                // https://learn.microsoft.com/en-us/typography/opentype/spec/glyf#simple-glyph-description
+                .simple => {
+                    const num_contours: usize = @intCast(self.header.numberOfContours);
+                    // uint16 endPtsOfContours[numberOfContours]
+                    //
+                    // Array of point indices for the last point
+                    // of each contour, in increasing numeric order.
+                    var max_point_index: isize = -1;
+                    for (0..num_contours) |_| {
+                        const index = try reader.readInt(sfnt.uint16, .big);
+                        // The endpoints are supposed to monotonically increase.
+                        if (index <= max_point_index) return error.EndPointsOutOfOrder;
+                        max_point_index = index;
+                    }
+
+                    // uint16 instructionLength
+                    //
+                    // Total number of bytes for instructions.
+                    //
+                    // If instructionLength is zero, no instructions
+                    // are present for this glyph, and this field is
+                    // followed directly by the flags field.
+                    const instructions_length = try reader.readInt(sfnt.uint16, .big);
+
+                    // Since we don't have code that validates instruction
+                    // byte code, we just reject all glyphs that contain any.
+                    //
+                    // In the future we could change this to just ignore the
+                    // instructions, or even validate them, but for now this
+                    // is fine, since we only need this function at all to
+                    // validate glyf entries from the glyph protocol, which
+                    // explicitly forbids instructions anyway.
+                    if (instructions_length > 0) return error.InstructionsNotSupported;
+
+                    // uint8 flags[variable]
+                    //
+                    // Array of flag elements.
+                    //
+                    // ---
+                    //
+                    // We do additional accounting here to figure out how many
+                    // bytes the next two fields (the [x|y]Coordinates arrays)
+                    // should take, so that we can just try to throw out that
+                    // many bytes in order to validate them. This is because
+                    // the length of each one depends on the flags.
+                    //
+                    // We're using `i` here to count the number of logical
+                    // entries we have, which should reach the number of
+                    // points defined by the final endpoint (from earlier).
+                    var i: usize = 0;
+                    var x_coords_len: usize = 0;
+                    var y_coords_len: usize = 0;
+                    while (i <= max_point_index) : (i += 1) {
+                        const flag = try reader.readByte();
+
+                        // 0x02 X_SHORT_VECTOR
+                        //
+                        // Bit 1: If set, the corresponding x-coordinate
+                        // is 1 byte long, and the sign is determined by
+                        // the X_IS_SAME_OR_POSITIVE_X_SHORT_VECTOR flag.
+                        //
+                        // If not set, its interpretation depends on the
+                        // X_IS_SAME_OR_POSITIVE_X_SHORT_VECTOR flag:
+                        //
+                        // If that other flag is set, the x-coordinate is the
+                        // same as the previous x-coordinate, and no element
+                        // is added to the xCoordinates array.
+                        //
+                        // If both flags are not set, the corresponding
+                        // element in the xCoordinates array is two bytes
+                        // and interpreted as a signed integer.
+                        const x_is_short = flag & 0x02 != 0;
+                        // 0x10 X_IS_SAME_OR_POSITIVE_X_SHORT_VECTOR
+                        const x_is_same = flag & 0x10 != 0;
+                        const x_len: usize = if (x_is_short) 1 else if (x_is_same) 0 else 2;
+
+                        // 0x04 Y_SHORT_VECTOR
+                        //
+                        // See X_SHORT_VECTOR logic above for explanation.
+                        const y_is_short = flag & 0x04 != 0;
+                        // 0x20 Y_IS_SAME_OR_POSITIVE_Y_SHORT_VECTOR
+                        const y_is_same = flag & 0x20 != 0;
+                        const y_len: usize = if (y_is_short) 1 else if (y_is_same) 0 else 2;
+
+                        // 0x08 REPEAT_FLAG
+                        // Bit 3: If set, the next byte (read as unsigned)
+                        // specifies the number of additional times this flag
+                        // byte is to be repeated in the logical flags array
+                        // — that is, the number of additional logical flag
+                        // entries inserted after this entry.
+                        //
+                        // Every repeated logical flag describes its own point, and
+                        // each of those points has its own entries in the coordinate
+                        // arrays, so the per-point lengths above must be counted once
+                        // for this flag plus once per repeat.
+                        var points: usize = 1;
+                        if (flag & 0x08 != 0) {
+                            const repeat = try reader.readByte();
+                            i += repeat;
+
+                            // If the repeat count pushes our logical point
+                            // number beyond the max point index which we
+                            // figured out earlier from the end points, then
+                            // there's an issue with this entry, error out.
+                            if (i > max_point_index) return error.TooManyPoints;
+
+                            points += repeat;
+                        }
+
+                        x_coords_len += x_len * points;
+                        y_coords_len += y_len * points;
+                    }
+
+                    // uint8 or int16 xCoordinates[variable]
+                    //
+                    // Contour point x-coordinates.
+                    //
+                    // ---
+                    //
+                    // We determined the length of this section (in bytes)
+                    // above while processing the flags, so that we can just
+                    // skip that many bytes to validate this field.
+                    try reader.skipBytes(x_coords_len, .{});
+
+                    // uint8 or int16 yCoordinates[variable]
+                    //
+                    // Contour point y-coordinates.
+                    //
+                    // ---
+                    //
+                    // We determined the length of this section (in bytes)
+                    // above while processing the flags, so that we can just
+                    // skip that many bytes to validate this field.
+                    try reader.skipBytes(y_coords_len, .{});
+                },
+
+                .composite => {
+                    // We don't have code for validating composite glyphs,
+                    // mainly because we don't need it, since we only use
+                    // this function for the glyph protocol which explicitly
+                    // forbids composite glyphs anyway.
+                    //
+                    // So we reject composite glyphs with an error.
+                    return error.CompositeNotSupported;
+                },
+            }
+
+            // No issues found, the glyf entry is valid, return its length.
+            return @sizeOf(Header) + fbs.pos;
+        }
+    };
+
+    /// Initialize the table from the provided data.
+    ///
+    /// This DOES NOT COPY the data, it only stores a pointer to it.
+    ///
+    /// The lifetime of this struct, then, is the same as the
+    /// lifetime of the data that is used to initialize it.
+    pub fn init(data: []const u8) Glyf {
+        return .{ .data = data };
+    }
+
+    /// Retrieve the entry at the provided offset.
+    ///
+    /// `index` is a byte offset into the table data (as found in the
+    /// `loca` table), not a glyph index.
+    ///
+    /// Returns `error.EndOfStream` if the offset lies beyond the end of
+    /// the table or there isn't room for a full glyph header after it.
+    pub fn entry(self: Glyf, index: usize) error{EndOfStream}!Entry {
+        // Slicing past the end would be a panic (or UB in unsafe builds),
+        // so report it through the error set like any other short read.
+        if (index > self.data.len) return error.EndOfStream;
+        return try Entry.init(self.data[index..]);
+    }
+};
+
+/// TESTING ONLY
+///
+/// Retrieves the glyf at the provided index from the provided font.
+///
+/// Returns it in a tuple with the expected length based on the loca table, and the entry.
+pub fn getGlyph(font: sfnt.SFNT, index: usize) !struct { usize, Glyf.Entry } {
+    comptime if (!@import("builtin").is_test)
+        @compileError("This function is for testing only! It doesn't check bounds or anything!");
+
+    const glyf = Glyf.init(font.getTable("glyf").?);
+    const head = try @import("head.zig").Head.init(font.getTable("head").?);
+    const loca = font.getTable("loca").?;
+
+    const start_offset = switch (head.indexToLocFormat) {
+        0 => @as(usize, std.mem.bigToNative(
+            u16,
+            std.mem.bytesAsSlice(u16, loca)[index],
+        )) * 2,
+        1 => @as(usize, std.mem.bigToNative(
+            u32,
+            std.mem.bytesAsSlice(u32, loca)[index],
+        )),
+        else => unreachable,
+    };
+
+    const end_offset = switch (head.indexToLocFormat) {
+        0 => @as(usize, std.mem.bigToNative(
+            u16,
+            std.mem.bytesAsSlice(u16, loca)[index + 1],
+        )) * 2,
+        1 => @as(usize, std.mem.bigToNative(
+            u32,
+            std.mem.bytesAsSlice(u32, loca)[index + 1],
+        )),
+        else => unreachable,
+    };
+
+    return .{ end_offset - start_offset, try glyf.entry(start_offset) };
+}
+
+test "glyf" {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+    // Cozette because it doesn't have any hinting.
+    const test_font = @import("../embedded.zig").cozette;
+
+    const font = try sfnt.SFNT.init(test_font, alloc);
+    defer font.deinit(alloc);
+
+    // Cozette doesn't actually include a glyph for notdef,
+    // but does include a glyph for `\0` (nul), at index 1.
+    const len_nul, const glyph_nul = try getGlyph(font, 1);
+    try testing.expect(glyph_nul.entryType() == .simple);
+    // It is legal for there to be extra data between two entries, just
+    // as long as the next entry starts after the previous one ends, so
+    // it's okay for the parsed size of the entry to be less than the size
+    // determined from the difference between subsequent loca offsets.
+    try testing.expect(len_nul >= try glyph_nul.size());
+
+    // Glyph "A" is at index 66.
+    const len_A, const glyph_A = try getGlyph(font, 66);
+    try testing.expect(glyph_A.entryType() == .simple);
+    try testing.expect(len_A >= try glyph_A.size());
+
+    // Glyph "Ĩ" is at index 265.
+    const len_Itilde, const glyph_Itilde = try getGlyph(font, 265);
+    try testing.expect(glyph_Itilde.entryType() == .simple);
+    try testing.expect(len_Itilde >= try glyph_Itilde.size());
+}
+
+test "glyf: reject glyphs with instructions and composite glyphs" {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+    const test_font = @import("../embedded.zig").jetbrains_mono;
+
+    const font = try sfnt.SFNT.init(test_font, alloc);
+    defer font.deinit(alloc);
+
+    const len_notdef, const glyph_notdef = try getGlyph(font, 0);
+    try testing.expectEqual(100, len_notdef);
+    try testing.expect(glyph_notdef.entryType() == .simple);
+    try testing.expectError(
+        Glyf.Entry.SizeError.InstructionsNotSupported,
+        glyph_notdef.size(),
+    );
+
+    // Glyph "Á" is at index 2.
+    const len_Aacute, const glyph_Aacute = try getGlyph(font, 2);
+    try testing.expectEqual(24, len_Aacute);
+    try testing.expect(glyph_Aacute.entryType() == .composite);
+    try testing.expectError(
+        Glyf.Entry.SizeError.CompositeNotSupported,
+        glyph_Aacute.size(),
+    );
+}
+
+test "glyf: reject truncated" {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+    // Cozette because it doesn't have any hinting.
+    const test_font = @import("../embedded.zig").cozette;
+
+    const font = try sfnt.SFNT.init(test_font, alloc);
+    defer font.deinit(alloc);
+
+    _, var glyph_nul = try getGlyph(font, 1);
+    try testing.expect(glyph_nul.entryType() == .simple);
+    // Mess with the entry's data slice, truncating
+    // it before the full length (which is 228 bytes).
+    glyph_nul.data = glyph_nul.data[0 .. 227 - @sizeOf(Glyf.Entry.Header)];
+    try testing.expectError(Glyf.Entry.SizeError.EndOfStream, glyph_nul.size());
+}
+
+test "glyf: reject endpoints out of order" {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+    // Cozette because it doesn't have any hinting.
+    //
+    // Also we copy it with the allocator so we can mess with it.
+    const test_font = try alloc.dupe(u8, @import("../embedded.zig").cozette[0..]);
+    defer alloc.free(test_font);
+
+    const font = try sfnt.SFNT.init(test_font, alloc);
+    defer font.deinit(alloc);
+
+    _, var glyph_nul = try getGlyph(font, 1);
+    try testing.expect(glyph_nul.entryType() == .simple);
+    // Mess with the entry's data, insert a 0 in the middle of the endpoints.
+    //
+    // Because we know the underlying data is something we
+    // copied, we can just const cast it back to mutable lol.
+    std.mem.bytesAsSlice(u16, @as([]u8, @constCast(glyph_nul.data)))[3] = 0;
+    try testing.expectError(Glyf.Entry.SizeError.EndPointsOutOfOrder, glyph_nul.size());
+}
+
+test "glyf: reject too many points" {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+    // Cozette because it doesn't have any hinting.
+    //
+    // Also we copy it with the allocator so we can mess with it.
+    const test_font = try alloc.dupe(u8, @import("../embedded.zig").cozette[0..]);
+    defer alloc.free(test_font);
+
+    const font = try sfnt.SFNT.init(test_font, alloc);
+    defer font.deinit(alloc);
+
+    _, var glyph_nul = try getGlyph(font, 1);
+    try testing.expect(glyph_nul.entryType() == .simple);
+    // Mess with the entry's data, make the final two bytes of the flags
+    // array be a large number repeat to exceed the correct points count.
+    //
+    // Because we know the underlying data is something we
+    // copied, we can just const cast it back to mutable lol.
+    @as([]u8, @constCast(glyph_nul.data))[107] |= 0x08;
+    @as([]u8, @constCast(glyph_nul.data))[108] = 0xFF;
+    try testing.expectError(Glyf.Entry.SizeError.TooManyPoints, glyph_nul.size());
+}
+
+test "glyf: repeated flags count coordinates for every point" {
+    const testing = std.testing;
+
+    // A hand-built simple glyph with three points that are all
+    // described by a single flag byte with a repeat count of two.
+    const data = [_]u8{
+        0x00, 0x01, // numberOfContours = 1
+        0x00, 0x00, // xMin
+        0x00, 0x00, // yMin
+        0x00, 0x00, // xMax
+        0x00, 0x00, // yMax
+        0x00, 0x02, // endPtsOfContours[0] = 2
+        0x00, 0x00, // instructionLength = 0
+        0x0E, 0x02, // X_SHORT_VECTOR | Y_SHORT_VECTOR | REPEAT_FLAG, repeat = 2
+        1, 1, 1, // xCoordinates (one byte per point)
+        1, 1, 1, // yCoordinates (one byte per point)
+    };
+
+    const glyph = try Glyf.Entry.init(&data);
+    try testing.expect(glyph.entryType() == .simple);
+    try testing.expectEqual(@as(usize, data.len), try glyph.size());
+
+    // Dropping the final coordinate byte must be detected.
+    const truncated = try Glyf.Entry.init(data[0 .. data.len - 1]);
+    try testing.expectError(Glyf.Entry.SizeError.EndOfStream, truncated.size());
+}
+
+test "glyf: entry offset past the end of the table" {
+    const testing = std.testing;
+
+    const table = Glyf.init(&[_]u8{});
+    try testing.expectError(error.EndOfStream, table.entry(1));
+}
diff --git a/typos.toml b/typos.toml
index a2f4129cf..42303dd00 100644
--- a/typos.toml
+++ b/typos.toml
@@ -76,6 +76,8 @@ GIR = "GIR"
 rin = "rin"
 # sprites
 ower = "ower"
+# OpenType table names
+loca = "loca"
 
 [type.po]
 extend-glob = ["*.po"]