font: rework coretext discovery sorting

This should make the sorting more robust to fonts with questionable metadata or atypical style names. I was originally just going to change the scoring slightly to account for fonts whose regular italic style is named "Regular Italic", which previously resulted in the Bold Italic or Thin Italic style being chosen instead because they have shorter names. However, I decided to do some better inspection of the metadata and looser style name matching while I was changing code here anyway. This also adds a unit test to verify the sorting works correctly, though a more comprehensive set of tests may be desirable in the future.
2025-09-05 19:08:17 +00:00 · 2025-05-30 14:59:53 -06:00
parent 9b45638c15
commit 34f08a450e
1 changed files with 316 additions and 116 deletions
--- a/src/font/discovery.zig
+++ b/src/font/discovery.zig
@@ -4,6 +4,7 @@ const Allocator = std.mem.Allocator;
 const assert = std.debug.assert;
 const fontconfig = @import("fontconfig");
 const macos = @import("macos");
+const opentype = @import("opentype.zig");
 const options = @import("main.zig").options;
 const Collection = @import("main.zig").Collection;
 const DeferredFace = @import("main.zig").DeferredFace;
@@ -562,149 +563,266 @@ pub const CoreText = struct {
        desc: *const Descriptor,
        list: []*macos.text.FontDescriptor,
    ) void {
-        var desc_mut = desc.*;
-        if (desc_mut.style == null) {
-            // If there is no explicit style set, we set a preferred
-            // based on the style bool attributes.
-            //
-            // TODO: doesn't handle i18n font names well, we should have
-            // another mechanism that uses the weight attribute if it exists.
-            // Wait for this to be a real problem.
-            desc_mut.style = if (desc_mut.bold and desc_mut.italic)
-                "Bold Italic"
-            else if (desc_mut.bold)
-                "Bold"
-            else if (desc_mut.italic)
-                "Italic"
-            else
-                null;
-        }
-
-        std.mem.sortUnstable(*macos.text.FontDescriptor, list, &desc_mut, struct {
+        std.mem.sortUnstable(*macos.text.FontDescriptor, list, desc, struct {
            fn lessThan(
                desc_inner: *const Descriptor,
                lhs: *macos.text.FontDescriptor,
                rhs: *macos.text.FontDescriptor,
            ) bool {
-                const lhs_score = score(desc_inner, lhs);
-                const rhs_score = score(desc_inner, rhs);
+                const lhs_score: Score = .score(desc_inner, lhs);
+                const rhs_score: Score = .score(desc_inner, rhs);
                // Higher score is "less" (earlier)
                return lhs_score.int() > rhs_score.int();
            }
        }.lessThan);
    }

-    /// We represent our sorting score as a packed struct so that we can
-    /// compare scores numerically but build scores symbolically.
+    /// We represent our sorting score as a packed struct so that we
+    /// can compare scores numerically but build scores symbolically.
+    ///
+    /// Note that packed structs store their fields from least to most
+    /// significant, so the fields here are defined in increasing order
+    /// of precedence.
    const Score = packed struct {
        const Backing = @typeInfo(@This()).@"struct".backing_integer.?;

-        glyph_count: u16 = 0, // clamped if > intmax
-        traits: Traits = .unmatched,
-        style: Style = .unmatched,
+        /// Number of glyphs in the font. If two fonts otherwise have
+        /// identical scores then we prefer the one with more glyphs.
+        ///
+        /// (Number of glyphs clamped at u16 intmax)
+        glyph_count: u16 = 0,
+        /// A fuzzy match on the style string, less important than
+        /// an exact match, and less important than trait matches.
+        fuzzy_style: u8 = 0,
+        /// Whether the bold-ness of the font matches the descriptor.
+        /// This is less important than italic because a font that's italic
+        /// when it shouldn't be or not italic when it should be is a bigger
+        /// problem (subjectively) than being the wrong weight.
+        bold: bool = false,
+        /// Whether the italic-ness of the font matches the descriptor.
+        /// This is less important than an exact match on the style string
+        /// because we want users to be allowed to override trait matching
+        /// for the bold/italic/bold italic styles if they want.
+        italic: bool = false,
+        /// An exact (case-insensitive) match on the style string.
+        exact_style: bool = false,
+        /// Whether the font is monospace, this is more important than any of
+        /// the other fields unless we're looking for a specific codepoint,
+        /// in which case that is the most important thing.
        monospace: bool = false,
+        /// If we're looking for a codepoint, whether this font has it.
        codepoint: bool = false,

-        const Traits = enum(u8) { unmatched = 0, _ };
-        const Style = enum(u8) { unmatched = 0, match = 0xFF, _ };
-
        pub fn int(self: Score) Backing {
            return @bitCast(self);
        }
-    };

-    fn score(desc: *const Descriptor, ct_desc: *const macos.text.FontDescriptor) Score {
-        var score_acc: Score = .{};
+        fn score(desc: *const Descriptor, ct_desc: *const macos.text.FontDescriptor) Score {
+            var self: Score = .{};

-        // We always load the font if we can since some things can only be
-        // inspected on the font itself.
-        const font_: ?*macos.text.Font = macos.text.Font.createWithFontDescriptor(
-            ct_desc,
-            12,
-        ) catch null;
-        defer if (font_) |font| font.release();
+            // We always load the font if we can since some things can only be
+            // inspected on the font itself. Fonts that can't be loaded score
+            // 0 automatically because we don't want a font we can't load.
+            const font: *macos.text.Font = macos.text.Font.createWithFontDescriptor(
+                ct_desc,
+                12,
+            ) catch return self;
+            defer font.release();

-        // If we have a font, prefer the font with more glyphs.
-        if (font_) |font| {
-            const Type = @TypeOf(score_acc.glyph_count);
-            score_acc.glyph_count = std.math.cast(
-                Type,
-                font.getGlyphCount(),
-            ) orelse std.math.maxInt(Type);
-        }
-
-        // If we're searching for a codepoint, prioritize fonts that
-        // have that codepoint.
-        if (desc.codepoint > 0) codepoint: {
-            const font = font_ orelse break :codepoint;
-
-            // Turn UTF-32 into UTF-16 for CT API
-            var unichars: [2]u16 = undefined;
-            const pair = macos.foundation.stringGetSurrogatePairForLongCharacter(
-                desc.codepoint,
-                &unichars,
-            );
-            const len: usize = if (pair) 2 else 1;
-
-            // Get our glyphs
-            var glyphs = [2]macos.graphics.Glyph{ 0, 0 };
-            score_acc.codepoint = font.getGlyphsForCharacters(unichars[0..len], glyphs[0..len]);
-        }
-
-        // Get our symbolic traits for the descriptor so we can compare
-        // boolean attributes like bold, monospace, etc.
-        const symbolic_traits: macos.text.FontSymbolicTraits = traits: {
-            const traits = ct_desc.copyAttribute(.traits) orelse break :traits .{};
-            defer traits.release();
-
-            const key = macos.text.FontTraitKey.symbolic.key();
-            const symbolic = traits.getValue(macos.foundation.Number, key) orelse
-                break :traits .{};
-
-            break :traits macos.text.FontSymbolicTraits.init(symbolic);
-        };
-
-        score_acc.monospace = symbolic_traits.monospace;
-
-        score_acc.style = style: {
-            const style = ct_desc.copyAttribute(.style_name) orelse
-                break :style .unmatched;
-            defer style.release();
-
-            // Get our style string
-            var buf: [128]u8 = undefined;
-            const style_str = style.cstring(&buf, .utf8) orelse break :style .unmatched;
-
-            // If we have a specific desired style, attempt to search for that.
-            if (desc.style) |desired_style| {
-                // Matching style string gets highest score
-                if (std.mem.eql(u8, desired_style, style_str)) break :style .match;
-            } else if (!desc.bold and !desc.italic) {
-                // If we do not, and we have no symbolic traits, then we try
-                // to find "regular" (or no style). If we have symbolic traits
-                // we do nothing but we can improve scoring by taking that into
-                // account, too.
-                if (std.mem.eql(u8, "Regular", style_str)) {
-                    break :style .match;
-                }
+            // We prefer fonts with more glyphs, all else being equal.
+            {
+                const Type = @TypeOf(self.glyph_count);
+                self.glyph_count = std.math.cast(
+                    Type,
+                    font.getGlyphCount(),
+                ) orelse std.math.maxInt(Type);
            }

-            // Otherwise the score is based on the length of the style string.
-            // Shorter styles are scored higher. This is a heuristic that
-            // if we don't have a desired style then shorter tends to be
-            // more often the "regular" style.
-            break :style @enumFromInt(100 -| style_str.len);
-        };
+            // If we're searching for a codepoint, then we
+            // prioritize fonts that have that codepoint.
+            if (desc.codepoint > 0) {
+                // Turn UTF-32 into UTF-16 for CT API
+                var unichars: [2]u16 = undefined;
+                const pair = macos.foundation.stringGetSurrogatePairForLongCharacter(
+                    desc.codepoint,
+                    &unichars,
+                );
+                const len: usize = if (pair) 2 else 1;

-        score_acc.traits = traits: {
-            var count: u8 = 0;
-            if (desc.bold == symbolic_traits.bold) count += 1;
-            if (desc.italic == symbolic_traits.italic) count += 1;
-            break :traits @enumFromInt(count);
-        };
+                // Get our glyphs
+                var glyphs = [2]macos.graphics.Glyph{ 0, 0 };
+                self.codepoint = font.getGlyphsForCharacters(
+                    unichars[0..len],
+                    glyphs[0..len],
+                );
+            }

-        return score_acc;
-    }
+            // Get our symbolic traits for the descriptor so we can
+            // compare boolean attributes like bold, monospace, etc.
+            const symbolic_traits: macos.text.FontSymbolicTraits = traits: {
+                const traits = ct_desc.copyAttribute(.traits) orelse break :traits .{};
+                defer traits.release();
+
+                const key = macos.text.FontTraitKey.symbolic.key();
+                const symbolic = traits.getValue(macos.foundation.Number, key) orelse
+                    break :traits .{};
+
+                break :traits macos.text.FontSymbolicTraits.init(symbolic);
+            };
+
+            self.monospace = symbolic_traits.monospace;
+
+            // We try to derive data from the font itself, which is generally
+            // more reliable than only using the symbolic traits for this.
+            const is_bold: bool, const is_italic: bool = derived: {
+                // We start with initial guesses based on the symbolic traits,
+                // but refine these with more information if we can get it.
+                var is_italic = symbolic_traits.italic;
+                var is_bold = symbolic_traits.bold;
+
+                // Read the 'head' table out of the font data if it's available.
+                if (head: {
+                    const tag = macos.text.FontTableTag.init("head");
+                    const data = font.copyTable(tag) orelse break :head null;
+                    defer data.release();
+                    const ptr = data.getPointer();
+                    const len = data.getLength();
+                    break :head opentype.Head.init(ptr[0..len]) catch |err| {
+                        log.warn("error parsing head table: {}", .{err});
+                        break :head null;
+                    };
+                }) |head_| {
+                    const head: opentype.Head = head_;
+                    is_bold = is_bold or (head.macStyle & 1 == 1);
+                    is_italic = is_italic or (head.macStyle & 2 == 2);
+                }
+
+                // Read the 'OS/2' table out of the font data if it's available.
+                if (os2: {
+                    const tag = macos.text.FontTableTag.init("OS/2");
+                    const data = font.copyTable(tag) orelse break :os2 null;
+                    defer data.release();
+                    const ptr = data.getPointer();
+                    const len = data.getLength();
+                    break :os2 opentype.OS2.init(ptr[0..len]) catch |err| {
+                        log.warn("error parsing OS/2 table: {}", .{err});
+                        break :os2 null;
+                    };
+                }) |os2| {
+                    is_bold = is_bold or os2.fsSelection.bold;
+                    is_italic = is_italic or os2.fsSelection.italic;
+                }
+
+                // Check if we have variation axes in our descriptor, if we
+                // do then we can derive weight, italic-ness, or both from them.
+                if (font.copyAttribute(.variation_axes)) |axes| variations: {
+                    defer axes.release();
+
+                    // Copy the variation values for this instance of the font.
+                    // If there are none then we just break out immediately.
+                    const values: *macos.foundation.Dictionary =
+                        font.copyAttribute(.variation) orelse break :variations;
+                    defer values.release();
+
+                    var buf: [1024]u8 = undefined;
+
+                    // If we see the 'ital' value then we ignore 'slnt'.
+                    var ital_seen = false;
+
+                    const len = axes.getCount();
+                    for (0..len) |i| {
+                        const dict = axes.getValueAtIndex(macos.foundation.Dictionary, i);
+                        const Key = macos.text.FontVariationAxisKey;
+                        const cf_id = dict.getValue(Key.identifier.Value(), Key.identifier.key()).?;
+                        const cf_name = dict.getValue(Key.name.Value(), Key.name.key()).?;
+                        const cf_def = dict.getValue(Key.default_value.Value(), Key.default_value.key()).?;
+
+                        const name_str = cf_name.cstring(&buf, .utf8) orelse "";
+
+                        // Default value
+                        var def: f64 = 0;
+                        _ = cf_def.getValue(.double, &def);
+                        // Value in this font
+                        var val: f64 = def;
+                        if (values.getValue(
+                            macos.foundation.Number,
+                            cf_id,
+                        )) |cf_val| _ = cf_val.getValue(.double, &val);
+
+                        if (std.mem.eql(u8, "wght", name_str)) {
+                            // Somewhat subjective threshold, we consider fonts
+                            // bold if they have a 'wght' set greater than 600.
+                            is_bold = val > 600;
+                            continue;
+                        }
+                        if (std.mem.eql(u8, "ital", name_str)) {
+                            is_italic = val > 0.5;
+                            ital_seen = true;
+                            continue;
+                        }
+                        if (!ital_seen and std.mem.eql(u8, "slnt", name_str)) {
+                            // Arbitrary threshold: a clockwise slant of 5 degrees
+                            // or more is considered italic.
+                            is_italic = val <= -5.0;
+                            continue;
+                        }
+                    }
+                }
+
+                break :derived .{ is_bold, is_italic };
+            };
+
+            self.bold = desc.bold == is_bold;
+            self.italic = desc.italic == is_italic;
+
+            // Get the style string from the font.
+            var style_str_buf: [128]u8 = undefined;
+            const style_str: []const u8 = style_str: {
+                const style = ct_desc.copyAttribute(.style_name) orelse
+                    break :style_str "";
+                defer style.release();
+
+                break :style_str style.cstring(&style_str_buf, .utf8) orelse "";
+            };
+
+            // The first string in this slice will be used for the exact match,
+            // and for the fuzzy match, all matching substrings will increase
+            // the rank.
+            const desired_styles: []const [:0]const u8 = desired: {
+                if (desc.style) |s| break :desired &.{s};
+
+                // If we don't have an explicitly desired style name, we base
+                // it on the bold and italic properties, this isn't ideal since
+                // fonts may use style names other than these, but it helps in
+                // some edge cases.
+                if (desc.bold) {
+                    if (desc.italic) break :desired &.{ "bold italic", "bold", "italic", "oblique" };
+                    break :desired &.{ "bold", "upright" };
+                } else if (desc.italic) {
+                    break :desired &.{ "italic", "regular", "oblique" };
+                }
+                break :desired &.{ "regular", "upright" };
+            };
+
+            self.exact_style = std.ascii.eqlIgnoreCase(
+                style_str,
+                desired_styles[0],
+            );
+            // Our "fuzzy match" score is the integer max minus the number of
+            // characters in the style string not accounted for by entries of
+            // desired_styles found in it, so closer styles rank higher.
+            const fuzzy_type = @TypeOf(self.fuzzy_style);
+            self.fuzzy_style = @intCast(style_str.len);
+            for (desired_styles) |s| {
+                if (std.ascii.indexOfIgnoreCase(style_str, s) != null) {
+                    self.fuzzy_style -|= @intCast(s.len);
+                }
+            }
+            self.fuzzy_style = std.math.maxInt(fuzzy_type) -| self.fuzzy_style;
+
+            return self;
+        }
+    };

    pub const DiscoverIterator = struct {
        alloc: Allocator,
@@ -837,3 +955,85 @@ test "coretext codepoint" {
    // Should have other codepoints too
    try testing.expect(face.hasCodepoint('B', null));
 }
+
+test "coretext sorting" {
+    if (options.backend != .coretext and options.backend != .coretext_freetype)
+        return error.SkipZigTest;
+
+    // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!//
+    // FIXME: Disabled for now because SF Pro is not available in CI
+    //        The solution likely involves directly testing that the
+    //        `sortMatchingDescriptors` function sorts a bundled test
+    //        font correctly, instead of relying on the system fonts.
+    if (true) return error.SkipZigTest;
+    // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!//
+
+    const testing = std.testing;
+    const alloc = testing.allocator;
+
+    var ct = CoreText.init();
+    defer ct.deinit();
+
+    // We try to get a Regular, Italic, Bold, & Bold Italic version of SF Pro,
+    // which should be installed on all Macs, and has many styles which makes
+    // it a good test, since there will be many results for each discovery.
+
+    // Regular
+    {
+        var it = try ct.discover(alloc, .{
+            .family = "SF Pro",
+            .size = 12,
+        });
+        defer it.deinit();
+        const res = (try it.next()).?;
+        var buf: [1024]u8 = undefined;
+        const name = try res.name(&buf);
+        try testing.expectEqualStrings("SF Pro Regular", name);
+    }
+
+    // Regular Italic
+    //
+    // NOTE: This makes sure that we don't accidentally prefer "Thin Italic",
+    //       which we previously did, because it has a shorter name.
+    {
+        var it = try ct.discover(alloc, .{
+            .family = "SF Pro",
+            .size = 12,
+            .italic = true,
+        });
+        defer it.deinit();
+        const res = (try it.next()).?;
+        var buf: [1024]u8 = undefined;
+        const name = try res.name(&buf);
+        try testing.expectEqualStrings("SF Pro Regular Italic", name);
+    }
+
+    // Bold
+    {
+        var it = try ct.discover(alloc, .{
+            .family = "SF Pro",
+            .size = 12,
+            .bold = true,
+        });
+        defer it.deinit();
+        const res = (try it.next()).?;
+        var buf: [1024]u8 = undefined;
+        const name = try res.name(&buf);
+        try testing.expectEqualStrings("SF Pro Bold", name);
+    }
+
+    // Bold Italic
+    {
+        var it = try ct.discover(alloc, .{
+            .family = "SF Pro",
+            .size = 12,
+            .bold = true,
+            .italic = true,
+        });
+        defer it.deinit();
+        const res = (try it.next()).?;
+        var buf: [1024]u8 = undefined;
+        const name = try res.name(&buf);
+        try testing.expectEqualStrings("SF Pro Bold Italic", name);
+    }
+}