unicode: don't narrow invalid text presentation (VS15) sequences

This commit is contained in:
Jacob Sandlund
2025-11-23 20:39:35 -05:00
parent 97926ca307
commit 36c3295806
4 changed files with 82 additions and 31 deletions

View File

@@ -16,15 +16,13 @@ pub const Properties = packed struct {
grapheme_boundary_class: GraphemeBoundaryClass = .invalid,
/// Emoji VS compatibility
emoji_vs_text: bool = false,
emoji_vs_emoji: bool = false,
emoji_vs_base: bool = false,
// Needed for lut.Generator
//
// Compares two `Properties` values field by field with `==`, joining the
// individual comparisons with `and`, and returns the resulting bool.
//
// NOTE(review): this listing looks like a rendered diff whose +/- markers
// were stripped. The `emoji_vs_text` / `emoji_vs_emoji` comparisons are
// presumably the removed side and the `emoji_vs_base` comparison the added
// side of the same change, not one function body. Confirm against the
// repository before treating these lines as compilable code.
pub fn eql(a: Properties, b: Properties) bool {
return a.width == b.width and
a.grapheme_boundary_class == b.grapheme_boundary_class and
a.emoji_vs_text == b.emoji_vs_text and
a.emoji_vs_emoji == b.emoji_vs_emoji;
a.emoji_vs_base == b.emoji_vs_base;
}
// Needed for lut.Generator
@@ -36,14 +34,12 @@ pub const Properties = packed struct {
\\.{{
\\ .width= {},
\\ .grapheme_boundary_class= .{s},
\\ .emoji_vs_text= {},
\\ .emoji_vs_emoji= {},
\\ .emoji_vs_base= {},
\\}}
, .{
self.width,
@tagName(self.grapheme_boundary_class),
self.emoji_vs_text,
self.emoji_vs_emoji,
self.emoji_vs_base,
});
}
};

View File

@@ -48,15 +48,13 @@ pub fn get(cp: u21) Properties {
if (cp > uucode.config.max_code_point) return .{
.width = 1,
.grapheme_boundary_class = .invalid,
.emoji_vs_text = false,
.emoji_vs_emoji = false,
.emoji_vs_base = false,
};
return .{
.width = uucode.get(.width, cp),
.grapheme_boundary_class = graphemeBoundaryClass(cp),
.emoji_vs_text = uucode.get(.is_emoji_vs_text, cp),
.emoji_vs_emoji = uucode.get(.is_emoji_vs_emoji, cp),
.emoji_vs_base = uucode.get(.is_emoji_vs_base, cp),
};
}