mirror of
https://github.com/odin-lang/Odin.git
synced 2026-04-19 13:00:28 +00:00
is_symbol uses generated Unicode tables
Tables for the Unicode symbol categories Sm, Sc, Sk, and So are now generated, and is_symbol now searches them.
This commit is contained in:
committed by
Jeroen van Rijn
parent
8f5f1ee1cc
commit
65d0e5c82f
@@ -1442,6 +1442,307 @@ ps_ranges := Range{
|
||||
single_16 = ps_singles16[:],
|
||||
}
|
||||
|
||||
// Generated table for Unicode category Sc (Currency_Symbol): individual code
// points stored as u16, listed in ascending order.
@(rodata)
|
||||
sc_singles16 := [?]u16{
|
||||
0x0024, 0x058F, 0x060B, 0x09FB, 0x0AF1, 0x0BF9, 0x0E3F, 0x17DB, 0xA838, 0xFDFC,
|
||||
0xFE69, 0xFF04,
|
||||
|
||||
}
|
||||
|
||||
// Generated table for Unicode category Sc (Currency_Symbol): u16 values laid
// out two per line.
// NOTE(review): each line looks like an inclusive (low, high) code-point pair — confirm against in_range.
@(rodata)
|
||||
sc_ranges16 := [?]u16{
|
||||
0x00A2, 0x00A5,
|
||||
0x07FE, 0x07FF,
|
||||
0x09F2, 0x09F3,
|
||||
0x20A0, 0x20C1,
|
||||
0xFFE0, 0xFFE1,
|
||||
0xFFE5, 0xFFE6,
|
||||
|
||||
}
|
||||
|
||||
// Generated table for Unicode category Sc (Currency_Symbol): individual code
// points above 0xFFFF, stored as i32.
@(rodata)
|
||||
sc_singles32 := [?]i32{
|
||||
0x1E2FF, 0x1ECB0,
|
||||
|
||||
}
|
||||
|
||||
// Generated table for Unicode category Sc (Currency_Symbol): i32 values above
// 0xFFFF, laid out two per line.
// NOTE(review): each line looks like an inclusive (low, high) code-point pair — confirm against in_range.
@(rodata)
|
||||
sc_ranges32 := [?]i32{
|
||||
0x11FDD, 0x11FE0,
|
||||
|
||||
}
|
||||
|
||||
// Bundles slices of the four Sc (Currency_Symbol) tables into one Range value;
// is_symbol passes this value to in_range.
sc_ranges := Range{
|
||||
single_16 = sc_singles16[:],
|
||||
ranges_16 = sc_ranges16[:],
|
||||
single_32 = sc_singles32[:],
|
||||
ranges_32 = sc_ranges32[:],
|
||||
}
|
||||
|
||||
// Generated table for Unicode category Sk (Modifier_Symbol): individual code
// points stored as u16, listed in ascending order.
@(rodata)
|
||||
sk_singles16 := [?]u16{
|
||||
0x005E, 0x0060, 0x00A8, 0x00AF, 0x00B4, 0x00B8, 0x02ED, 0x0375, 0x0888, 0x1FBD,
|
||||
0xAB5B, 0xFF3E, 0xFF40, 0xFFE3,
|
||||
|
||||
}
|
||||
|
||||
// Generated table for Unicode category Sk (Modifier_Symbol): u16 values laid
// out two per line.
// NOTE(review): each line looks like an inclusive (low, high) code-point pair — confirm against in_range.
@(rodata)
|
||||
sk_ranges16 := [?]u16{
|
||||
0x02C2, 0x02C5,
|
||||
0x02D2, 0x02DF,
|
||||
0x02E5, 0x02EB,
|
||||
0x02EF, 0x02FF,
|
||||
0x0384, 0x0385,
|
||||
0x1FBF, 0x1FC1,
|
||||
0x1FCD, 0x1FCF,
|
||||
0x1FDD, 0x1FDF,
|
||||
0x1FED, 0x1FEF,
|
||||
0x1FFD, 0x1FFE,
|
||||
0x309B, 0x309C,
|
||||
0xA700, 0xA716,
|
||||
0xA720, 0xA721,
|
||||
0xA789, 0xA78A,
|
||||
0xAB6A, 0xAB6B,
|
||||
0xFBB2, 0xFBC2,
|
||||
|
||||
}
|
||||
|
||||
// Generated table for Unicode category Sk (Modifier_Symbol): i32 values above
// 0xFFFF, laid out two per line.
// NOTE(review): each line looks like an inclusive (low, high) code-point pair — confirm against in_range.
@(rodata)
|
||||
sk_ranges32 := [?]i32{
|
||||
0x1F3FB, 0x1F3FF,
|
||||
|
||||
}
|
||||
|
||||
// Bundles slices of the Sk (Modifier_Symbol) tables into one Range value;
// is_symbol passes this value to in_range.
// Only three fields are assigned here: single_32 is left unset, and no
// sk_singles32 table appears in this part of the file.
sk_ranges := Range{
|
||||
single_16 = sk_singles16[:],
|
||||
ranges_16 = sk_ranges16[:],
|
||||
ranges_32 = sk_ranges32[:],
|
||||
}
|
||||
|
||||
// Generated table for Unicode category Sm (Math_Symbol): individual code
// points stored as u16, listed in ascending order.
@(rodata)
|
||||
sm_singles16 := [?]u16{
|
||||
0x002B, 0x007C, 0x007E, 0x00AC, 0x00B1, 0x00D7, 0x00F7, 0x03F6, 0x2044, 0x2052,
|
||||
0x2118, 0x214B, 0x21A0, 0x21A3, 0x21A6, 0x21AE, 0x21D2, 0x21D4, 0x237C, 0x25B7,
|
||||
0x25C1, 0x266F, 0xFB29, 0xFE62, 0xFF0B, 0xFF5C, 0xFF5E, 0xFFE2,
|
||||
|
||||
}
|
||||
|
||||
// Generated table for Unicode category Sm (Math_Symbol): u16 values laid out
// two per line.
// NOTE(review): each line looks like an inclusive (low, high) code-point pair — confirm against in_range.
@(rodata)
|
||||
sm_ranges16 := [?]u16{
|
||||
0x003C, 0x003E,
|
||||
0x0606, 0x0608,
|
||||
0x207A, 0x207C,
|
||||
0x208A, 0x208C,
|
||||
0x2140, 0x2144,
|
||||
0x2190, 0x2194,
|
||||
0x219A, 0x219B,
|
||||
0x21CE, 0x21CF,
|
||||
0x21F4, 0x22FF,
|
||||
0x2320, 0x2321,
|
||||
0x239B, 0x23B3,
|
||||
0x23DC, 0x23E1,
|
||||
0x25F8, 0x25FF,
|
||||
0x27C0, 0x27C4,
|
||||
0x27C7, 0x27E5,
|
||||
0x27F0, 0x27FF,
|
||||
0x2900, 0x2982,
|
||||
0x2999, 0x29D7,
|
||||
0x29DC, 0x29FB,
|
||||
0x29FE, 0x2AFF,
|
||||
0x2B30, 0x2B44,
|
||||
0x2B47, 0x2B4C,
|
||||
0xFE64, 0xFE66,
|
||||
0xFF1C, 0xFF1E,
|
||||
0xFFE9, 0xFFEC,
|
||||
|
||||
}
|
||||
|
||||
// Generated table for Unicode category Sm (Math_Symbol): individual code
// points above 0xFFFF, stored as i32.
@(rodata)
|
||||
sm_singles32 := [?]i32{
|
||||
0x1CEF0, 0x1D6C1, 0x1D6DB, 0x1D6FB, 0x1D715, 0x1D735, 0x1D74F, 0x1D76F,
|
||||
0x1D789, 0x1D7A9, 0x1D7C3,
|
||||
|
||||
}
|
||||
|
||||
// Generated table for Unicode category Sm (Math_Symbol): i32 values above
// 0xFFFF, laid out two per line.
// NOTE(review): each line looks like an inclusive (low, high) code-point pair — confirm against in_range.
@(rodata)
|
||||
sm_ranges32 := [?]i32{
|
||||
0x10D8E, 0x10D8F,
|
||||
0x1EEF0, 0x1EEF1,
|
||||
0x1F8D0, 0x1F8D8,
|
||||
|
||||
}
|
||||
|
||||
// Bundles slices of the four Sm (Math_Symbol) tables into one Range value;
// is_symbol passes this value to in_range.
sm_ranges := Range{
|
||||
single_16 = sm_singles16[:],
|
||||
ranges_16 = sm_ranges16[:],
|
||||
single_32 = sm_singles32[:],
|
||||
ranges_32 = sm_ranges32[:],
|
||||
}
|
||||
|
||||
// Generated table for Unicode category So (Other_Symbol): individual code
// points stored as u16, listed in ascending order.
@(rodata)
|
||||
so_singles16 := [?]u16{
|
||||
0x00A6, 0x00A9, 0x00AE, 0x00B0, 0x0482, 0x06DE, 0x06E9, 0x07F6, 0x09FA, 0x0B70,
|
||||
0x0BFA, 0x0C7F, 0x0D4F, 0x0D79, 0x0F13, 0x0F34, 0x0F36, 0x0F38, 0x166D, 0x1940,
|
||||
0x2114, 0x2125, 0x2127, 0x2129, 0x212E, 0x214A, 0x214F, 0x21D3, 0x3004, 0x3020,
|
||||
0x31EF, 0x3250, 0xA839, 0xFFE4, 0xFFE8,
|
||||
|
||||
}
|
||||
|
||||
// Generated table for Unicode category So (Other_Symbol): u16 values laid out
// two per line.
// NOTE(review): each line looks like an inclusive (low, high) code-point pair — confirm against in_range.
@(rodata)
|
||||
so_ranges16 := [?]u16{
|
||||
0x058D, 0x058E,
|
||||
0x060E, 0x060F,
|
||||
0x06FD, 0x06FE,
|
||||
0x0BF3, 0x0BF8,
|
||||
0x0F01, 0x0F03,
|
||||
0x0F15, 0x0F17,
|
||||
0x0F1A, 0x0F1F,
|
||||
0x0FBE, 0x0FC5,
|
||||
0x0FC7, 0x0FCC,
|
||||
0x0FCE, 0x0FCF,
|
||||
0x0FD5, 0x0FD8,
|
||||
0x109E, 0x109F,
|
||||
0x1390, 0x1399,
|
||||
0x19DE, 0x19FF,
|
||||
0x1B61, 0x1B6A,
|
||||
0x1B74, 0x1B7C,
|
||||
0x2100, 0x2101,
|
||||
0x2103, 0x2106,
|
||||
0x2108, 0x2109,
|
||||
0x2116, 0x2117,
|
||||
0x211E, 0x2123,
|
||||
0x213A, 0x213B,
|
||||
0x214C, 0x214D,
|
||||
0x218A, 0x218B,
|
||||
0x2195, 0x2199,
|
||||
0x219C, 0x219F,
|
||||
0x21A1, 0x21A2,
|
||||
0x21A4, 0x21A5,
|
||||
0x21A7, 0x21AD,
|
||||
0x21AF, 0x21CD,
|
||||
0x21D0, 0x21D1,
|
||||
0x21D5, 0x21F3,
|
||||
0x2300, 0x2307,
|
||||
0x230C, 0x231F,
|
||||
0x2322, 0x2328,
|
||||
0x232B, 0x237B,
|
||||
0x237D, 0x239A,
|
||||
0x23B4, 0x23DB,
|
||||
0x23E2, 0x2429,
|
||||
0x2440, 0x244A,
|
||||
0x249C, 0x24E9,
|
||||
0x2500, 0x25B6,
|
||||
0x25B8, 0x25C0,
|
||||
0x25C2, 0x25F7,
|
||||
0x2600, 0x266E,
|
||||
0x2670, 0x2767,
|
||||
0x2794, 0x27BF,
|
||||
0x2800, 0x28FF,
|
||||
0x2B00, 0x2B2F,
|
||||
0x2B45, 0x2B46,
|
||||
0x2B4D, 0x2B73,
|
||||
0x2B76, 0x2BFF,
|
||||
0x2CE5, 0x2CEA,
|
||||
0x2E50, 0x2E51,
|
||||
0x2E80, 0x2E99,
|
||||
0x2E9B, 0x2EF3,
|
||||
0x2F00, 0x2FD5,
|
||||
0x2FF0, 0x2FFF,
|
||||
0x3012, 0x3013,
|
||||
0x3036, 0x3037,
|
||||
0x303E, 0x303F,
|
||||
0x3190, 0x3191,
|
||||
0x3196, 0x319F,
|
||||
0x31C0, 0x31E5,
|
||||
0x3200, 0x321E,
|
||||
0x322A, 0x3247,
|
||||
0x3260, 0x327F,
|
||||
0x328A, 0x32B0,
|
||||
0x32C0, 0x33FF,
|
||||
0x4DC0, 0x4DFF,
|
||||
0xA490, 0xA4C6,
|
||||
0xA828, 0xA82B,
|
||||
0xA836, 0xA837,
|
||||
0xAA77, 0xAA79,
|
||||
0xFBC3, 0xFBD2,
|
||||
0xFD40, 0xFD4F,
|
||||
0xFD90, 0xFD91,
|
||||
0xFDC8, 0xFDCF,
|
||||
0xFDFD, 0xFDFF,
|
||||
0xFFED, 0xFFEE,
|
||||
0xFFFC, 0xFFFD,
|
||||
|
||||
}
|
||||
|
||||
// Generated table for Unicode category So (Other_Symbol): individual code
// points above 0xFFFF, stored as i32.
@(rodata)
|
||||
so_singles32 := [?]i32{
|
||||
0x101A0, 0x10AC8, 0x1173F, 0x16B45, 0x1BC9C, 0x1D245, 0x1E14F, 0x1ECAC,
|
||||
0x1ED2E, 0x1F7F0, 0x1FAC8, 0x1FBFA,
|
||||
|
||||
}
|
||||
|
||||
// Generated table for Unicode category So (Other_Symbol): i32 values above
// 0xFFFF, laid out two per line.
// NOTE(review): each line looks like an inclusive (low, high) code-point pair — confirm against in_range.
@(rodata)
|
||||
so_ranges32 := [?]i32{
|
||||
0x10137, 0x1013F,
|
||||
0x10179, 0x10189,
|
||||
0x1018C, 0x1018E,
|
||||
0x10190, 0x1019C,
|
||||
0x101D0, 0x101FC,
|
||||
0x10877, 0x10878,
|
||||
0x10ED1, 0x10ED8,
|
||||
0x11FD5, 0x11FDC,
|
||||
0x11FE1, 0x11FF1,
|
||||
0x16B3C, 0x16B3F,
|
||||
0x1CC00, 0x1CCEF,
|
||||
0x1CCFA, 0x1CCFC,
|
||||
0x1CD00, 0x1CEB3,
|
||||
0x1CEBA, 0x1CED0,
|
||||
0x1CEE0, 0x1CEEF,
|
||||
0x1CF50, 0x1CFC3,
|
||||
0x1D000, 0x1D0F5,
|
||||
0x1D100, 0x1D126,
|
||||
0x1D129, 0x1D164,
|
||||
0x1D16A, 0x1D16C,
|
||||
0x1D183, 0x1D184,
|
||||
0x1D18C, 0x1D1A9,
|
||||
0x1D1AE, 0x1D1EA,
|
||||
0x1D200, 0x1D241,
|
||||
0x1D300, 0x1D356,
|
||||
0x1D800, 0x1D9FF,
|
||||
0x1DA37, 0x1DA3A,
|
||||
0x1DA6D, 0x1DA74,
|
||||
0x1DA76, 0x1DA83,
|
||||
0x1DA85, 0x1DA86,
|
||||
0x1F000, 0x1F02B,
|
||||
0x1F030, 0x1F093,
|
||||
0x1F0A0, 0x1F0AE,
|
||||
0x1F0B1, 0x1F0BF,
|
||||
0x1F0C1, 0x1F0CF,
|
||||
0x1F0D1, 0x1F0F5,
|
||||
0x1F10D, 0x1F1AD,
|
||||
0x1F1E6, 0x1F202,
|
||||
0x1F210, 0x1F23B,
|
||||
0x1F240, 0x1F248,
|
||||
0x1F250, 0x1F251,
|
||||
0x1F260, 0x1F265,
|
||||
0x1F300, 0x1F3FA,
|
||||
0x1F400, 0x1F6D8,
|
||||
0x1F6DC, 0x1F6EC,
|
||||
0x1F6F0, 0x1F6FC,
|
||||
0x1F700, 0x1F7D9,
|
||||
0x1F7E0, 0x1F7EB,
|
||||
0x1F800, 0x1F80B,
|
||||
0x1F810, 0x1F847,
|
||||
0x1F850, 0x1F859,
|
||||
0x1F860, 0x1F887,
|
||||
0x1F890, 0x1F8AD,
|
||||
0x1F8B0, 0x1F8BB,
|
||||
0x1F8C0, 0x1F8C1,
|
||||
0x1F900, 0x1FA57,
|
||||
0x1FA60, 0x1FA6D,
|
||||
0x1FA70, 0x1FA7C,
|
||||
0x1FA80, 0x1FA8A,
|
||||
0x1FA8E, 0x1FAC6,
|
||||
0x1FACD, 0x1FADC,
|
||||
0x1FADF, 0x1FAEA,
|
||||
0x1FAEF, 0x1FAF8,
|
||||
0x1FB00, 0x1FB92,
|
||||
0x1FB94, 0x1FBEF,
|
||||
|
||||
}
|
||||
|
||||
// Bundles slices of the four So (Other_Symbol) tables into one Range value;
// is_symbol passes this value to in_range.
so_ranges := Range{
|
||||
single_16 = so_singles16[:],
|
||||
ranges_16 = so_ranges16[:],
|
||||
single_32 = so_singles32[:],
|
||||
ranges_32 = so_ranges32[:],
|
||||
}
|
||||
|
||||
@(rodata)
|
||||
extra_digits_singles16 := [?]u16{
|
||||
0x00B9, 0x19DA, 0x2070, 0x24EA, 0x24FF,
|
||||
|
||||
@@ -259,6 +259,13 @@ is_symbol :: proc(r: rune) -> bool #no_bounds_check {
|
||||
// Fast path: runes up to MAX_LATIN1 are answered directly from the
// char_properties table by testing the pS bit (presumably the "symbol" flag — confirm).
if u32(r) <= MAX_LATIN1 {
|
||||
return char_properties[u8(r)]&pS != 0
|
||||
}
|
||||
|
||||
// Beyond MAX_LATIN1, consult the generated category tables, starting with
// Sc (Currency_Symbol) and Sm (Math_Symbol).
s := in_range(r, sc_ranges) || in_range(r, sm_ranges)
|
||||
|
||||
// `||` short-circuits, so the So (Other_Symbol) and Sk (Modifier_Symbol)
// tables are only searched when both earlier lookups missed.
if s || in_range(r, so_ranges) || in_range(r, sk_ranges) {
|
||||
return true
|
||||
}
|
||||
|
||||
// Not found in any of the four symbol tables.
return false
|
||||
}
|
||||
|
||||
|
||||
@@ -291,10 +291,10 @@ main :: proc() {
|
||||
// .Pi, // Initial_Punctuation, an initial quotation mark
|
||||
// .Po, // Other_Punctuation, a punctuation mark of other type
|
||||
// .Ps, // Open_Punctuation, an opening punctuation mark (of a pair)
|
||||
.Sc, // Currency_Symbol, a currency sign
|
||||
.Sk, // Modifier_Symbol, a non-letterlike modifier symbol
|
||||
.Sm, // Math_Symbol, a symbol of mathematical use
|
||||
.So, // Other_Symbol, a symbol of other type
|
||||
// .Sc, // Currency_Symbol, a currency sign
|
||||
// .Sk, // Modifier_Symbol, a non-letterlike modifier symbol
|
||||
// .Sm, // Math_Symbol, a symbol of mathematical use
|
||||
// .So, // Other_Symbol, a symbol of other type
|
||||
.Zl, // Line_Separator, U+2028 LINE SEPARATOR only
|
||||
.Zp, // Paragraph_Separator, U+2029 PARAGRAPH SEPARATOR only
|
||||
.Zs, // Space_Separator, a space character (of various non-zero widths)
|
||||
|
||||
Reference in New Issue
Block a user