Generate Unicode Punctuation Tables

This commit is contained in:
StudebakerGuy
2026-03-08 12:19:38 -04:00
committed by Jeroen van Rijn
parent 9b38bdf849
commit 3bfa01d4a8
2 changed files with 265 additions and 7 deletions

View File

@@ -1184,6 +1184,264 @@ no_ranges := Range{
ranges_32 = no_ranges32[:],
}
// Pc (Connector_Punctuation): individual code points, stored as u16 and
// listed in ascending order.
// NOTE(review): looks like generator output - regenerate rather than hand-edit; confirm.
@(rodata)
pc_singles16 := [?]u16{
	0x005F,
	0x2054,
	0xFF3F,
}
// Pc (Connector_Punctuation): code point ranges, stored as u16.
// The array is flat; every row below is one (first, last) pair.
// NOTE(review): bounds are presumably inclusive - confirm against the code that reads Range.
@(rodata)
pc_ranges16 := [?]u16{
	0x203F, 0x2040,
	0xFE33, 0xFE34,
	0xFE4D, 0xFE4F,
}
// Lookup record for Pc (Connector_Punctuation): slices over the two u16
// tables. The 32-bit fields are not set and keep their zero value.
pc_ranges := Range{
	single_16 = pc_singles16[:],
	ranges_16 = pc_ranges16[:],
}
// Pd (Dash_Punctuation): individual code points, stored as u16 and listed
// in ascending order.
// NOTE(review): looks like generator output - regenerate rather than hand-edit; confirm.
@(rodata)
pd_singles16 := [?]u16{
	0x002D, 0x058A, 0x05BE, 0x1400, 0x1806,
	0x2E17, 0x2E1A, 0x2E40, 0x2E5D, 0x301C,
	0x3030, 0x30A0, 0xFE58, 0xFE63, 0xFF0D,
}
// Pd (Dash_Punctuation): code point ranges, stored as u16.
// The array is flat; every row below is one (first, last) pair.
// NOTE(review): bounds are presumably inclusive - confirm against the code that reads Range.
@(rodata)
pd_ranges16 := [?]u16{
	0x2010, 0x2015,
	0x2E3A, 0x2E3B,
	0xFE31, 0xFE32,
}
// Pd (Dash_Punctuation): individual code points that do not fit in 16 bits,
// stored as i32 and listed in ascending order.
@(rodata)
pd_singles32 := [?]i32{
	0x10D6E,
	0x10EAD,
}
// Lookup record for Pd (Dash_Punctuation): slices over the u16 singles,
// the u16 ranges and the i32 singles. `ranges_32` is not set and keeps its
// zero value.
pd_ranges := Range{
	single_16 = pd_singles16[:],
	ranges_16 = pd_ranges16[:],
	single_32 = pd_singles32[:],
}
// Pe (Close_Punctuation, a closing punctuation mark of a pair): individual
// code points, stored as u16 and listed in ascending order.
// NOTE(review): looks like generator output - regenerate rather than hand-edit; confirm.
@(rodata)
pe_singles16 := [?]u16{
0x0029, 0x005D, 0x007D, 0x0F3B, 0x0F3D, 0x169C, 0x2046, 0x207E, 0x208E, 0x2309,
0x230B, 0x232A, 0x2769, 0x276B, 0x276D, 0x276F, 0x2771, 0x2773, 0x2775, 0x27C6,
0x27E7, 0x27E9, 0x27EB, 0x27ED, 0x27EF, 0x2984, 0x2986, 0x2988, 0x298A, 0x298C,
0x298E, 0x2990, 0x2992, 0x2994, 0x2996, 0x2998, 0x29D9, 0x29DB, 0x29FD, 0x2E23,
0x2E25, 0x2E27, 0x2E29, 0x2E56, 0x2E58, 0x2E5A, 0x2E5C, 0x3009, 0x300B, 0x300D,
0x300F, 0x3011, 0x3015, 0x3017, 0x3019, 0x301B, 0xFD3E, 0xFE18, 0xFE36, 0xFE38,
0xFE3A, 0xFE3C, 0xFE3E, 0xFE40, 0xFE42, 0xFE44, 0xFE48, 0xFE5A, 0xFE5C, 0xFE5E,
0xFF09, 0xFF3D, 0xFF5D, 0xFF60, 0xFF63,
}
// Pe (Close_Punctuation): code point ranges, stored as u16.
// The array is flat and currently holds a single (first, last) pair.
// NOTE(review): bounds are presumably inclusive - confirm against the code that reads Range.
@(rodata)
pe_ranges16 := [?]u16{
	0x301E, 0x301F,
}
// Lookup record for Pe (Close_Punctuation): slices over the two u16 tables.
// The 32-bit fields are not set and keep their zero value.
pe_ranges := Range{
	single_16 = pe_singles16[:],
	ranges_16 = pe_ranges16[:],
}
// Pf (Final_Punctuation, a final quotation mark): individual code points,
// stored as u16 and listed in ascending order.
// NOTE(review): looks like generator output - regenerate rather than hand-edit; confirm.
@(rodata)
pf_singles16 := [?]u16{
	0x00BB, 0x2019, 0x201D, 0x203A, 0x2E03,
	0x2E05, 0x2E0A, 0x2E0D, 0x2E1D, 0x2E21,
}
// Lookup record for Pf (Final_Punctuation). Only the u16 singles table is
// populated; every other field keeps its zero value.
pf_ranges := Range{single_16 = pf_singles16[:]}
// Pi (Initial_Punctuation, an initial quotation mark): individual code
// points, stored as u16 and listed in ascending order.
// NOTE(review): looks like generator output - regenerate rather than hand-edit; confirm.
@(rodata)
pi_singles16 := [?]u16{
	0x00AB, 0x2018, 0x201F, 0x2039, 0x2E02,
	0x2E04, 0x2E09, 0x2E0C, 0x2E1C, 0x2E20,
}
// Pi (Initial_Punctuation): code point ranges, stored as u16.
// The array is flat and currently holds a single (first, last) pair.
// NOTE(review): bounds are presumably inclusive - confirm against the code that reads Range.
@(rodata)
pi_ranges16 := [?]u16{
	0x201B, 0x201C,
}
// Lookup record for Pi (Initial_Punctuation): slices over the two u16
// tables. The 32-bit fields are not set and keep their zero value.
pi_ranges := Range{
	single_16 = pi_singles16[:],
	ranges_16 = pi_ranges16[:],
}
// Po (Other_Punctuation, a punctuation mark of other type): individual code
// points, stored as u16 and listed in ascending order.
// NOTE(review): looks like generator output - regenerate rather than hand-edit; confirm.
@(rodata)
po_singles16 := [?]u16{
0x002A, 0x002C, 0x005C, 0x00A1, 0x00A7, 0x00BF, 0x037E, 0x0387, 0x0589, 0x05C0,
0x05C3, 0x05C6, 0x061B, 0x06D4, 0x085E, 0x0970, 0x09FD, 0x0A76, 0x0AF0, 0x0C77,
0x0C84, 0x0DF4, 0x0E4F, 0x0F14, 0x0F85, 0x10FB, 0x166E, 0x1CD3, 0x2053, 0x2D70,
0x2E0B, 0x2E1B, 0x2E41, 0x303D, 0x30FB, 0xA673, 0xA67E, 0xA8FC, 0xA95F, 0xABEB,
0xFE19, 0xFE30, 0xFE68, 0xFF0A, 0xFF0C, 0xFF3C, 0xFF61,
}
// Po (Other_Punctuation): code point ranges, stored as u16.
// The array is flat; every row below is one (first, last) pair, and the
// pairs are listed in ascending order.
// NOTE(review): bounds are presumably inclusive - confirm against the code that reads Range.
@(rodata)
po_ranges16 := [?]u16{
0x0021, 0x0023,
0x0025, 0x0027,
0x002E, 0x002F,
0x003A, 0x003B,
0x003F, 0x0040,
0x00B6, 0x00B7,
0x055A, 0x055F,
0x05F3, 0x05F4,
0x0609, 0x060A,
0x060C, 0x060D,
0x061D, 0x061F,
0x066A, 0x066D,
0x0700, 0x070D,
0x07F7, 0x07F9,
0x0830, 0x083E,
0x0964, 0x0965,
0x0E5A, 0x0E5B,
0x0F04, 0x0F12,
0x0FD0, 0x0FD4,
0x0FD9, 0x0FDA,
0x104A, 0x104F,
0x1360, 0x1368,
0x16EB, 0x16ED,
0x1735, 0x1736,
0x17D4, 0x17D6,
0x17D8, 0x17DA,
0x1800, 0x1805,
0x1807, 0x180A,
0x1944, 0x1945,
0x1A1E, 0x1A1F,
0x1AA0, 0x1AA6,
0x1AA8, 0x1AAD,
0x1B4E, 0x1B4F,
0x1B5A, 0x1B60,
0x1B7D, 0x1B7F,
0x1BFC, 0x1BFF,
0x1C3B, 0x1C3F,
0x1C7E, 0x1C7F,
0x1CC0, 0x1CC7,
0x2016, 0x2017,
0x2020, 0x2027,
0x2030, 0x2038,
0x203B, 0x203E,
0x2041, 0x2043,
0x2047, 0x2051,
0x2055, 0x205E,
0x2CF9, 0x2CFC,
0x2CFE, 0x2CFF,
0x2E00, 0x2E01,
0x2E06, 0x2E08,
0x2E0E, 0x2E16,
0x2E18, 0x2E19,
0x2E1E, 0x2E1F,
0x2E2A, 0x2E2E,
0x2E30, 0x2E39,
0x2E3C, 0x2E3F,
0x2E43, 0x2E4F,
0x2E52, 0x2E54,
0x3001, 0x3003,
0xA4FE, 0xA4FF,
0xA60D, 0xA60F,
0xA6F2, 0xA6F7,
0xA874, 0xA877,
0xA8CE, 0xA8CF,
0xA8F8, 0xA8FA,
0xA92E, 0xA92F,
0xA9C1, 0xA9CD,
0xA9DE, 0xA9DF,
0xAA5C, 0xAA5F,
0xAADE, 0xAADF,
0xAAF0, 0xAAF1,
0xFE10, 0xFE16,
0xFE45, 0xFE46,
0xFE49, 0xFE4C,
0xFE50, 0xFE52,
0xFE54, 0xFE57,
0xFE5F, 0xFE61,
0xFE6A, 0xFE6B,
0xFF01, 0xFF03,
0xFF05, 0xFF07,
0xFF0E, 0xFF0F,
0xFF1A, 0xFF1B,
0xFF1F, 0xFF20,
0xFF64, 0xFF65,
}
// Po (Other_Punctuation): individual code points that do not fit in 16 bits,
// stored as i32 and listed in ascending order.
@(rodata)
po_singles32 := [?]i32{
0x1039F, 0x103D0, 0x1056F, 0x10857, 0x1091F, 0x1093F, 0x10A7F, 0x10ED0,
0x111CD, 0x111DB, 0x112A9, 0x1145D, 0x114C6, 0x116B9, 0x1183B, 0x119E2,
0x11BE1, 0x11FFF, 0x16AF5, 0x16B44, 0x16FE2, 0x1BC9F, 0x1E5FF,
}
// Po (Other_Punctuation): code point ranges that do not fit in 16 bits,
// stored as i32. The array is flat; every row below is one (first, last)
// pair, and the pairs are listed in ascending order.
// NOTE(review): bounds are presumably inclusive - confirm against the code that reads Range.
@(rodata)
po_ranges32 := [?]i32{
0x10100, 0x10102,
0x10A50, 0x10A58,
0x10AF0, 0x10AF6,
0x10B39, 0x10B3F,
0x10B99, 0x10B9C,
0x10F55, 0x10F59,
0x10F86, 0x10F89,
0x11047, 0x1104D,
0x110BB, 0x110BC,
0x110BE, 0x110C1,
0x11140, 0x11143,
0x11174, 0x11175,
0x111C5, 0x111C8,
0x111DD, 0x111DF,
0x11238, 0x1123D,
0x113D4, 0x113D5,
0x113D7, 0x113D8,
0x1144B, 0x1144F,
0x1145A, 0x1145B,
0x115C1, 0x115D7,
0x11641, 0x11643,
0x11660, 0x1166C,
0x1173C, 0x1173E,
0x11944, 0x11946,
0x11A3F, 0x11A46,
0x11A9A, 0x11A9C,
0x11A9E, 0x11AA2,
0x11B00, 0x11B09,
0x11C41, 0x11C45,
0x11C70, 0x11C71,
0x11EF7, 0x11EF8,
0x11F43, 0x11F4F,
0x12470, 0x12474,
0x12FF1, 0x12FF2,
0x16A6E, 0x16A6F,
0x16B37, 0x16B3B,
0x16D6D, 0x16D6F,
0x16E97, 0x16E9A,
0x1DA87, 0x1DA8B,
0x1E95E, 0x1E95F,
}
// Lookup record for Po (Other_Punctuation): slices over all four tables
// (u16 singles and ranges, i32 singles and ranges).
po_ranges := Range{
	single_16 = po_singles16[:],
	ranges_16 = po_ranges16[:],
	single_32 = po_singles32[:],
	ranges_32 = po_ranges32[:],
}
// Ps (Open_Punctuation, an opening punctuation mark of a pair): individual
// code points, stored as u16 and listed in ascending order.
// NOTE(review): looks like generator output - regenerate rather than hand-edit; confirm.
@(rodata)
ps_singles16 := [?]u16{
0x0028, 0x005B, 0x007B, 0x0F3A, 0x0F3C, 0x169B, 0x201A, 0x201E, 0x2045, 0x207D,
0x208D, 0x2308, 0x230A, 0x2329, 0x2768, 0x276A, 0x276C, 0x276E, 0x2770, 0x2772,
0x2774, 0x27C5, 0x27E6, 0x27E8, 0x27EA, 0x27EC, 0x27EE, 0x2983, 0x2985, 0x2987,
0x2989, 0x298B, 0x298D, 0x298F, 0x2991, 0x2993, 0x2995, 0x2997, 0x29D8, 0x29DA,
0x29FC, 0x2E22, 0x2E24, 0x2E26, 0x2E28, 0x2E42, 0x2E55, 0x2E57, 0x2E59, 0x2E5B,
0x3008, 0x300A, 0x300C, 0x300E, 0x3010, 0x3014, 0x3016, 0x3018, 0x301A, 0x301D,
0xFD3F, 0xFE17, 0xFE35, 0xFE37, 0xFE39, 0xFE3B, 0xFE3D, 0xFE3F, 0xFE41, 0xFE43,
0xFE47, 0xFE59, 0xFE5B, 0xFE5D, 0xFF08, 0xFF3B, 0xFF5B, 0xFF5F, 0xFF62,
}
// Lookup record for Ps (Open_Punctuation). Only the u16 singles table is
// populated; every other field keeps its zero value.
ps_ranges := Range{single_16 = ps_singles16[:]}
@(rodata)
extra_digits_singles16 := [?]u16{
0x00B9, 0x19DA, 0x2070, 0x24EA, 0x24FF,

View File

@@ -284,13 +284,13 @@ main :: proc() {
//.Nd, // Decimal_Number, a decimal digit
//.Nl, // Letter_Number, a letterlike numeric character
//.No, // Other_Number, a numeric character of other type
.Pc, // Connector_Punctuation, a connecting punctuation mark, like a tie
.Pd, // Dash_Punctuation, a dash or hyphen punctuation mark
.Pe, // Close_Punctuation, a closing punctuation mark (of a pair)
.Pf, // Final_Punctuation, a final quotation mark
.Pi, // Initial_Punctuation, an initial quotation mark
.Po, // Other_Punctuation, a punctuation mark of other type
.Ps, // Open_Punctuation, an opening punctuation mark (of a pair)
// .Pc, // Connector_Punctuation, a connecting punctuation mark, like a tie
// .Pd, // Dash_Punctuation, a dash or hyphen punctuation mark
// .Pe, // Close_Punctuation, a closing punctuation mark (of a pair)
// .Pf, // Final_Punctuation, a final quotation mark
// .Pi, // Initial_Punctuation, an initial quotation mark
// .Po, // Other_Punctuation, a punctuation mark of other type
// .Ps, // Open_Punctuation, an opening punctuation mark (of a pair)
.Sc, // Currency_Symbol, a currency sign
.Sk, // Modifier_Symbol, a non-letterlike modifier symbol
.Sm, // Math_Symbol, a symbol of mathematical use