diff --git a/src/error.cpp b/src/error.cpp index 03d96219b..f95123f15 100644 --- a/src/error.cpp +++ b/src/error.cpp @@ -237,6 +237,7 @@ enum TerminalColour { TerminalColour_Blue, TerminalColour_Purple, TerminalColour_Black, + TerminalColour_Grey, }; gb_internal void terminal_set_colours(TerminalStyle style, TerminalColour foreground) { @@ -256,6 +257,7 @@ gb_internal void terminal_set_colours(TerminalStyle style, TerminalColour foregr case TerminalColour_Blue: error_out("\x1b[%s;34m", ss); break; case TerminalColour_Purple: error_out("\x1b[%s;35m", ss); break; case TerminalColour_Black: error_out("\x1b[%s;30m", ss); break; + case TerminalColour_Grey: error_out("\x1b[%s;90m", ss); break; } } } @@ -272,85 +274,234 @@ gb_internal isize show_error_on_line(TokenPos const &pos, TokenPos end) { return -1; } - i32 offset = 0; - gbString the_line = get_file_line_as_string(pos, &offset); + i32 error_start_index_bytes = 0; + gbString the_line = get_file_line_as_string(pos, &error_start_index_bytes); defer (gb_string_free(the_line)); - if (the_line != nullptr) { - char const *line_text = the_line; - isize line_len = gb_string_length(the_line); - - // TODO(bill): This assumes ASCII - - enum { - MAX_LINE_LENGTH = 80, - MAX_TAB_WIDTH = 8, - ELLIPSIS_PADDING = 8, // `... ...` - MAX_LINE_LENGTH_PADDED = MAX_LINE_LENGTH-MAX_TAB_WIDTH-ELLIPSIS_PADDING, - }; - - i32 error_length = gb_max(end.offset - pos.offset, 1); - - error_out("\t"); - - terminal_set_colours(TerminalStyle_Bold, TerminalColour_White); - - - isize squiggle_extra = 0; - - if (line_len > MAX_LINE_LENGTH_PADDED) { - i32 left = MAX_TAB_WIDTH; - i32 diff = gb_max(offset-left, 0); - if (diff > 0) { - line_text += diff; - line_len -= diff; - offset = left + ELLIPSIS_PADDING/2; - } - if (line_len > MAX_LINE_LENGTH_PADDED) { - line_len = MAX_LINE_LENGTH_PADDED; - if (error_length > line_len-left) { - error_length = cast(i32)line_len - left; - squiggle_extra = 1; - } - } - if (diff > 0) { - error_out("... 
%.*s ...", cast(i32)line_len, line_text); - } else { - error_out("%.*s ...", cast(i32)line_len, line_text); - } - } else { - error_out("%.*s", cast(i32)line_len, line_text); - } - error_out("\n\t"); - - for (i32 i = 0; i < offset; i++) { - error_out(" "); - } - - terminal_set_colours(TerminalStyle_Bold, TerminalColour_Green); - - error_out("^"); - if (end.file_id == pos.file_id) { - if (end.line > pos.line) { - for (i32 i = offset; i < line_len; i++) { - error_out("~"); - } - } else if (end.line == pos.line && end.column > pos.column) { - for (i32 i = 1; i < error_length-1+squiggle_extra; i++) { - error_out("~"); - } - if (error_length > 1 && squiggle_extra == 0) { - error_out("^"); - } - } - } - + if (the_line == nullptr || gb_string_length(the_line) == 0) { + terminal_set_colours(TerminalStyle_Normal, TerminalColour_Grey); + error_out("\t( empty line )\n"); terminal_reset_colours(); - error_out("\n"); - return offset; + if (the_line == nullptr) { + return -1; + } else { + return cast(isize)error_start_index_bytes; + } } - return -1; + + // These two will be used like an Odin slice later. + char const *line_text = the_line; + i32 line_length_bytes = cast(i32)gb_string_length(the_line); + + ucg_grapheme* graphemes; + i32 line_length_runes = 0; + i32 line_length_graphemes = 0; + i32 line_width = 0; + + int ucg_result = ucg_decode_grapheme_clusters( + permanent_allocator(), (const uint8_t*)line_text, line_length_bytes, + &graphemes, &line_length_runes, &line_length_graphemes, &line_width); + + if (ucg_result < 0) { + // There was a UTF-8 parsing error. + // Insert a dummy grapheme so the start of the invalid rune can be pointed at. 
+ graphemes = (ucg_grapheme*)gb_resize(permanent_allocator(), + graphemes, + sizeof(ucg_grapheme) * (line_length_graphemes), + sizeof(ucg_grapheme) * (1 + line_length_graphemes)); + + ucg_grapheme append = { + error_start_index_bytes, + line_length_runes, + 1, + }; + + graphemes[line_length_graphemes] = append; + } + + // The units below are counted in visual, monospace cells. + enum { + MAX_LINE_LENGTH = 80, + MAX_TAB_WIDTH = 8, + ELLIPSIS_PADDING = 8, // `... ...` + MIN_LEFT_VIEW = 8, + + // A rough estimate of how many characters we'll insert, at most: + MAX_INSERTED_WIDTH = MAX_TAB_WIDTH + ELLIPSIS_PADDING, + + MAX_LINE_LENGTH_PADDED = MAX_LINE_LENGTH - MAX_INSERTED_WIDTH, + }; + + i32 error_start_index_graphemes = 0; + for (i32 i = 0; i < line_length_graphemes; i += 1) { + if (graphemes[i].byte_index == error_start_index_bytes) { + error_start_index_graphemes = i; + break; + } + } + + if (error_start_index_graphemes == 0 && error_start_index_bytes != 0 && line_length_graphemes != 0) { + // The error index in graphemes was not found, but we did find a valid Unicode string. + // + // This is an edge case where the error is sitting on a newline or the + // end of the line, as that is the only location we could not have checked. + error_start_index_graphemes = line_length_graphemes; + } + + error_out("\t"); + + bool show_right_ellipsis = false; + + i32 squiggle_padding = 0; + i32 window_open_bytes = 0; + i32 window_close_bytes = 0; + if (line_width > MAX_LINE_LENGTH_PADDED) { + // Now that we know the line is over the length limit, we have to + // compose a visual window in which to display the error. 
+ i32 window_size_left = 0; + i32 window_size_right = 0; + i32 window_open_graphemes = 0; + + for (i32 i = error_start_index_graphemes - 1; i > 0; i -= 1) { + window_size_left += graphemes[i].width; + if (window_size_left >= MIN_LEFT_VIEW) { + window_open_graphemes = i; + window_open_bytes = graphemes[i].byte_index; + break; + } + } + + for (i32 i = error_start_index_graphemes; i < line_length_graphemes; i += 1) { + window_size_right += graphemes[i].width; + if (window_size_right >= MAX_LINE_LENGTH_PADDED - MIN_LEFT_VIEW) { + window_close_bytes = graphemes[i].byte_index; + break; + } + } + if (window_close_bytes == 0) { + // The window ends at the end of the line. + window_close_bytes = line_length_bytes; + } + + if (window_size_right < MAX_LINE_LENGTH_PADDED - MIN_LEFT_VIEW) { + // Hit the end of the string early on the right side; expand backwards. + for (i32 i = window_open_graphemes - 1; i > 0; i -= 1) { + window_size_left += graphemes[i].width; + if (window_size_left + window_size_right >= MAX_LINE_LENGTH_PADDED) { + window_open_graphemes = i; + window_open_bytes = graphemes[i].byte_index; + break; + } + } + } + + GB_ASSERT_MSG(window_close_bytes >= window_open_bytes, "Error line truncation window has wrong byte indices. (open, close: %i, %i)", window_open_bytes, window_close_bytes); + + if (window_close_bytes != line_length_bytes) { + show_right_ellipsis = true; + } + + // Close the window, going left. + line_length_bytes = window_close_bytes; + + // Adjust the slice of text. In Odin, this would be: + // `line_text = line_text[window_left_bytes:]` + line_text += window_open_bytes; + line_length_bytes -= window_open_bytes; + GB_ASSERT_MSG(line_length_bytes >= 0, "Bounds-checking error: line_length_bytes"); + + if (window_open_bytes > 0) { + error_out("... "); + squiggle_padding += 4; + } + } else { + // No truncation needed. 
+ window_open_bytes = 0; + window_close_bytes = line_length_bytes; + } + + for (i32 i = error_start_index_graphemes; i > 0; i -= 1) { + if (graphemes[i].byte_index == window_open_bytes) { + break; + } + squiggle_padding += graphemes[i].width; + } + + // Start printing code. + + terminal_set_colours(TerminalStyle_Normal, TerminalColour_White); + error_out("%.*s", line_length_bytes, line_text); + + i32 squiggle_length = 0; + bool trailing_squiggle = false; + + if (end.file_id == pos.file_id) { + // The error has an endpoint. + + if (end.line > pos.line) { + // Error goes to next line. + // Always show the ellipsis in this case + show_right_ellipsis = true; + + for (i32 i = error_start_index_graphemes; i < line_length_graphemes; i += 1) { + squiggle_length += graphemes[i].width; + trailing_squiggle = true; + } + + } else if (end.line == pos.line && end.column > pos.column) { + // Error terminates before line end. + i32 adjusted_end_index = graphemes[error_start_index_graphemes].byte_index + end.column - pos.column; + + for (i32 i = error_start_index_graphemes; i < line_length_graphemes; i += 1) { + if (graphemes[i].byte_index >= adjusted_end_index) { + break; + } else if (graphemes[i].byte_index >= window_close_bytes) { + trailing_squiggle = true; + break; + } + squiggle_length += graphemes[i].width; + } + } + } else { + // The error is at one spot; no range known. 
+ squiggle_length = 1; + } + + if (show_right_ellipsis) { + error_out(" ..."); + } + + error_out("\n\t"); + + for (i32 i = squiggle_padding; i > 0; i -= 1) { + error_out(" "); + } + + terminal_set_colours(TerminalStyle_Bold, TerminalColour_Green); + + if (squiggle_length > 0) { + error_out("^"); + squiggle_length -= 1; + } + for (/**/; squiggle_length > 1; squiggle_length -= 1) { + error_out("~"); + } + if (squiggle_length > 0) { + if (trailing_squiggle) { + error_out("~ ..."); + } else { + error_out("^"); + } + } + + // NOTE(Feoramund): Specifically print a newline, then reset colours, + // instead of the other way around. Otherwise the printing mechanism + // will collapse the newline for reasons currently beyond my ken. + error_out("\n"); + terminal_reset_colours(); + + return squiggle_padding; } gb_internal void error_out_empty(void) { diff --git a/src/parser.cpp b/src/parser.cpp index 583f4a57d..93889d1b2 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -71,6 +71,12 @@ gb_internal gbString get_file_line_as_string(TokenPos const &pos, i32 *offset_) u8 *line_start = pos_offset; u8 *line_end = pos_offset; + + if (offset > 0 && *line_start == '\n') { + // Prevent an error token that starts at the boundary of a line that + // leads to an empty line from advancing off its line. + line_start -= 1; + } while (line_start >= start) { if (*line_start == '\n') { line_start += 1; @@ -78,6 +84,11 @@ gb_internal gbString get_file_line_as_string(TokenPos const &pos, i32 *offset_) } line_start -= 1; } + if (line_start == start - 1) { + // Prevent an error on the first line from stepping behind the boundary + // of the text. 
+ line_start += 1; + } while (line_end < end) { if (*line_end == '\n') { diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index 2af41b881..4425bee29 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -786,7 +786,6 @@ gb_internal void tokenizer_get_token(Tokenizer *t, Token *token, int repeat=0) { case '`': // Raw String Literal case '"': // String Literal { - bool has_carriage_return = false; i32 success; Rune quote = curr_rune; token->kind = Token_String; @@ -816,9 +815,6 @@ gb_internal void tokenizer_get_token(Tokenizer *t, Token *token, int repeat=0) { if (r == quote) { break; } - if (r == '\r') { - has_carriage_return = true; - } } } token->string.len = t->curr - token->string.text; diff --git a/src/ucg/ucg.c b/src/ucg/ucg.c new file mode 100644 index 000000000..c3e270e1a --- /dev/null +++ b/src/ucg/ucg.c @@ -0,0 +1,686 @@ +/* + * SPDX-FileCopyrightText: (c) 2024 Feoramund + * SPDX-License-Identifier: BSD-3-Clause + */ + + +// +// NOTE(Feoramund): This is my UCG library, adapted for use within the Odin compiler. +// Most of the comments have been let alone and may not strictly apply anymore. +// +// 1. The UCG allocator interface was replaced by gbAllocator. +// 2. The UCG UTF-8 decoder was replaced with the one already in the compiler. +// 3. Non-essential code was stripped. +// 4. Some types were changed for compatibility. +// + + +/* This is the data that is allocated when an allocator is passed to + * ucg_decode_grapheme_clusters. 
/* One record per grapheme cluster produced by ucg_decode_grapheme_clusters. */
typedef struct {
	i32 byte_index; // byte offset of the rune that opened the cluster
	i32 rune_index; // number of runes decoded before that rune
	i32 width;      // monospace cell width attributed to the cluster
} ucg_grapheme;


/* #include "ucg.h" */
#include "ucg_tables.h"

// Number of int32_t elements in a statically sized table.
#define UCG_TABLE_LEN(t) (sizeof(t) / sizeof(int32_t))

#define ZERO_WIDTH_SPACE      0x200B
#define ZERO_WIDTH_NON_JOINER 0x200C
#define ZERO_WIDTH_JOINER     0x200D
#define WORD_JOINER           0x2060

// Searches a table of `length` entries, each `stride` elements wide, comparing
// `value` against the first element of every entry. The table is expected to
// be sorted ascending by that first element.
//
// Returns the element index of the last entry whose first element is
// <= `value`, or -1 when `value` is smaller than the first entry.
int ucg_binary_search(int32_t value, const int32_t* table, int length, int stride) {
	GB_ASSERT(table != NULL);
	GB_ASSERT(length > 0);
	GB_ASSERT(stride > 0);

	int remaining = length;
	int base = 0;
	while (remaining > 1) {
		int half = remaining / 2;
		int probe = base + half * stride;
		if (value < table[probe]) {
			remaining = half;
		} else {
			base = probe;
			remaining -= half;
		}
	}

	return (remaining != 0 && value >= table[base]) ? base : -1;
}

// True when `r` falls inside one of the inclusive [first, last] pairs stored
// back to back in `pairs`.
static bool ucg_range_pairs_contain(int32_t r, const int32_t* pairs, int pair_count) {
	int at = ucg_binary_search(r, pairs, pair_count, 2);
	return at >= 0 && pairs[at] <= r && r <= pairs[at+1];
}

//
// The procedures below are accurate as of Unicode 15.1.0.
//

// Everything up to U+001F, plus U+007F through U+009F.
bool ucg_is_control(int32_t r) {
	return r <= 0x1F || (0x7F <= r && r <= 0x9F);
}

// Emoji_Modifier
bool ucg_is_emoji_modifier(int32_t r) {
	return r >= 0x1F3FB && r <= 0x1F3FF;
}

// Regional_Indicator
bool ucg_is_regional_indicator(int32_t r) {
	return r >= 0x1F1E6 && r <= 0x1F1FF;
}

// General_Category=Enclosing_Mark
bool ucg_is_enclosing_mark(int32_t r) {
	return r == 0x0488 || r == 0x0489 || r == 0x1ABE ||
	       (0x20DD <= r && r <= 0x20E0) ||
	       (0x20E2 <= r && r <= 0x20E4) ||
	       (0xA670 <= r && r <= 0xA672);
}

// Prepended_Concatenation_Mark
bool ucg_is_prepended_concatenation_mark(int32_t r) {
	return r == 0x006DD || r == 0x0070F || r == 0x008E2 ||
	       r == 0x110BD || r == 0x110CD ||
	       (0x00600 <= r && r <= 0x00605) ||
	       (0x00890 <= r && r <= 0x00891);
}

// General_Category=Spacing_Mark
bool ucg_is_spacing_mark(int32_t r) {
	return ucg_range_pairs_contain(r, ucg_spacing_mark_ranges, UCG_TABLE_LEN(ucg_spacing_mark_ranges)/2);
}

// General_Category=Nonspacing_Mark
bool ucg_is_nonspacing_mark(int32_t r) {
	return ucg_range_pairs_contain(r, ucg_nonspacing_mark_ranges, UCG_TABLE_LEN(ucg_nonspacing_mark_ranges)/2);
}

// Extended_Pictographic
bool ucg_is_emoji_extended_pictographic(int32_t r) {
	return ucg_range_pairs_contain(r, ucg_emoji_extended_pictographic_ranges, UCG_TABLE_LEN(ucg_emoji_extended_pictographic_ranges)/2);
}

// Grapheme_Extend
bool ucg_is_grapheme_extend(int32_t r) {
	return ucg_range_pairs_contain(r, ucg_grapheme_extend_ranges, UCG_TABLE_LEN(ucg_grapheme_extend_ranges)/2);
}


// Hangul_Syllable_Type=Leading_Jamo
bool ucg_is_hangul_syllable_leading(int32_t r) {
	if (0x1100 <= r && r <= 0x115F) { return true; }
	return 0xA960 <= r && r <= 0xA97C;
}

// Hangul_Syllable_Type=Vowel_Jamo
bool ucg_is_hangul_syllable_vowel(int32_t r) {
	if (0x1160 <= r && r <= 0x11A7) { return true; }
	return 0xD7B0 <= r && r <= 0xD7C6;
}

// Hangul_Syllable_Type=Trailing_Jamo
bool ucg_is_hangul_syllable_trailing(int32_t r) {
	if (0x11A8 <= r && r <= 0x11FF) { return true; }
	return 0xD7CB <= r && r <= 0xD7FB;
}

// Hangul_Syllable_Type=LV_Syllable
bool ucg_is_hangul_syllable_lv(int32_t r) {
	// This table holds single code points (stride 1), so an exact match is required.
	int at = ucg_binary_search(r, ucg_hangul_syllable_lv_singlets, UCG_TABLE_LEN(ucg_hangul_syllable_lv_singlets), 1);
	return at >= 0 && ucg_hangul_syllable_lv_singlets[at] == r;
}

//
// Hangul_Syllable_Type=LVT_Syllable
bool ucg_is_hangul_syllable_lvt(int32_t r) {
	const int32_t* spans = ucg_hangul_syllable_lvt_ranges;
	int hit = ucg_binary_search(r, spans, UCG_TABLE_LEN(ucg_hangul_syllable_lvt_ranges)/2, 2);
	if (hit < 0) {
		return false;
	}
	return spans[hit] <= r && r <= spans[hit+1];
}


// Indic_Syllabic_Category=Consonant_Preceding_Repha
bool ucg_is_indic_consonant_preceding_repha(int32_t r) {
	return r == 0x00D4E || r == 0x11941 || r == 0x11D46 || r == 0x11F02;
}

// Indic_Syllabic_Category=Consonant_Prefixed
bool ucg_is_indic_consonant_prefixed(int32_t r) {
	if (r == 0x1193F || r == 0x11A3A) {
		return true;
	}
	return (0x111C2 <= r && r <= 0x111C3) ||
	       (0x11A84 <= r && r <= 0x11A89);
}

// Indic_Conjunct_Break=Linker
bool ucg_is_indic_conjunct_break_linker(int32_t r) {
	static const int32_t linkers[] = {
		0x094D, 0x09CD, 0x0ACD, 0x0B4D, 0x0C4D, 0x0D4D,
	};
	for (size_t i = 0; i < sizeof(linkers)/sizeof(linkers[0]); i += 1) {
		if (linkers[i] == r) {
			return true;
		}
	}
	return false;
}

// Indic_Conjunct_Break=Consonant
bool ucg_is_indic_conjunct_break_consonant(int32_t r) {
	const int32_t* spans = ucg_indic_conjunct_break_consonant_ranges;
	int hit = ucg_binary_search(r, spans, UCG_TABLE_LEN(ucg_indic_conjunct_break_consonant_ranges)/2, 2);
	if (hit < 0) {
		return false;
	}
	return spans[hit] <= r && r <= spans[hit+1];
}

// Indic_Conjunct_Break=Extend
bool ucg_is_indic_conjunct_break_extend(int32_t r) {
	const int32_t* spans = ucg_indic_conjunct_break_extend_ranges;
	int hit = ucg_binary_search(r, spans, UCG_TABLE_LEN(ucg_indic_conjunct_break_extend_ranges)/2, 2);
	if (hit < 0) {
		return false;
	}
	return spans[hit] <= r && r <= spans[hit+1];
}


//
// ```
// Indic_Syllabic_Category = Consonant_Preceding_Repha, or
// Indic_Syllabic_Category = Consonant_Prefixed, or
// Prepended_Concatenation_Mark = Yes
// ```
//
bool ucg_is_gcb_prepend_class(int32_t r) {
	if (ucg_is_indic_consonant_preceding_repha(r)) { return true; }
	if (ucg_is_indic_consonant_prefixed(r))        { return true; }
	return ucg_is_prepended_concatenation_mark(r);
}

/*
```
Grapheme_Extend = Yes, or
Emoji_Modifier = Yes

This includes:
General_Category = Nonspacing_Mark
General_Category = Enclosing_Mark
U+200C ZERO WIDTH NON-JOINER

plus a few General_Category = Spacing_Mark needed for canonical equivalence.
```
*/
bool ucg_is_gcb_extend_class(int32_t r) {
	if (ucg_is_grapheme_extend(r)) {
		return true;
	}
	return ucg_is_emoji_modifier(r);
}

// Estimates how many monospace cells a rune occupies.
//
// Return values:
//
// - 2 if East_Asian_Width=F or W, or
// - 0 if non-printable / zero-width, or
// - 1 in all other cases.
//
int ucg_normalized_east_asian_width(int32_t r) {
	if (ucg_is_control(r)) {
		return 0;
	}
	if (r <= 0x10FF) {
		// Easy early out for low runes.
		return 1;
	}

	// 0xFEFF (ZERO_WIDTH_NO_BREAK_SPACE) is a different interpretation of
	// the BOM which occurs in the middle of text.
	if (r == 0xFEFF ||
	    r == ZERO_WIDTH_SPACE ||
	    r == ZERO_WIDTH_NON_JOINER ||
	    r == ZERO_WIDTH_JOINER ||
	    r == WORD_JOINER)
	{
		return 0;
	}

	// Rows are three elements wide: first rune, last rune, width.
	const int32_t* rows = ucg_normalized_east_asian_width_ranges;
	int hit = ucg_binary_search(r, rows, UCG_TABLE_LEN(ucg_normalized_east_asian_width_ranges)/3, 3);
	if (hit >= 0 && rows[hit] <= r && r <= rows[hit+1]) {
		return (int)rows[hit+2];
	}
	return 1;
}

//
// End of Unicode 15.1.0 block.
//

// Which multi-rune sequence, if any, the decoder is currently inside.
enum grapheme_cluster_sequence {
	None,
	Indic,
	Emoji,
	Regional,
};

// Running state of one ucg_decode_grapheme_clusters call.
typedef struct {
	ucg_grapheme* graphemes;
	i32 rune_count;
	i32 grapheme_count;
	i32 width;

	int32_t last_rune;
	bool last_rune_breaks_forward;

	i32 last_width;
	i32 last_grapheme_count;

	bool bypass_next_rune;

	int regional_indicator_counter;

	enum grapheme_cluster_sequence current_sequence;
	bool continue_sequence;

	// Number of ucg_grapheme slots currently allocated behind `graphemes`.
	i32 grapheme_capacity;
} ucg_decoder_state;


// Bookkeeping that runs after every rune has been classified by the main loop.
//
// If the rune opened a new grapheme cluster, its width is added to the running
// total and a ucg_grapheme record is stored for it. The rune is then remembered
// as `last_rune`, and sequence tracking is reset unless the rule that handled
// the rune asked for the sequence to continue.
void _ucg_decode_grapheme_clusters_deferred_step(
	gbAllocator allocator,
	ucg_decoder_state* state,
	i32 byte_index,
	int32_t this_rune
) {
	// "Break at the start and end of text, unless the text is empty."
	//
	// GB1: sot ÷ Any
	// GB2: Any ÷ eot
	if (state->rune_count == 0 && state->grapheme_count == 0) {
		state->grapheme_count += 1;
	}

	if (state->grapheme_count > state->last_grapheme_count) {
		state->width += ucg_normalized_east_asian_width(this_rune);

		// Always keep one slot past `grapheme_count` allocated, as the
		// previous one-element-at-a-time growth did, but grow geometrically
		// so that an allocator which cannot extend in place is not asked for
		// a fresh copy of the whole array on every single cluster.
		i32 needed = state->grapheme_count + 1;
		if (needed > state->grapheme_capacity) {
			i32 new_capacity = state->grapheme_capacity * 2;
			if (new_capacity < 16) {
				new_capacity = 16;
			}
			if (new_capacity < needed) {
				new_capacity = needed;
			}

			state->graphemes = (ucg_grapheme*)gb_resize(allocator,
				state->graphemes,
				sizeof(ucg_grapheme) * (state->grapheme_capacity),
				sizeof(ucg_grapheme) * (new_capacity));
			state->grapheme_capacity = new_capacity;
		}

		ucg_grapheme append = {
			byte_index,
			state->rune_count,
			state->width - state->last_width,
		};

		state->graphemes[state->grapheme_count - 1] = append;

		state->last_grapheme_count = state->grapheme_count;
		state->last_width = state->width;
	}

	state->last_rune = this_rune;
	state->rune_count += 1;

	if (!state->continue_sequence) {
		state->current_sequence = None;
		state->regional_indicator_counter = 0;
	}
	state->continue_sequence = false;
}

// Copies the decoder's results into whichever out-parameters were supplied.
static void _ucg_write_decoder_outputs(
	const ucg_decoder_state* state,
	ucg_grapheme** out_graphemes,
	i32* out_rune_count,
	i32* out_grapheme_count,
	i32* out_width
) {
	if (out_graphemes != NULL)      { *out_graphemes = state->graphemes; }
	if (out_rune_count != NULL)     { *out_rune_count = state->rune_count; }
	if (out_grapheme_count != NULL) { *out_grapheme_count = state->grapheme_count; }
	if (out_width != NULL)          { *out_width = state->width; }
}

// Splits the UTF-8 text `str` (`str_len` bytes) into grapheme clusters.
//
// Any of the out-parameters may be NULL. On return they receive the array of
// cluster records (allocated with `allocator`), the number of runes decoded,
// the number of clusters, and the summed display width.
//
// Returns 0 on success. Returns -1 as soon as a rune fails to decode; the
// out-parameters then describe only the text decoded before the failure.
int ucg_decode_grapheme_clusters(
	gbAllocator allocator,
	const uint8_t* str,
	int str_len,

	ucg_grapheme** out_graphemes,
	i32* out_rune_count,
	i32* out_grapheme_count,
	i32* out_width
) {
	// The following procedure implements text segmentation by breaking on
	// Grapheme Cluster Boundaries[1], using the values[2] and rules[3] from
	// the Unicode® Standard Annex #29, entitled:
	//
	//                       UNICODE TEXT SEGMENTATION
	//
	// Version: Unicode 15.1.0
	// Date: 2023-08-16
	// Revision: 43
	//
	// This procedure is conformant[4] to UAX29-C1-1, otherwise known as the
	// extended, non-legacy ruleset.
	//
	// Please see the references for more information.
	//
	//
	// [1]: https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries
	// [2]: https://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table
	// [3]: https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules
	// [4]: https://www.unicode.org/reports/tr29/#Conformance

	// Additionally, this procedure takes into account Standard Annex #11,
	// in order to estimate how visually wide the string will appear on a
	// monospaced display. This can only ever be a rough guess, as this tends
	// to be an implementation detail relating to which fonts are being used,
	// how codepoints are interpreted and drawn, if codepoint sequences are
	// interpreted correctly, and et cetera.
	//
	// For example, a program may not properly interpret an emoji modifier
	// sequence and print the component glyphs instead of one whole glyph.
	//
	// See here for more information: https://www.unicode.org/reports/tr11/
	//
	// NOTE: There is no explicit mention of what to do with zero-width spaces
	// as far as grapheme cluster segmentation goes, therefore this
	// implementation may count and return graphemes with a `width` of zero.
	//
	// Treat them as any other space.

	ucg_decoder_state state = {0};

#define UCG_DEFERRED_DECODE_STEP() (_ucg_decode_grapheme_clusters_deferred_step(allocator, &state, byte_index, this_rune))

	for (i32 byte_index = 0, bytes_advanced = 0; byte_index < str_len; byte_index += bytes_advanced) {
		int32_t this_rune = GB_RUNE_INVALID;
		bytes_advanced = (i32)(utf8_decode(str+byte_index, str_len-byte_index, &this_rune));
		if (this_rune == GB_RUNE_INVALID || bytes_advanced == 0) {
			// There was a Unicode parsing error; bail out.
			_ucg_write_decoder_outputs(&state, out_graphemes, out_rune_count, out_grapheme_count, out_width);

			// Return an error code.
			return -1;
		}

		// "Do not break between a CR and LF. Otherwise, break before and after controls."
		//
		// GB3:                 CR × LF
		// GB4: (Control | CR | LF) ÷
		// GB5:                     ÷ (Control | CR | LF)
		if (this_rune == '\n' && state.last_rune == '\r') {
			state.last_rune_breaks_forward = false;
			state.bypass_next_rune = false;
			UCG_DEFERRED_DECODE_STEP(); continue;
		}

		if (ucg_is_control(this_rune)) {
			state.grapheme_count += 1;
			state.last_rune_breaks_forward = true;
			state.bypass_next_rune = true;
			UCG_DEFERRED_DECODE_STEP(); continue;
		}

		// (This check is for rules that work forwards, instead of backwards.)
		if (state.bypass_next_rune) {
			if (state.last_rune_breaks_forward) {
				state.grapheme_count += 1;
				state.last_rune_breaks_forward = false;
			}

			state.bypass_next_rune = false;
			UCG_DEFERRED_DECODE_STEP(); continue;
		}

		// (Optimization 1: Prevent low runes from proceeding further.)
		//
		// * 0xA9 and 0xAE are in the Extended_Pictographic range,
		//   which is checked later in GB11.
		if (this_rune != 0xA9 && this_rune != 0xAE && this_rune <= 0x2FF) {
			state.grapheme_count += 1;
			UCG_DEFERRED_DECODE_STEP(); continue;
		}

		// (Optimization 2: Check if the rune is in the Hangul space before getting specific.)
		if (0x1100 <= this_rune && this_rune <= 0xD7FB) {
			// "Do not break Hangul syllable sequences."
			//
			// GB6:        L × (L | V | LV | LVT)
			// GB7: (LV | V) × (V | T)
			// GB8: (LVT | T) × T
			if (ucg_is_hangul_syllable_leading(this_rune) ||
			    ucg_is_hangul_syllable_lv(this_rune) ||
			    ucg_is_hangul_syllable_lvt(this_rune))
			{
				if (!ucg_is_hangul_syllable_leading(state.last_rune)) {
					state.grapheme_count += 1;
				}
				UCG_DEFERRED_DECODE_STEP(); continue;
			}

			if (ucg_is_hangul_syllable_vowel(this_rune)) {
				if (ucg_is_hangul_syllable_leading(state.last_rune) ||
				    ucg_is_hangul_syllable_vowel(state.last_rune) ||
				    ucg_is_hangul_syllable_lv(state.last_rune))
				{
					UCG_DEFERRED_DECODE_STEP(); continue;
				}
				state.grapheme_count += 1;
				UCG_DEFERRED_DECODE_STEP(); continue;
			}

			if (ucg_is_hangul_syllable_trailing(this_rune)) {
				if (ucg_is_hangul_syllable_trailing(state.last_rune) ||
				    ucg_is_hangul_syllable_lvt(state.last_rune) ||
				    ucg_is_hangul_syllable_lv(state.last_rune) ||
				    ucg_is_hangul_syllable_vowel(state.last_rune))
				{
					UCG_DEFERRED_DECODE_STEP(); continue;
				}
				state.grapheme_count += 1;
				UCG_DEFERRED_DECODE_STEP(); continue;
			}
		}

		// "Do not break before extending characters or ZWJ."
		//
		// GB9: × (Extend | ZWJ)
		if (this_rune == ZERO_WIDTH_JOINER) {
			state.continue_sequence = true;
			UCG_DEFERRED_DECODE_STEP(); continue;
		}

		if (ucg_is_gcb_extend_class(this_rune)) {
			// (Support for GB9c.)
			if (state.current_sequence == Indic) {
				if (ucg_is_indic_conjunct_break_extend(this_rune) && (
				    ucg_is_indic_conjunct_break_linker(state.last_rune) ||
				    ucg_is_indic_conjunct_break_consonant(state.last_rune) ))
				{
					state.continue_sequence = true;
					UCG_DEFERRED_DECODE_STEP(); continue;
				}

				if (ucg_is_indic_conjunct_break_linker(this_rune) && (
				    ucg_is_indic_conjunct_break_linker(state.last_rune) ||
				    ucg_is_indic_conjunct_break_extend(state.last_rune) ||
				    ucg_is_indic_conjunct_break_consonant(state.last_rune) ))
				{
					state.continue_sequence = true;
					UCG_DEFERRED_DECODE_STEP(); continue;
				}

				UCG_DEFERRED_DECODE_STEP(); continue;
			}

			// (Support for GB11.)
			if (state.current_sequence == Emoji && (
			    ucg_is_gcb_extend_class(state.last_rune) ||
			    ucg_is_emoji_extended_pictographic(state.last_rune) ))
			{
				state.continue_sequence = true;
			}

			UCG_DEFERRED_DECODE_STEP(); continue;
		}

		// _The GB9a and GB9b rules only apply to extended grapheme clusters:_
		// "Do not break before SpacingMarks, or after Prepend characters."
		//
		// GB9a: × SpacingMark
		// GB9b: Prepend ×
		if (ucg_is_spacing_mark(this_rune)) {
			UCG_DEFERRED_DECODE_STEP(); continue;
		}

		if (ucg_is_gcb_prepend_class(this_rune)) {
			state.grapheme_count += 1;
			state.bypass_next_rune = true;
			UCG_DEFERRED_DECODE_STEP(); continue;
		}

		// _The GB9c rule only applies to extended grapheme clusters:_
		// "Do not break within certain combinations with Indic_Conjunct_Break (InCB)=Linker."
		//
		// GB9c: \p{InCB=Consonant} [ \p{InCB=Extend} \p{InCB=Linker} ]* \p{InCB=Linker} [ \p{InCB=Extend} \p{InCB=Linker} ]* × \p{InCB=Consonant}
		if (ucg_is_indic_conjunct_break_consonant(this_rune)) {
			if (state.current_sequence == Indic) {
				if (state.last_rune == ZERO_WIDTH_JOINER ||
				    ucg_is_indic_conjunct_break_linker(state.last_rune))
				{
					state.continue_sequence = true;
				} else {
					state.grapheme_count += 1;
				}
			} else {
				state.grapheme_count += 1;
				state.current_sequence = Indic;
				state.continue_sequence = true;
			}
			UCG_DEFERRED_DECODE_STEP(); continue;
		}

		if (ucg_is_indic_conjunct_break_extend(this_rune)) {
			if (state.current_sequence == Indic) {
				if (ucg_is_indic_conjunct_break_consonant(state.last_rune) ||
				    ucg_is_indic_conjunct_break_linker(state.last_rune))
				{
					state.continue_sequence = true;
				} else {
					state.grapheme_count += 1;
				}
			}
			UCG_DEFERRED_DECODE_STEP(); continue;
		}

		if (ucg_is_indic_conjunct_break_linker(this_rune)) {
			if (state.current_sequence == Indic) {
				if (ucg_is_indic_conjunct_break_extend(state.last_rune) ||
				    ucg_is_indic_conjunct_break_linker(state.last_rune))
				{
					state.continue_sequence = true;
				} else {
					state.grapheme_count += 1;
				}
			}
			UCG_DEFERRED_DECODE_STEP(); continue;
		}

		//
		// (Curiously, there is no GB10.)
		//

		// "Do not break within emoji modifier sequences or emoji zwj sequences."
		//
		// GB11: \p{Extended_Pictographic} Extend* ZWJ × \p{Extended_Pictographic}
		if (ucg_is_emoji_extended_pictographic(this_rune)) {
			if (state.current_sequence != Emoji || state.last_rune != ZERO_WIDTH_JOINER) {
				state.grapheme_count += 1;
			}
			state.current_sequence = Emoji;
			state.continue_sequence = true;
			UCG_DEFERRED_DECODE_STEP(); continue;
		}

		// "Do not break within emoji flag sequences.
		// That is, do not break between regional indicator (RI) symbols
		// if there is an odd number of RI characters before the break point."
		//
		// GB12:   sot (RI RI)* RI × RI
		// GB13: [^RI] (RI RI)* RI × RI
		if (ucg_is_regional_indicator(this_rune)) {
			if ((state.regional_indicator_counter & 1) == 0) {
				state.grapheme_count += 1;
			}

			state.current_sequence = Regional;
			state.continue_sequence = true;
			state.regional_indicator_counter += 1;

			UCG_DEFERRED_DECODE_STEP(); continue;
		}

		// "Otherwise, break everywhere."
		//
		// GB999: Any ÷ Any
		state.grapheme_count += 1;
		UCG_DEFERRED_DECODE_STEP();
	}

#undef UCG_DEFERRED_DECODE_STEP

	_ucg_write_decoder_outputs(&state, out_graphemes, out_rune_count, out_grapheme_count, out_width);

	return 0;
}

#undef UCG_TABLE_LEN
#undef ZERO_WIDTH_SPACE
#undef ZERO_WIDTH_NON_JOINER
#undef ZERO_WIDTH_JOINER
#undef WORD_JOINER

// ---------------------------------------------------------------------------
// src/ucg/ucg_tables.h (new file) begins here.
// ---------------------------------------------------------------------------
/*
 * SPDX-FileCopyrightText: (c) 2024 Feoramund
 * SPDX-License-Identifier: BSD-3-Clause
 */
#ifndef _UCG_TABLES_INCLUDED
#define _UCG_TABLES_INCLUDED

#ifdef __cplusplus
extern "C" {
#endif

// NOTE(review): the header name was missing after `#include`; restored as
// <stdint.h> because the tables are declared as int32_t. Confirm against upstream.
#include <stdint.h>

//
// The tables below are accurate as of Unicode 15.1.0.
+// + +static const int32_t ucg_spacing_mark_ranges[] = { + 0x0903, 0x0903, + 0x093B, 0x093B, + 0x093E, 0x0940, + 0x0949, 0x094C, + 0x094E, 0x094F, + 0x0982, 0x0983, + 0x09BE, 0x09C0, + 0x09C7, 0x09C8, + 0x09CB, 0x09CC, + 0x09D7, 0x09D7, + 0x0A03, 0x0A03, + 0x0A3E, 0x0A40, + 0x0A83, 0x0A83, + 0x0ABE, 0x0AC0, + 0x0AC9, 0x0AC9, + 0x0ACB, 0x0ACC, + 0x0B02, 0x0B03, + 0x0B3E, 0x0B3E, + 0x0B40, 0x0B40, + 0x0B47, 0x0B48, + 0x0B4B, 0x0B4C, + 0x0B57, 0x0B57, + 0x0BBE, 0x0BBF, + 0x0BC1, 0x0BC2, + 0x0BC6, 0x0BC8, + 0x0BCA, 0x0BCC, + 0x0BD7, 0x0BD7, + 0x0C01, 0x0C03, + 0x0C41, 0x0C44, + 0x0C82, 0x0C83, + 0x0CBE, 0x0CBE, + 0x0CC0, 0x0CC4, + 0x0CC7, 0x0CC8, + 0x0CCA, 0x0CCB, + 0x0CD5, 0x0CD6, + 0x0CF3, 0x0CF3, + 0x0D02, 0x0D03, + 0x0D3E, 0x0D40, + 0x0D46, 0x0D48, + 0x0D4A, 0x0D4C, + 0x0D57, 0x0D57, + 0x0D82, 0x0D83, + 0x0DCF, 0x0DD1, + 0x0DD8, 0x0DDF, + 0x0DF2, 0x0DF3, + 0x0F3E, 0x0F3F, + 0x0F7F, 0x0F7F, + 0x102B, 0x102C, + 0x1031, 0x1031, + 0x1038, 0x1038, + 0x103B, 0x103C, + 0x1056, 0x1057, + 0x1062, 0x1064, + 0x1067, 0x106D, + 0x1083, 0x1084, + 0x1087, 0x108C, + 0x108F, 0x108F, + 0x109A, 0x109C, + 0x1715, 0x1715, + 0x1734, 0x1734, + 0x17B6, 0x17B6, + 0x17BE, 0x17C5, + 0x17C7, 0x17C8, + 0x1923, 0x1926, + 0x1929, 0x192B, + 0x1930, 0x1931, + 0x1933, 0x1938, + 0x1A19, 0x1A1A, + 0x1A55, 0x1A55, + 0x1A57, 0x1A57, + 0x1A61, 0x1A61, + 0x1A63, 0x1A64, + 0x1A6D, 0x1A72, + 0x1B04, 0x1B04, + 0x1B35, 0x1B35, + 0x1B3B, 0x1B3B, + 0x1B3D, 0x1B41, + 0x1B43, 0x1B44, + 0x1B82, 0x1B82, + 0x1BA1, 0x1BA1, + 0x1BA6, 0x1BA7, + 0x1BAA, 0x1BAA, + 0x1BE7, 0x1BE7, + 0x1BEA, 0x1BEC, + 0x1BEE, 0x1BEE, + 0x1BF2, 0x1BF3, + 0x1C24, 0x1C2B, + 0x1C34, 0x1C35, + 0x1CE1, 0x1CE1, + 0x1CF7, 0x1CF7, + 0x302E, 0x302F, + 0xA823, 0xA824, + 0xA827, 0xA827, + 0xA880, 0xA881, + 0xA8B4, 0xA8C3, + 0xA952, 0xA953, + 0xA983, 0xA983, + 0xA9B4, 0xA9B5, + 0xA9BA, 0xA9BB, + 0xA9BE, 0xA9C0, + 0xAA2F, 0xAA30, + 0xAA33, 0xAA34, + 0xAA4D, 0xAA4D, + 0xAA7B, 0xAA7B, + 0xAA7D, 0xAA7D, + 0xAAEB, 0xAAEB, + 0xAAEE, 0xAAEF, + 0xAAF5, 
0xAAF5, + 0xABE3, 0xABE4, + 0xABE6, 0xABE7, + 0xABE9, 0xABEA, + 0xABEC, 0xABEC, + 0x11000, 0x11000, + 0x11002, 0x11002, + 0x11082, 0x11082, + 0x110B0, 0x110B2, + 0x110B7, 0x110B8, + 0x1112C, 0x1112C, + 0x11145, 0x11146, + 0x11182, 0x11182, + 0x111B3, 0x111B5, + 0x111BF, 0x111C0, + 0x111CE, 0x111CE, + 0x1122C, 0x1122E, + 0x11232, 0x11233, + 0x11235, 0x11235, + 0x112E0, 0x112E2, + 0x11302, 0x11303, + 0x1133E, 0x1133F, + 0x11341, 0x11344, + 0x11347, 0x11348, + 0x1134B, 0x1134D, + 0x11357, 0x11357, + 0x11362, 0x11363, + 0x11435, 0x11437, + 0x11440, 0x11441, + 0x11445, 0x11445, + 0x114B0, 0x114B2, + 0x114B9, 0x114B9, + 0x114BB, 0x114BE, + 0x114C1, 0x114C1, + 0x115AF, 0x115B1, + 0x115B8, 0x115BB, + 0x115BE, 0x115BE, + 0x11630, 0x11632, + 0x1163B, 0x1163C, + 0x1163E, 0x1163E, + 0x116AC, 0x116AC, + 0x116AE, 0x116AF, + 0x116B6, 0x116B6, + 0x11720, 0x11721, + 0x11726, 0x11726, + 0x1182C, 0x1182E, + 0x11838, 0x11838, + 0x11930, 0x11935, + 0x11937, 0x11938, + 0x1193D, 0x1193D, + 0x11940, 0x11940, + 0x11942, 0x11942, + 0x119D1, 0x119D3, + 0x119DC, 0x119DF, + 0x119E4, 0x119E4, + 0x11A39, 0x11A39, + 0x11A57, 0x11A58, + 0x11A97, 0x11A97, + 0x11C2F, 0x11C2F, + 0x11C3E, 0x11C3E, + 0x11CA9, 0x11CA9, + 0x11CB1, 0x11CB1, + 0x11CB4, 0x11CB4, + 0x11D8A, 0x11D8E, + 0x11D93, 0x11D94, + 0x11D96, 0x11D96, + 0x11EF5, 0x11EF6, + 0x11F03, 0x11F03, + 0x11F34, 0x11F35, + 0x11F3E, 0x11F3F, + 0x11F41, 0x11F41, + 0x16F51, 0x16F87, + 0x16FF0, 0x16FF1, + 0x1D165, 0x1D166, + 0x1D16D, 0x1D172, +}; + +static const int32_t ucg_nonspacing_mark_ranges[] = { + 0x0300, 0x036F, + 0x0483, 0x0487, + 0x0591, 0x05BD, + 0x05BF, 0x05BF, + 0x05C1, 0x05C2, + 0x05C4, 0x05C5, + 0x05C7, 0x05C7, + 0x0610, 0x061A, + 0x064B, 0x065F, + 0x0670, 0x0670, + 0x06D6, 0x06DC, + 0x06DF, 0x06E4, + 0x06E7, 0x06E8, + 0x06EA, 0x06ED, + 0x0711, 0x0711, + 0x0730, 0x074A, + 0x07A6, 0x07B0, + 0x07EB, 0x07F3, + 0x07FD, 0x07FD, + 0x0816, 0x0819, + 0x081B, 0x0823, + 0x0825, 0x0827, + 0x0829, 0x082D, + 0x0859, 0x085B, + 0x0898, 0x089F, + 
0x08CA, 0x08E1, + 0x08E3, 0x0902, + 0x093A, 0x093A, + 0x093C, 0x093C, + 0x0941, 0x0948, + 0x094D, 0x094D, + 0x0951, 0x0957, + 0x0962, 0x0963, + 0x0981, 0x0981, + 0x09BC, 0x09BC, + 0x09C1, 0x09C4, + 0x09CD, 0x09CD, + 0x09E2, 0x09E3, + 0x09FE, 0x09FE, + 0x0A01, 0x0A02, + 0x0A3C, 0x0A3C, + 0x0A41, 0x0A42, + 0x0A47, 0x0A48, + 0x0A4B, 0x0A4D, + 0x0A51, 0x0A51, + 0x0A70, 0x0A71, + 0x0A75, 0x0A75, + 0x0A81, 0x0A82, + 0x0ABC, 0x0ABC, + 0x0AC1, 0x0AC5, + 0x0AC7, 0x0AC8, + 0x0ACD, 0x0ACD, + 0x0AE2, 0x0AE3, + 0x0AFA, 0x0AFF, + 0x0B01, 0x0B01, + 0x0B3C, 0x0B3C, + 0x0B3F, 0x0B3F, + 0x0B41, 0x0B44, + 0x0B4D, 0x0B4D, + 0x0B55, 0x0B56, + 0x0B62, 0x0B63, + 0x0B82, 0x0B82, + 0x0BC0, 0x0BC0, + 0x0BCD, 0x0BCD, + 0x0C00, 0x0C00, + 0x0C04, 0x0C04, + 0x0C3C, 0x0C3C, + 0x0C3E, 0x0C40, + 0x0C46, 0x0C48, + 0x0C4A, 0x0C4D, + 0x0C55, 0x0C56, + 0x0C62, 0x0C63, + 0x0C81, 0x0C81, + 0x0CBC, 0x0CBC, + 0x0CBF, 0x0CBF, + 0x0CC6, 0x0CC6, + 0x0CCC, 0x0CCD, + 0x0CE2, 0x0CE3, + 0x0D00, 0x0D01, + 0x0D3B, 0x0D3C, + 0x0D41, 0x0D44, + 0x0D4D, 0x0D4D, + 0x0D62, 0x0D63, + 0x0D81, 0x0D81, + 0x0DCA, 0x0DCA, + 0x0DD2, 0x0DD4, + 0x0DD6, 0x0DD6, + 0x0E31, 0x0E31, + 0x0E34, 0x0E3A, + 0x0E47, 0x0E4E, + 0x0EB1, 0x0EB1, + 0x0EB4, 0x0EBC, + 0x0EC8, 0x0ECE, + 0x0F18, 0x0F19, + 0x0F35, 0x0F35, + 0x0F37, 0x0F37, + 0x0F39, 0x0F39, + 0x0F71, 0x0F7E, + 0x0F80, 0x0F84, + 0x0F86, 0x0F87, + 0x0F8D, 0x0F97, + 0x0F99, 0x0FBC, + 0x0FC6, 0x0FC6, + 0x102D, 0x1030, + 0x1032, 0x1037, + 0x1039, 0x103A, + 0x103D, 0x103E, + 0x1058, 0x1059, + 0x105E, 0x1060, + 0x1071, 0x1074, + 0x1082, 0x1082, + 0x1085, 0x1086, + 0x108D, 0x108D, + 0x109D, 0x109D, + 0x135D, 0x135F, + 0x1712, 0x1714, + 0x1732, 0x1733, + 0x1752, 0x1753, + 0x1772, 0x1773, + 0x17B4, 0x17B5, + 0x17B7, 0x17BD, + 0x17C6, 0x17C6, + 0x17C9, 0x17D3, + 0x17DD, 0x17DD, + 0x180B, 0x180D, + 0x180F, 0x180F, + 0x1885, 0x1886, + 0x18A9, 0x18A9, + 0x1920, 0x1922, + 0x1927, 0x1928, + 0x1932, 0x1932, + 0x1939, 0x193B, + 0x1A17, 0x1A18, + 0x1A1B, 0x1A1B, + 0x1A56, 0x1A56, + 0x1A58, 0x1A5E, + 
0x1A60, 0x1A60, + 0x1A62, 0x1A62, + 0x1A65, 0x1A6C, + 0x1A73, 0x1A7C, + 0x1A7F, 0x1A7F, + 0x1AB0, 0x1ABD, + 0x1ABF, 0x1ACE, + 0x1B00, 0x1B03, + 0x1B34, 0x1B34, + 0x1B36, 0x1B3A, + 0x1B3C, 0x1B3C, + 0x1B42, 0x1B42, + 0x1B6B, 0x1B73, + 0x1B80, 0x1B81, + 0x1BA2, 0x1BA5, + 0x1BA8, 0x1BA9, + 0x1BAB, 0x1BAD, + 0x1BE6, 0x1BE6, + 0x1BE8, 0x1BE9, + 0x1BED, 0x1BED, + 0x1BEF, 0x1BF1, + 0x1C2C, 0x1C33, + 0x1C36, 0x1C37, + 0x1CD0, 0x1CD2, + 0x1CD4, 0x1CE0, + 0x1CE2, 0x1CE8, + 0x1CED, 0x1CED, + 0x1CF4, 0x1CF4, + 0x1CF8, 0x1CF9, + 0x1DC0, 0x1DFF, + 0x20D0, 0x20DC, + 0x20E1, 0x20E1, + 0x20E5, 0x20F0, + 0x2CEF, 0x2CF1, + 0x2D7F, 0x2D7F, + 0x2DE0, 0x2DFF, + 0x302A, 0x302D, + 0x3099, 0x309A, + 0xA66F, 0xA66F, + 0xA674, 0xA67D, + 0xA69E, 0xA69F, + 0xA6F0, 0xA6F1, + 0xA802, 0xA802, + 0xA806, 0xA806, + 0xA80B, 0xA80B, + 0xA825, 0xA826, + 0xA82C, 0xA82C, + 0xA8C4, 0xA8C5, + 0xA8E0, 0xA8F1, + 0xA8FF, 0xA8FF, + 0xA926, 0xA92D, + 0xA947, 0xA951, + 0xA980, 0xA982, + 0xA9B3, 0xA9B3, + 0xA9B6, 0xA9B9, + 0xA9BC, 0xA9BD, + 0xA9E5, 0xA9E5, + 0xAA29, 0xAA2E, + 0xAA31, 0xAA32, + 0xAA35, 0xAA36, + 0xAA43, 0xAA43, + 0xAA4C, 0xAA4C, + 0xAA7C, 0xAA7C, + 0xAAB0, 0xAAB0, + 0xAAB2, 0xAAB4, + 0xAAB7, 0xAAB8, + 0xAABE, 0xAABF, + 0xAAC1, 0xAAC1, + 0xAAEC, 0xAAED, + 0xAAF6, 0xAAF6, + 0xABE5, 0xABE5, + 0xABE8, 0xABE8, + 0xABED, 0xABED, + 0xFB1E, 0xFB1E, + 0xFE00, 0xFE0F, + 0xFE20, 0xFE2F, + 0x101FD, 0x101FD, + 0x102E0, 0x102E0, + 0x10376, 0x1037A, + 0x10A01, 0x10A03, + 0x10A05, 0x10A06, + 0x10A0C, 0x10A0F, + 0x10A38, 0x10A3A, + 0x10A3F, 0x10A3F, + 0x10AE5, 0x10AE6, + 0x10D24, 0x10D27, + 0x10EAB, 0x10EAC, + 0x10EFD, 0x10EFF, + 0x10F46, 0x10F50, + 0x10F82, 0x10F85, + 0x11001, 0x11001, + 0x11038, 0x11046, + 0x11070, 0x11070, + 0x11073, 0x11074, + 0x1107F, 0x11081, + 0x110B3, 0x110B6, + 0x110B9, 0x110BA, + 0x110C2, 0x110C2, + 0x11100, 0x11102, + 0x11127, 0x1112B, + 0x1112D, 0x11134, + 0x11173, 0x11173, + 0x11180, 0x11181, + 0x111B6, 0x111BE, + 0x111C9, 0x111CC, + 0x111CF, 0x111CF, + 0x1122F, 0x11231, + 0x11234, 
0x11234, + 0x11236, 0x11237, + 0x1123E, 0x1123E, + 0x11241, 0x11241, + 0x112DF, 0x112DF, + 0x112E3, 0x112EA, + 0x11300, 0x11301, + 0x1133B, 0x1133C, + 0x11340, 0x11340, + 0x11366, 0x1136C, + 0x11370, 0x11374, + 0x11438, 0x1143F, + 0x11442, 0x11444, + 0x11446, 0x11446, + 0x1145E, 0x1145E, + 0x114B3, 0x114B8, + 0x114BA, 0x114BA, + 0x114BF, 0x114C0, + 0x114C2, 0x114C3, + 0x115B2, 0x115B5, + 0x115BC, 0x115BD, + 0x115BF, 0x115C0, + 0x115DC, 0x115DD, + 0x11633, 0x1163A, + 0x1163D, 0x1163D, + 0x1163F, 0x11640, + 0x116AB, 0x116AB, + 0x116AD, 0x116AD, + 0x116B0, 0x116B5, + 0x116B7, 0x116B7, + 0x1171D, 0x1171F, + 0x11722, 0x11725, + 0x11727, 0x1172B, + 0x1182F, 0x11837, + 0x11839, 0x1183A, + 0x1193B, 0x1193C, + 0x1193E, 0x1193E, + 0x11943, 0x11943, + 0x119D4, 0x119D7, + 0x119DA, 0x119DB, + 0x119E0, 0x119E0, + 0x11A01, 0x11A0A, + 0x11A33, 0x11A38, + 0x11A3B, 0x11A3E, + 0x11A47, 0x11A47, + 0x11A51, 0x11A56, + 0x11A59, 0x11A5B, + 0x11A8A, 0x11A96, + 0x11A98, 0x11A99, + 0x11C30, 0x11C36, + 0x11C38, 0x11C3D, + 0x11C3F, 0x11C3F, + 0x11C92, 0x11CA7, + 0x11CAA, 0x11CB0, + 0x11CB2, 0x11CB3, + 0x11CB5, 0x11CB6, + 0x11D31, 0x11D36, + 0x11D3A, 0x11D3A, + 0x11D3C, 0x11D3D, + 0x11D3F, 0x11D45, + 0x11D47, 0x11D47, + 0x11D90, 0x11D91, + 0x11D95, 0x11D95, + 0x11D97, 0x11D97, + 0x11EF3, 0x11EF4, + 0x11F00, 0x11F01, + 0x11F36, 0x11F3A, + 0x11F40, 0x11F40, + 0x11F42, 0x11F42, + 0x13440, 0x13440, + 0x13447, 0x13455, + 0x16AF0, 0x16AF4, + 0x16B30, 0x16B36, + 0x16F4F, 0x16F4F, + 0x16F8F, 0x16F92, + 0x16FE4, 0x16FE4, + 0x1BC9D, 0x1BC9E, + 0x1CF00, 0x1CF2D, + 0x1CF30, 0x1CF46, + 0x1D167, 0x1D169, + 0x1D17B, 0x1D182, + 0x1D185, 0x1D18B, + 0x1D1AA, 0x1D1AD, + 0x1D242, 0x1D244, + 0x1DA00, 0x1DA36, + 0x1DA3B, 0x1DA6C, + 0x1DA75, 0x1DA75, + 0x1DA84, 0x1DA84, + 0x1DA9B, 0x1DA9F, + 0x1DAA1, 0x1DAAF, + 0x1E000, 0x1E006, + 0x1E008, 0x1E018, + 0x1E01B, 0x1E021, + 0x1E023, 0x1E024, + 0x1E026, 0x1E02A, + 0x1E08F, 0x1E08F, + 0x1E130, 0x1E136, + 0x1E2AE, 0x1E2AE, + 0x1E2EC, 0x1E2EF, + 0x1E4EC, 0x1E4EF, + 0x1E8D0, 
0x1E8D6, + 0x1E944, 0x1E94A, + 0xE0100, 0xE01EF, +}; + +static const int32_t ucg_emoji_extended_pictographic_ranges[] = { + 0x00A9, 0x00A9, + 0x00AE, 0x00AE, + 0x203C, 0x203C, + 0x2049, 0x2049, + 0x2122, 0x2122, + 0x2139, 0x2139, + 0x2194, 0x2199, + 0x21A9, 0x21AA, + 0x231A, 0x231B, + 0x2328, 0x2328, + 0x2388, 0x2388, + 0x23CF, 0x23CF, + 0x23E9, 0x23EC, + 0x23ED, 0x23EE, + 0x23EF, 0x23EF, + 0x23F0, 0x23F0, + 0x23F1, 0x23F2, + 0x23F3, 0x23F3, + 0x23F8, 0x23FA, + 0x24C2, 0x24C2, + 0x25AA, 0x25AB, + 0x25B6, 0x25B6, + 0x25C0, 0x25C0, + 0x25FB, 0x25FE, + 0x2600, 0x2601, + 0x2602, 0x2603, + 0x2604, 0x2604, + 0x2605, 0x2605, + 0x2607, 0x260D, + 0x260E, 0x260E, + 0x260F, 0x2610, + 0x2611, 0x2611, + 0x2612, 0x2612, + 0x2614, 0x2615, + 0x2616, 0x2617, + 0x2618, 0x2618, + 0x2619, 0x261C, + 0x261D, 0x261D, + 0x261E, 0x261F, + 0x2620, 0x2620, + 0x2621, 0x2621, + 0x2622, 0x2623, + 0x2624, 0x2625, + 0x2626, 0x2626, + 0x2627, 0x2629, + 0x262A, 0x262A, + 0x262B, 0x262D, + 0x262E, 0x262E, + 0x262F, 0x262F, + 0x2630, 0x2637, + 0x2638, 0x2639, + 0x263A, 0x263A, + 0x263B, 0x263F, + 0x2640, 0x2640, + 0x2641, 0x2641, + 0x2642, 0x2642, + 0x2643, 0x2647, + 0x2648, 0x2653, + 0x2654, 0x265E, + 0x265F, 0x265F, + 0x2660, 0x2660, + 0x2661, 0x2662, + 0x2663, 0x2663, + 0x2664, 0x2664, + 0x2665, 0x2666, + 0x2667, 0x2667, + 0x2668, 0x2668, + 0x2669, 0x267A, + 0x267B, 0x267B, + 0x267C, 0x267D, + 0x267E, 0x267E, + 0x267F, 0x267F, + 0x2680, 0x2685, + 0x2690, 0x2691, + 0x2692, 0x2692, + 0x2693, 0x2693, + 0x2694, 0x2694, + 0x2695, 0x2695, + 0x2696, 0x2697, + 0x2698, 0x2698, + 0x2699, 0x2699, + 0x269A, 0x269A, + 0x269B, 0x269C, + 0x269D, 0x269F, + 0x26A0, 0x26A1, + 0x26A2, 0x26A6, + 0x26A7, 0x26A7, + 0x26A8, 0x26A9, + 0x26AA, 0x26AB, + 0x26AC, 0x26AF, + 0x26B0, 0x26B1, + 0x26B2, 0x26BC, + 0x26BD, 0x26BE, + 0x26BF, 0x26C3, + 0x26C4, 0x26C5, + 0x26C6, 0x26C7, + 0x26C8, 0x26C8, + 0x26C9, 0x26CD, + 0x26CE, 0x26CE, + 0x26CF, 0x26CF, + 0x26D0, 0x26D0, + 0x26D1, 0x26D1, + 0x26D2, 0x26D2, + 0x26D3, 0x26D3, + 
0x26D4, 0x26D4, + 0x26D5, 0x26E8, + 0x26E9, 0x26E9, + 0x26EA, 0x26EA, + 0x26EB, 0x26EF, + 0x26F0, 0x26F1, + 0x26F2, 0x26F3, + 0x26F4, 0x26F4, + 0x26F5, 0x26F5, + 0x26F6, 0x26F6, + 0x26F7, 0x26F9, + 0x26FA, 0x26FA, + 0x26FB, 0x26FC, + 0x26FD, 0x26FD, + 0x26FE, 0x2701, + 0x2702, 0x2702, + 0x2703, 0x2704, + 0x2705, 0x2705, + 0x2708, 0x270C, + 0x270D, 0x270D, + 0x270E, 0x270E, + 0x270F, 0x270F, + 0x2710, 0x2711, + 0x2712, 0x2712, + 0x2714, 0x2714, + 0x2716, 0x2716, + 0x271D, 0x271D, + 0x2721, 0x2721, + 0x2728, 0x2728, + 0x2733, 0x2734, + 0x2744, 0x2744, + 0x2747, 0x2747, + 0x274C, 0x274C, + 0x274E, 0x274E, + 0x2753, 0x2755, + 0x2757, 0x2757, + 0x2763, 0x2763, + 0x2764, 0x2764, + 0x2765, 0x2767, + 0x2795, 0x2797, + 0x27A1, 0x27A1, + 0x27B0, 0x27B0, + 0x27BF, 0x27BF, + 0x2934, 0x2935, + 0x2B05, 0x2B07, + 0x2B1B, 0x2B1C, + 0x2B50, 0x2B50, + 0x2B55, 0x2B55, + 0x3030, 0x3030, + 0x303D, 0x303D, + 0x3297, 0x3297, + 0x3299, 0x3299, + 0x1F000, 0x1F003, + 0x1F004, 0x1F004, + 0x1F005, 0x1F0CE, + 0x1F0CF, 0x1F0CF, + 0x1F0D0, 0x1F0FF, + 0x1F10D, 0x1F10F, + 0x1F12F, 0x1F12F, + 0x1F16C, 0x1F16F, + 0x1F170, 0x1F171, + 0x1F17E, 0x1F17F, + 0x1F18E, 0x1F18E, + 0x1F191, 0x1F19A, + 0x1F1AD, 0x1F1E5, + 0x1F201, 0x1F202, + 0x1F203, 0x1F20F, + 0x1F21A, 0x1F21A, + 0x1F22F, 0x1F22F, + 0x1F232, 0x1F23A, + 0x1F23C, 0x1F23F, + 0x1F249, 0x1F24F, + 0x1F250, 0x1F251, + 0x1F252, 0x1F2FF, + 0x1F300, 0x1F30C, + 0x1F30D, 0x1F30E, + 0x1F30F, 0x1F30F, + 0x1F310, 0x1F310, + 0x1F311, 0x1F311, + 0x1F312, 0x1F312, + 0x1F313, 0x1F315, + 0x1F316, 0x1F318, + 0x1F319, 0x1F319, + 0x1F31A, 0x1F31A, + 0x1F31B, 0x1F31B, + 0x1F31C, 0x1F31C, + 0x1F31D, 0x1F31E, + 0x1F31F, 0x1F320, + 0x1F321, 0x1F321, + 0x1F322, 0x1F323, + 0x1F324, 0x1F32C, + 0x1F32D, 0x1F32F, + 0x1F330, 0x1F331, + 0x1F332, 0x1F333, + 0x1F334, 0x1F335, + 0x1F336, 0x1F336, + 0x1F337, 0x1F34A, + 0x1F34B, 0x1F34B, + 0x1F34C, 0x1F34F, + 0x1F350, 0x1F350, + 0x1F351, 0x1F37B, + 0x1F37C, 0x1F37C, + 0x1F37D, 0x1F37D, + 0x1F37E, 0x1F37F, + 0x1F380, 0x1F393, + 
0x1F394, 0x1F395, + 0x1F396, 0x1F397, + 0x1F398, 0x1F398, + 0x1F399, 0x1F39B, + 0x1F39C, 0x1F39D, + 0x1F39E, 0x1F39F, + 0x1F3A0, 0x1F3C4, + 0x1F3C5, 0x1F3C5, + 0x1F3C6, 0x1F3C6, + 0x1F3C7, 0x1F3C7, + 0x1F3C8, 0x1F3C8, + 0x1F3C9, 0x1F3C9, + 0x1F3CA, 0x1F3CA, + 0x1F3CB, 0x1F3CE, + 0x1F3CF, 0x1F3D3, + 0x1F3D4, 0x1F3DF, + 0x1F3E0, 0x1F3E3, + 0x1F3E4, 0x1F3E4, + 0x1F3E5, 0x1F3F0, + 0x1F3F1, 0x1F3F2, + 0x1F3F3, 0x1F3F3, + 0x1F3F4, 0x1F3F4, + 0x1F3F5, 0x1F3F5, + 0x1F3F6, 0x1F3F6, + 0x1F3F7, 0x1F3F7, + 0x1F3F8, 0x1F3FA, + 0x1F400, 0x1F407, + 0x1F408, 0x1F408, + 0x1F409, 0x1F40B, + 0x1F40C, 0x1F40E, + 0x1F40F, 0x1F410, + 0x1F411, 0x1F412, + 0x1F413, 0x1F413, + 0x1F414, 0x1F414, + 0x1F415, 0x1F415, + 0x1F416, 0x1F416, + 0x1F417, 0x1F429, + 0x1F42A, 0x1F42A, + 0x1F42B, 0x1F43E, + 0x1F43F, 0x1F43F, + 0x1F440, 0x1F440, + 0x1F441, 0x1F441, + 0x1F442, 0x1F464, + 0x1F465, 0x1F465, + 0x1F466, 0x1F46B, + 0x1F46C, 0x1F46D, + 0x1F46E, 0x1F4AC, + 0x1F4AD, 0x1F4AD, + 0x1F4AE, 0x1F4B5, + 0x1F4B6, 0x1F4B7, + 0x1F4B8, 0x1F4EB, + 0x1F4EC, 0x1F4ED, + 0x1F4EE, 0x1F4EE, + 0x1F4EF, 0x1F4EF, + 0x1F4F0, 0x1F4F4, + 0x1F4F5, 0x1F4F5, + 0x1F4F6, 0x1F4F7, + 0x1F4F8, 0x1F4F8, + 0x1F4F9, 0x1F4FC, + 0x1F4FD, 0x1F4FD, + 0x1F4FE, 0x1F4FE, + 0x1F4FF, 0x1F502, + 0x1F503, 0x1F503, + 0x1F504, 0x1F507, + 0x1F508, 0x1F508, + 0x1F509, 0x1F509, + 0x1F50A, 0x1F514, + 0x1F515, 0x1F515, + 0x1F516, 0x1F52B, + 0x1F52C, 0x1F52D, + 0x1F52E, 0x1F53D, + 0x1F546, 0x1F548, + 0x1F549, 0x1F54A, + 0x1F54B, 0x1F54E, + 0x1F54F, 0x1F54F, + 0x1F550, 0x1F55B, + 0x1F55C, 0x1F567, + 0x1F568, 0x1F56E, + 0x1F56F, 0x1F570, + 0x1F571, 0x1F572, + 0x1F573, 0x1F579, + 0x1F57A, 0x1F57A, + 0x1F57B, 0x1F586, + 0x1F587, 0x1F587, + 0x1F588, 0x1F589, + 0x1F58A, 0x1F58D, + 0x1F58E, 0x1F58F, + 0x1F590, 0x1F590, + 0x1F591, 0x1F594, + 0x1F595, 0x1F596, + 0x1F597, 0x1F5A3, + 0x1F5A4, 0x1F5A4, + 0x1F5A5, 0x1F5A5, + 0x1F5A6, 0x1F5A7, + 0x1F5A8, 0x1F5A8, + 0x1F5A9, 0x1F5B0, + 0x1F5B1, 0x1F5B2, + 0x1F5B3, 0x1F5BB, + 0x1F5BC, 0x1F5BC, + 0x1F5BD, 0x1F5C1, + 
0x1F5C2, 0x1F5C4, + 0x1F5C5, 0x1F5D0, + 0x1F5D1, 0x1F5D3, + 0x1F5D4, 0x1F5DB, + 0x1F5DC, 0x1F5DE, + 0x1F5DF, 0x1F5E0, + 0x1F5E1, 0x1F5E1, + 0x1F5E2, 0x1F5E2, + 0x1F5E3, 0x1F5E3, + 0x1F5E4, 0x1F5E7, + 0x1F5E8, 0x1F5E8, + 0x1F5E9, 0x1F5EE, + 0x1F5EF, 0x1F5EF, + 0x1F5F0, 0x1F5F2, + 0x1F5F3, 0x1F5F3, + 0x1F5F4, 0x1F5F9, + 0x1F5FA, 0x1F5FA, + 0x1F5FB, 0x1F5FF, + 0x1F600, 0x1F600, + 0x1F601, 0x1F606, + 0x1F607, 0x1F608, + 0x1F609, 0x1F60D, + 0x1F60E, 0x1F60E, + 0x1F60F, 0x1F60F, + 0x1F610, 0x1F610, + 0x1F611, 0x1F611, + 0x1F612, 0x1F614, + 0x1F615, 0x1F615, + 0x1F616, 0x1F616, + 0x1F617, 0x1F617, + 0x1F618, 0x1F618, + 0x1F619, 0x1F619, + 0x1F61A, 0x1F61A, + 0x1F61B, 0x1F61B, + 0x1F61C, 0x1F61E, + 0x1F61F, 0x1F61F, + 0x1F620, 0x1F625, + 0x1F626, 0x1F627, + 0x1F628, 0x1F62B, + 0x1F62C, 0x1F62C, + 0x1F62D, 0x1F62D, + 0x1F62E, 0x1F62F, + 0x1F630, 0x1F633, + 0x1F634, 0x1F634, + 0x1F635, 0x1F635, + 0x1F636, 0x1F636, + 0x1F637, 0x1F640, + 0x1F641, 0x1F644, + 0x1F645, 0x1F64F, + 0x1F680, 0x1F680, + 0x1F681, 0x1F682, + 0x1F683, 0x1F685, + 0x1F686, 0x1F686, + 0x1F687, 0x1F687, + 0x1F688, 0x1F688, + 0x1F689, 0x1F689, + 0x1F68A, 0x1F68B, + 0x1F68C, 0x1F68C, + 0x1F68D, 0x1F68D, + 0x1F68E, 0x1F68E, + 0x1F68F, 0x1F68F, + 0x1F690, 0x1F690, + 0x1F691, 0x1F693, + 0x1F694, 0x1F694, + 0x1F695, 0x1F695, + 0x1F696, 0x1F696, + 0x1F697, 0x1F697, + 0x1F698, 0x1F698, + 0x1F699, 0x1F69A, + 0x1F69B, 0x1F6A1, + 0x1F6A2, 0x1F6A2, + 0x1F6A3, 0x1F6A3, + 0x1F6A4, 0x1F6A5, + 0x1F6A6, 0x1F6A6, + 0x1F6A7, 0x1F6AD, + 0x1F6AE, 0x1F6B1, + 0x1F6B2, 0x1F6B2, + 0x1F6B3, 0x1F6B5, + 0x1F6B6, 0x1F6B6, + 0x1F6B7, 0x1F6B8, + 0x1F6B9, 0x1F6BE, + 0x1F6BF, 0x1F6BF, + 0x1F6C0, 0x1F6C0, + 0x1F6C1, 0x1F6C5, + 0x1F6C6, 0x1F6CA, + 0x1F6CB, 0x1F6CB, + 0x1F6CC, 0x1F6CC, + 0x1F6CD, 0x1F6CF, + 0x1F6D0, 0x1F6D0, + 0x1F6D1, 0x1F6D2, + 0x1F6D3, 0x1F6D4, + 0x1F6D5, 0x1F6D5, + 0x1F6D6, 0x1F6D7, + 0x1F6D8, 0x1F6DB, + 0x1F6DC, 0x1F6DC, + 0x1F6DD, 0x1F6DF, + 0x1F6E0, 0x1F6E5, + 0x1F6E6, 0x1F6E8, + 0x1F6E9, 0x1F6E9, + 0x1F6EA, 0x1F6EA, + 
0x1F6EB, 0x1F6EC, + 0x1F6ED, 0x1F6EF, + 0x1F6F0, 0x1F6F0, + 0x1F6F1, 0x1F6F2, + 0x1F6F3, 0x1F6F3, + 0x1F6F4, 0x1F6F6, + 0x1F6F7, 0x1F6F8, + 0x1F6F9, 0x1F6F9, + 0x1F6FA, 0x1F6FA, + 0x1F6FB, 0x1F6FC, + 0x1F6FD, 0x1F6FF, + 0x1F774, 0x1F77F, + 0x1F7D5, 0x1F7DF, + 0x1F7E0, 0x1F7EB, + 0x1F7EC, 0x1F7EF, + 0x1F7F0, 0x1F7F0, + 0x1F7F1, 0x1F7FF, + 0x1F80C, 0x1F80F, + 0x1F848, 0x1F84F, + 0x1F85A, 0x1F85F, + 0x1F888, 0x1F88F, + 0x1F8AE, 0x1F8FF, + 0x1F90C, 0x1F90C, + 0x1F90D, 0x1F90F, + 0x1F910, 0x1F918, + 0x1F919, 0x1F91E, + 0x1F91F, 0x1F91F, + 0x1F920, 0x1F927, + 0x1F928, 0x1F92F, + 0x1F930, 0x1F930, + 0x1F931, 0x1F932, + 0x1F933, 0x1F93A, + 0x1F93C, 0x1F93E, + 0x1F93F, 0x1F93F, + 0x1F940, 0x1F945, + 0x1F947, 0x1F94B, + 0x1F94C, 0x1F94C, + 0x1F94D, 0x1F94F, + 0x1F950, 0x1F95E, + 0x1F95F, 0x1F96B, + 0x1F96C, 0x1F970, + 0x1F971, 0x1F971, + 0x1F972, 0x1F972, + 0x1F973, 0x1F976, + 0x1F977, 0x1F978, + 0x1F979, 0x1F979, + 0x1F97A, 0x1F97A, + 0x1F97B, 0x1F97B, + 0x1F97C, 0x1F97F, + 0x1F980, 0x1F984, + 0x1F985, 0x1F991, + 0x1F992, 0x1F997, + 0x1F998, 0x1F9A2, + 0x1F9A3, 0x1F9A4, + 0x1F9A5, 0x1F9AA, + 0x1F9AB, 0x1F9AD, + 0x1F9AE, 0x1F9AF, + 0x1F9B0, 0x1F9B9, + 0x1F9BA, 0x1F9BF, + 0x1F9C0, 0x1F9C0, + 0x1F9C1, 0x1F9C2, + 0x1F9C3, 0x1F9CA, + 0x1F9CB, 0x1F9CB, + 0x1F9CC, 0x1F9CC, + 0x1F9CD, 0x1F9CF, + 0x1F9D0, 0x1F9E6, + 0x1F9E7, 0x1F9FF, + 0x1FA00, 0x1FA6F, + 0x1FA70, 0x1FA73, + 0x1FA74, 0x1FA74, + 0x1FA75, 0x1FA77, + 0x1FA78, 0x1FA7A, + 0x1FA7B, 0x1FA7C, + 0x1FA7D, 0x1FA7F, + 0x1FA80, 0x1FA82, + 0x1FA83, 0x1FA86, + 0x1FA87, 0x1FA88, + 0x1FA89, 0x1FA8F, + 0x1FA90, 0x1FA95, + 0x1FA96, 0x1FAA8, + 0x1FAA9, 0x1FAAC, + 0x1FAAD, 0x1FAAF, + 0x1FAB0, 0x1FAB6, + 0x1FAB7, 0x1FABA, + 0x1FABB, 0x1FABD, + 0x1FABE, 0x1FABE, + 0x1FABF, 0x1FABF, + 0x1FAC0, 0x1FAC2, + 0x1FAC3, 0x1FAC5, + 0x1FAC6, 0x1FACD, + 0x1FACE, 0x1FACF, + 0x1FAD0, 0x1FAD6, + 0x1FAD7, 0x1FAD9, + 0x1FADA, 0x1FADB, + 0x1FADC, 0x1FADF, + 0x1FAE0, 0x1FAE7, + 0x1FAE8, 0x1FAE8, + 0x1FAE9, 0x1FAEF, + 0x1FAF0, 0x1FAF6, + 0x1FAF7, 0x1FAF8, + 
0x1FAF9, 0x1FAFF, + 0x1FC00, 0x1FFFD, +}; + +static const int32_t ucg_grapheme_extend_ranges[] = { + 0x0300, 0x036F, + 0x0483, 0x0487, + 0x0488, 0x0489, + 0x0591, 0x05BD, + 0x05BF, 0x05BF, + 0x05C1, 0x05C2, + 0x05C4, 0x05C5, + 0x05C7, 0x05C7, + 0x0610, 0x061A, + 0x064B, 0x065F, + 0x0670, 0x0670, + 0x06D6, 0x06DC, + 0x06DF, 0x06E4, + 0x06E7, 0x06E8, + 0x06EA, 0x06ED, + 0x0711, 0x0711, + 0x0730, 0x074A, + 0x07A6, 0x07B0, + 0x07EB, 0x07F3, + 0x07FD, 0x07FD, + 0x0816, 0x0819, + 0x081B, 0x0823, + 0x0825, 0x0827, + 0x0829, 0x082D, + 0x0859, 0x085B, + 0x0898, 0x089F, + 0x08CA, 0x08E1, + 0x08E3, 0x0902, + 0x093A, 0x093A, + 0x093C, 0x093C, + 0x0941, 0x0948, + 0x094D, 0x094D, + 0x0951, 0x0957, + 0x0962, 0x0963, + 0x0981, 0x0981, + 0x09BC, 0x09BC, + 0x09BE, 0x09BE, + 0x09C1, 0x09C4, + 0x09CD, 0x09CD, + 0x09D7, 0x09D7, + 0x09E2, 0x09E3, + 0x09FE, 0x09FE, + 0x0A01, 0x0A02, + 0x0A3C, 0x0A3C, + 0x0A41, 0x0A42, + 0x0A47, 0x0A48, + 0x0A4B, 0x0A4D, + 0x0A51, 0x0A51, + 0x0A70, 0x0A71, + 0x0A75, 0x0A75, + 0x0A81, 0x0A82, + 0x0ABC, 0x0ABC, + 0x0AC1, 0x0AC5, + 0x0AC7, 0x0AC8, + 0x0ACD, 0x0ACD, + 0x0AE2, 0x0AE3, + 0x0AFA, 0x0AFF, + 0x0B01, 0x0B01, + 0x0B3C, 0x0B3C, + 0x0B3E, 0x0B3E, + 0x0B3F, 0x0B3F, + 0x0B41, 0x0B44, + 0x0B4D, 0x0B4D, + 0x0B55, 0x0B56, + 0x0B57, 0x0B57, + 0x0B62, 0x0B63, + 0x0B82, 0x0B82, + 0x0BBE, 0x0BBE, + 0x0BC0, 0x0BC0, + 0x0BCD, 0x0BCD, + 0x0BD7, 0x0BD7, + 0x0C00, 0x0C00, + 0x0C04, 0x0C04, + 0x0C3C, 0x0C3C, + 0x0C3E, 0x0C40, + 0x0C46, 0x0C48, + 0x0C4A, 0x0C4D, + 0x0C55, 0x0C56, + 0x0C62, 0x0C63, + 0x0C81, 0x0C81, + 0x0CBC, 0x0CBC, + 0x0CBF, 0x0CBF, + 0x0CC2, 0x0CC2, + 0x0CC6, 0x0CC6, + 0x0CCC, 0x0CCD, + 0x0CD5, 0x0CD6, + 0x0CE2, 0x0CE3, + 0x0D00, 0x0D01, + 0x0D3B, 0x0D3C, + 0x0D3E, 0x0D3E, + 0x0D41, 0x0D44, + 0x0D4D, 0x0D4D, + 0x0D57, 0x0D57, + 0x0D62, 0x0D63, + 0x0D81, 0x0D81, + 0x0DCA, 0x0DCA, + 0x0DCF, 0x0DCF, + 0x0DD2, 0x0DD4, + 0x0DD6, 0x0DD6, + 0x0DDF, 0x0DDF, + 0x0E31, 0x0E31, + 0x0E34, 0x0E3A, + 0x0E47, 0x0E4E, + 0x0EB1, 0x0EB1, + 0x0EB4, 0x0EBC, + 0x0EC8, 
0x0ECE, + 0x0F18, 0x0F19, + 0x0F35, 0x0F35, + 0x0F37, 0x0F37, + 0x0F39, 0x0F39, + 0x0F71, 0x0F7E, + 0x0F80, 0x0F84, + 0x0F86, 0x0F87, + 0x0F8D, 0x0F97, + 0x0F99, 0x0FBC, + 0x0FC6, 0x0FC6, + 0x102D, 0x1030, + 0x1032, 0x1037, + 0x1039, 0x103A, + 0x103D, 0x103E, + 0x1058, 0x1059, + 0x105E, 0x1060, + 0x1071, 0x1074, + 0x1082, 0x1082, + 0x1085, 0x1086, + 0x108D, 0x108D, + 0x109D, 0x109D, + 0x135D, 0x135F, + 0x1712, 0x1714, + 0x1732, 0x1733, + 0x1752, 0x1753, + 0x1772, 0x1773, + 0x17B4, 0x17B5, + 0x17B7, 0x17BD, + 0x17C6, 0x17C6, + 0x17C9, 0x17D3, + 0x17DD, 0x17DD, + 0x180B, 0x180D, + 0x180F, 0x180F, + 0x1885, 0x1886, + 0x18A9, 0x18A9, + 0x1920, 0x1922, + 0x1927, 0x1928, + 0x1932, 0x1932, + 0x1939, 0x193B, + 0x1A17, 0x1A18, + 0x1A1B, 0x1A1B, + 0x1A56, 0x1A56, + 0x1A58, 0x1A5E, + 0x1A60, 0x1A60, + 0x1A62, 0x1A62, + 0x1A65, 0x1A6C, + 0x1A73, 0x1A7C, + 0x1A7F, 0x1A7F, + 0x1AB0, 0x1ABD, + 0x1ABE, 0x1ABE, + 0x1ABF, 0x1ACE, + 0x1B00, 0x1B03, + 0x1B34, 0x1B34, + 0x1B35, 0x1B35, + 0x1B36, 0x1B3A, + 0x1B3C, 0x1B3C, + 0x1B42, 0x1B42, + 0x1B6B, 0x1B73, + 0x1B80, 0x1B81, + 0x1BA2, 0x1BA5, + 0x1BA8, 0x1BA9, + 0x1BAB, 0x1BAD, + 0x1BE6, 0x1BE6, + 0x1BE8, 0x1BE9, + 0x1BED, 0x1BED, + 0x1BEF, 0x1BF1, + 0x1C2C, 0x1C33, + 0x1C36, 0x1C37, + 0x1CD0, 0x1CD2, + 0x1CD4, 0x1CE0, + 0x1CE2, 0x1CE8, + 0x1CED, 0x1CED, + 0x1CF4, 0x1CF4, + 0x1CF8, 0x1CF9, + 0x1DC0, 0x1DFF, + 0x200C, 0x200C, + 0x20D0, 0x20DC, + 0x20DD, 0x20E0, + 0x20E1, 0x20E1, + 0x20E2, 0x20E4, + 0x20E5, 0x20F0, + 0x2CEF, 0x2CF1, + 0x2D7F, 0x2D7F, + 0x2DE0, 0x2DFF, + 0x302A, 0x302D, + 0x302E, 0x302F, + 0x3099, 0x309A, + 0xA66F, 0xA66F, + 0xA670, 0xA672, + 0xA674, 0xA67D, + 0xA69E, 0xA69F, + 0xA6F0, 0xA6F1, + 0xA802, 0xA802, + 0xA806, 0xA806, + 0xA80B, 0xA80B, + 0xA825, 0xA826, + 0xA82C, 0xA82C, + 0xA8C4, 0xA8C5, + 0xA8E0, 0xA8F1, + 0xA8FF, 0xA8FF, + 0xA926, 0xA92D, + 0xA947, 0xA951, + 0xA980, 0xA982, + 0xA9B3, 0xA9B3, + 0xA9B6, 0xA9B9, + 0xA9BC, 0xA9BD, + 0xA9E5, 0xA9E5, + 0xAA29, 0xAA2E, + 0xAA31, 0xAA32, + 0xAA35, 0xAA36, + 0xAA43, 
0xAA43, + 0xAA4C, 0xAA4C, + 0xAA7C, 0xAA7C, + 0xAAB0, 0xAAB0, + 0xAAB2, 0xAAB4, + 0xAAB7, 0xAAB8, + 0xAABE, 0xAABF, + 0xAAC1, 0xAAC1, + 0xAAEC, 0xAAED, + 0xAAF6, 0xAAF6, + 0xABE5, 0xABE5, + 0xABE8, 0xABE8, + 0xABED, 0xABED, + 0xFB1E, 0xFB1E, + 0xFE00, 0xFE0F, + 0xFE20, 0xFE2F, + 0xFF9E, 0xFF9F, + 0x101FD, 0x101FD, + 0x102E0, 0x102E0, + 0x10376, 0x1037A, + 0x10A01, 0x10A03, + 0x10A05, 0x10A06, + 0x10A0C, 0x10A0F, + 0x10A38, 0x10A3A, + 0x10A3F, 0x10A3F, + 0x10AE5, 0x10AE6, + 0x10D24, 0x10D27, + 0x10EAB, 0x10EAC, + 0x10EFD, 0x10EFF, + 0x10F46, 0x10F50, + 0x10F82, 0x10F85, + 0x11001, 0x11001, + 0x11038, 0x11046, + 0x11070, 0x11070, + 0x11073, 0x11074, + 0x1107F, 0x11081, + 0x110B3, 0x110B6, + 0x110B9, 0x110BA, + 0x110C2, 0x110C2, + 0x11100, 0x11102, + 0x11127, 0x1112B, + 0x1112D, 0x11134, + 0x11173, 0x11173, + 0x11180, 0x11181, + 0x111B6, 0x111BE, + 0x111C9, 0x111CC, + 0x111CF, 0x111CF, + 0x1122F, 0x11231, + 0x11234, 0x11234, + 0x11236, 0x11237, + 0x1123E, 0x1123E, + 0x11241, 0x11241, + 0x112DF, 0x112DF, + 0x112E3, 0x112EA, + 0x11300, 0x11301, + 0x1133B, 0x1133C, + 0x1133E, 0x1133E, + 0x11340, 0x11340, + 0x11357, 0x11357, + 0x11366, 0x1136C, + 0x11370, 0x11374, + 0x11438, 0x1143F, + 0x11442, 0x11444, + 0x11446, 0x11446, + 0x1145E, 0x1145E, + 0x114B0, 0x114B0, + 0x114B3, 0x114B8, + 0x114BA, 0x114BA, + 0x114BD, 0x114BD, + 0x114BF, 0x114C0, + 0x114C2, 0x114C3, + 0x115AF, 0x115AF, + 0x115B2, 0x115B5, + 0x115BC, 0x115BD, + 0x115BF, 0x115C0, + 0x115DC, 0x115DD, + 0x11633, 0x1163A, + 0x1163D, 0x1163D, + 0x1163F, 0x11640, + 0x116AB, 0x116AB, + 0x116AD, 0x116AD, + 0x116B0, 0x116B5, + 0x116B7, 0x116B7, + 0x1171D, 0x1171F, + 0x11722, 0x11725, + 0x11727, 0x1172B, + 0x1182F, 0x11837, + 0x11839, 0x1183A, + 0x11930, 0x11930, + 0x1193B, 0x1193C, + 0x1193E, 0x1193E, + 0x11943, 0x11943, + 0x119D4, 0x119D7, + 0x119DA, 0x119DB, + 0x119E0, 0x119E0, + 0x11A01, 0x11A0A, + 0x11A33, 0x11A38, + 0x11A3B, 0x11A3E, + 0x11A47, 0x11A47, + 0x11A51, 0x11A56, + 0x11A59, 0x11A5B, + 0x11A8A, 0x11A96, + 
0x11A98, 0x11A99, + 0x11C30, 0x11C36, + 0x11C38, 0x11C3D, + 0x11C3F, 0x11C3F, + 0x11C92, 0x11CA7, + 0x11CAA, 0x11CB0, + 0x11CB2, 0x11CB3, + 0x11CB5, 0x11CB6, + 0x11D31, 0x11D36, + 0x11D3A, 0x11D3A, + 0x11D3C, 0x11D3D, + 0x11D3F, 0x11D45, + 0x11D47, 0x11D47, + 0x11D90, 0x11D91, + 0x11D95, 0x11D95, + 0x11D97, 0x11D97, + 0x11EF3, 0x11EF4, + 0x11F00, 0x11F01, + 0x11F36, 0x11F3A, + 0x11F40, 0x11F40, + 0x11F42, 0x11F42, + 0x13440, 0x13440, + 0x13447, 0x13455, + 0x16AF0, 0x16AF4, + 0x16B30, 0x16B36, + 0x16F4F, 0x16F4F, + 0x16F8F, 0x16F92, + 0x16FE4, 0x16FE4, + 0x1BC9D, 0x1BC9E, + 0x1CF00, 0x1CF2D, + 0x1CF30, 0x1CF46, + 0x1D165, 0x1D165, + 0x1D167, 0x1D169, + 0x1D16E, 0x1D172, + 0x1D17B, 0x1D182, + 0x1D185, 0x1D18B, + 0x1D1AA, 0x1D1AD, + 0x1D242, 0x1D244, + 0x1DA00, 0x1DA36, + 0x1DA3B, 0x1DA6C, + 0x1DA75, 0x1DA75, + 0x1DA84, 0x1DA84, + 0x1DA9B, 0x1DA9F, + 0x1DAA1, 0x1DAAF, + 0x1E000, 0x1E006, + 0x1E008, 0x1E018, + 0x1E01B, 0x1E021, + 0x1E023, 0x1E024, + 0x1E026, 0x1E02A, + 0x1E08F, 0x1E08F, + 0x1E130, 0x1E136, + 0x1E2AE, 0x1E2AE, + 0x1E2EC, 0x1E2EF, + 0x1E4EC, 0x1E4EF, + 0x1E8D0, 0x1E8D6, + 0x1E944, 0x1E94A, + 0xE0020, 0xE007F, + 0xE0100, 0xE01EF, +}; + +static const int32_t ucg_hangul_syllable_lv_singlets[] = { + 0xAC00, + 0xAC1C, + 0xAC38, + 0xAC54, + 0xAC70, + 0xAC8C, + 0xACA8, + 0xACC4, + 0xACE0, + 0xACFC, + 0xAD18, + 0xAD34, + 0xAD50, + 0xAD6C, + 0xAD88, + 0xADA4, + 0xADC0, + 0xADDC, + 0xADF8, + 0xAE14, + 0xAE30, + 0xAE4C, + 0xAE68, + 0xAE84, + 0xAEA0, + 0xAEBC, + 0xAED8, + 0xAEF4, + 0xAF10, + 0xAF2C, + 0xAF48, + 0xAF64, + 0xAF80, + 0xAF9C, + 0xAFB8, + 0xAFD4, + 0xAFF0, + 0xB00C, + 0xB028, + 0xB044, + 0xB060, + 0xB07C, + 0xB098, + 0xB0B4, + 0xB0D0, + 0xB0EC, + 0xB108, + 0xB124, + 0xB140, + 0xB15C, + 0xB178, + 0xB194, + 0xB1B0, + 0xB1CC, + 0xB1E8, + 0xB204, + 0xB220, + 0xB23C, + 0xB258, + 0xB274, + 0xB290, + 0xB2AC, + 0xB2C8, + 0xB2E4, + 0xB300, + 0xB31C, + 0xB338, + 0xB354, + 0xB370, + 0xB38C, + 0xB3A8, + 0xB3C4, + 0xB3E0, + 0xB3FC, + 0xB418, + 0xB434, + 0xB450, + 
0xB46C, + 0xB488, + 0xB4A4, + 0xB4C0, + 0xB4DC, + 0xB4F8, + 0xB514, + 0xB530, + 0xB54C, + 0xB568, + 0xB584, + 0xB5A0, + 0xB5BC, + 0xB5D8, + 0xB5F4, + 0xB610, + 0xB62C, + 0xB648, + 0xB664, + 0xB680, + 0xB69C, + 0xB6B8, + 0xB6D4, + 0xB6F0, + 0xB70C, + 0xB728, + 0xB744, + 0xB760, + 0xB77C, + 0xB798, + 0xB7B4, + 0xB7D0, + 0xB7EC, + 0xB808, + 0xB824, + 0xB840, + 0xB85C, + 0xB878, + 0xB894, + 0xB8B0, + 0xB8CC, + 0xB8E8, + 0xB904, + 0xB920, + 0xB93C, + 0xB958, + 0xB974, + 0xB990, + 0xB9AC, + 0xB9C8, + 0xB9E4, + 0xBA00, + 0xBA1C, + 0xBA38, + 0xBA54, + 0xBA70, + 0xBA8C, + 0xBAA8, + 0xBAC4, + 0xBAE0, + 0xBAFC, + 0xBB18, + 0xBB34, + 0xBB50, + 0xBB6C, + 0xBB88, + 0xBBA4, + 0xBBC0, + 0xBBDC, + 0xBBF8, + 0xBC14, + 0xBC30, + 0xBC4C, + 0xBC68, + 0xBC84, + 0xBCA0, + 0xBCBC, + 0xBCD8, + 0xBCF4, + 0xBD10, + 0xBD2C, + 0xBD48, + 0xBD64, + 0xBD80, + 0xBD9C, + 0xBDB8, + 0xBDD4, + 0xBDF0, + 0xBE0C, + 0xBE28, + 0xBE44, + 0xBE60, + 0xBE7C, + 0xBE98, + 0xBEB4, + 0xBED0, + 0xBEEC, + 0xBF08, + 0xBF24, + 0xBF40, + 0xBF5C, + 0xBF78, + 0xBF94, + 0xBFB0, + 0xBFCC, + 0xBFE8, + 0xC004, + 0xC020, + 0xC03C, + 0xC058, + 0xC074, + 0xC090, + 0xC0AC, + 0xC0C8, + 0xC0E4, + 0xC100, + 0xC11C, + 0xC138, + 0xC154, + 0xC170, + 0xC18C, + 0xC1A8, + 0xC1C4, + 0xC1E0, + 0xC1FC, + 0xC218, + 0xC234, + 0xC250, + 0xC26C, + 0xC288, + 0xC2A4, + 0xC2C0, + 0xC2DC, + 0xC2F8, + 0xC314, + 0xC330, + 0xC34C, + 0xC368, + 0xC384, + 0xC3A0, + 0xC3BC, + 0xC3D8, + 0xC3F4, + 0xC410, + 0xC42C, + 0xC448, + 0xC464, + 0xC480, + 0xC49C, + 0xC4B8, + 0xC4D4, + 0xC4F0, + 0xC50C, + 0xC528, + 0xC544, + 0xC560, + 0xC57C, + 0xC598, + 0xC5B4, + 0xC5D0, + 0xC5EC, + 0xC608, + 0xC624, + 0xC640, + 0xC65C, + 0xC678, + 0xC694, + 0xC6B0, + 0xC6CC, + 0xC6E8, + 0xC704, + 0xC720, + 0xC73C, + 0xC758, + 0xC774, + 0xC790, + 0xC7AC, + 0xC7C8, + 0xC7E4, + 0xC800, + 0xC81C, + 0xC838, + 0xC854, + 0xC870, + 0xC88C, + 0xC8A8, + 0xC8C4, + 0xC8E0, + 0xC8FC, + 0xC918, + 0xC934, + 0xC950, + 0xC96C, + 0xC988, + 0xC9A4, + 0xC9C0, + 0xC9DC, + 0xC9F8, + 0xCA14, + 0xCA30, + 
0xCA4C, + 0xCA68, + 0xCA84, + 0xCAA0, + 0xCABC, + 0xCAD8, + 0xCAF4, + 0xCB10, + 0xCB2C, + 0xCB48, + 0xCB64, + 0xCB80, + 0xCB9C, + 0xCBB8, + 0xCBD4, + 0xCBF0, + 0xCC0C, + 0xCC28, + 0xCC44, + 0xCC60, + 0xCC7C, + 0xCC98, + 0xCCB4, + 0xCCD0, + 0xCCEC, + 0xCD08, + 0xCD24, + 0xCD40, + 0xCD5C, + 0xCD78, + 0xCD94, + 0xCDB0, + 0xCDCC, + 0xCDE8, + 0xCE04, + 0xCE20, + 0xCE3C, + 0xCE58, + 0xCE74, + 0xCE90, + 0xCEAC, + 0xCEC8, + 0xCEE4, + 0xCF00, + 0xCF1C, + 0xCF38, + 0xCF54, + 0xCF70, + 0xCF8C, + 0xCFA8, + 0xCFC4, + 0xCFE0, + 0xCFFC, + 0xD018, + 0xD034, + 0xD050, + 0xD06C, + 0xD088, + 0xD0A4, + 0xD0C0, + 0xD0DC, + 0xD0F8, + 0xD114, + 0xD130, + 0xD14C, + 0xD168, + 0xD184, + 0xD1A0, + 0xD1BC, + 0xD1D8, + 0xD1F4, + 0xD210, + 0xD22C, + 0xD248, + 0xD264, + 0xD280, + 0xD29C, + 0xD2B8, + 0xD2D4, + 0xD2F0, + 0xD30C, + 0xD328, + 0xD344, + 0xD360, + 0xD37C, + 0xD398, + 0xD3B4, + 0xD3D0, + 0xD3EC, + 0xD408, + 0xD424, + 0xD440, + 0xD45C, + 0xD478, + 0xD494, + 0xD4B0, + 0xD4CC, + 0xD4E8, + 0xD504, + 0xD520, + 0xD53C, + 0xD558, + 0xD574, + 0xD590, + 0xD5AC, + 0xD5C8, + 0xD5E4, + 0xD600, + 0xD61C, + 0xD638, + 0xD654, + 0xD670, + 0xD68C, + 0xD6A8, + 0xD6C4, + 0xD6E0, + 0xD6FC, + 0xD718, + 0xD734, + 0xD750, + 0xD76C, + 0xD788, +}; + +static const int32_t ucg_hangul_syllable_lvt_ranges[] = { + 0xAC01, 0xAC1B, + 0xAC1D, 0xAC37, + 0xAC39, 0xAC53, + 0xAC55, 0xAC6F, + 0xAC71, 0xAC8B, + 0xAC8D, 0xACA7, + 0xACA9, 0xACC3, + 0xACC5, 0xACDF, + 0xACE1, 0xACFB, + 0xACFD, 0xAD17, + 0xAD19, 0xAD33, + 0xAD35, 0xAD4F, + 0xAD51, 0xAD6B, + 0xAD6D, 0xAD87, + 0xAD89, 0xADA3, + 0xADA5, 0xADBF, + 0xADC1, 0xADDB, + 0xADDD, 0xADF7, + 0xADF9, 0xAE13, + 0xAE15, 0xAE2F, + 0xAE31, 0xAE4B, + 0xAE4D, 0xAE67, + 0xAE69, 0xAE83, + 0xAE85, 0xAE9F, + 0xAEA1, 0xAEBB, + 0xAEBD, 0xAED7, + 0xAED9, 0xAEF3, + 0xAEF5, 0xAF0F, + 0xAF11, 0xAF2B, + 0xAF2D, 0xAF47, + 0xAF49, 0xAF63, + 0xAF65, 0xAF7F, + 0xAF81, 0xAF9B, + 0xAF9D, 0xAFB7, + 0xAFB9, 0xAFD3, + 0xAFD5, 0xAFEF, + 0xAFF1, 0xB00B, + 0xB00D, 0xB027, + 0xB029, 0xB043, + 0xB045, 
0xB05F, + 0xB061, 0xB07B, + 0xB07D, 0xB097, + 0xB099, 0xB0B3, + 0xB0B5, 0xB0CF, + 0xB0D1, 0xB0EB, + 0xB0ED, 0xB107, + 0xB109, 0xB123, + 0xB125, 0xB13F, + 0xB141, 0xB15B, + 0xB15D, 0xB177, + 0xB179, 0xB193, + 0xB195, 0xB1AF, + 0xB1B1, 0xB1CB, + 0xB1CD, 0xB1E7, + 0xB1E9, 0xB203, + 0xB205, 0xB21F, + 0xB221, 0xB23B, + 0xB23D, 0xB257, + 0xB259, 0xB273, + 0xB275, 0xB28F, + 0xB291, 0xB2AB, + 0xB2AD, 0xB2C7, + 0xB2C9, 0xB2E3, + 0xB2E5, 0xB2FF, + 0xB301, 0xB31B, + 0xB31D, 0xB337, + 0xB339, 0xB353, + 0xB355, 0xB36F, + 0xB371, 0xB38B, + 0xB38D, 0xB3A7, + 0xB3A9, 0xB3C3, + 0xB3C5, 0xB3DF, + 0xB3E1, 0xB3FB, + 0xB3FD, 0xB417, + 0xB419, 0xB433, + 0xB435, 0xB44F, + 0xB451, 0xB46B, + 0xB46D, 0xB487, + 0xB489, 0xB4A3, + 0xB4A5, 0xB4BF, + 0xB4C1, 0xB4DB, + 0xB4DD, 0xB4F7, + 0xB4F9, 0xB513, + 0xB515, 0xB52F, + 0xB531, 0xB54B, + 0xB54D, 0xB567, + 0xB569, 0xB583, + 0xB585, 0xB59F, + 0xB5A1, 0xB5BB, + 0xB5BD, 0xB5D7, + 0xB5D9, 0xB5F3, + 0xB5F5, 0xB60F, + 0xB611, 0xB62B, + 0xB62D, 0xB647, + 0xB649, 0xB663, + 0xB665, 0xB67F, + 0xB681, 0xB69B, + 0xB69D, 0xB6B7, + 0xB6B9, 0xB6D3, + 0xB6D5, 0xB6EF, + 0xB6F1, 0xB70B, + 0xB70D, 0xB727, + 0xB729, 0xB743, + 0xB745, 0xB75F, + 0xB761, 0xB77B, + 0xB77D, 0xB797, + 0xB799, 0xB7B3, + 0xB7B5, 0xB7CF, + 0xB7D1, 0xB7EB, + 0xB7ED, 0xB807, + 0xB809, 0xB823, + 0xB825, 0xB83F, + 0xB841, 0xB85B, + 0xB85D, 0xB877, + 0xB879, 0xB893, + 0xB895, 0xB8AF, + 0xB8B1, 0xB8CB, + 0xB8CD, 0xB8E7, + 0xB8E9, 0xB903, + 0xB905, 0xB91F, + 0xB921, 0xB93B, + 0xB93D, 0xB957, + 0xB959, 0xB973, + 0xB975, 0xB98F, + 0xB991, 0xB9AB, + 0xB9AD, 0xB9C7, + 0xB9C9, 0xB9E3, + 0xB9E5, 0xB9FF, + 0xBA01, 0xBA1B, + 0xBA1D, 0xBA37, + 0xBA39, 0xBA53, + 0xBA55, 0xBA6F, + 0xBA71, 0xBA8B, + 0xBA8D, 0xBAA7, + 0xBAA9, 0xBAC3, + 0xBAC5, 0xBADF, + 0xBAE1, 0xBAFB, + 0xBAFD, 0xBB17, + 0xBB19, 0xBB33, + 0xBB35, 0xBB4F, + 0xBB51, 0xBB6B, + 0xBB6D, 0xBB87, + 0xBB89, 0xBBA3, + 0xBBA5, 0xBBBF, + 0xBBC1, 0xBBDB, + 0xBBDD, 0xBBF7, + 0xBBF9, 0xBC13, + 0xBC15, 0xBC2F, + 0xBC31, 0xBC4B, + 0xBC4D, 0xBC67, + 0xBC69, 
0xBC83, + 0xBC85, 0xBC9F, + 0xBCA1, 0xBCBB, + 0xBCBD, 0xBCD7, + 0xBCD9, 0xBCF3, + 0xBCF5, 0xBD0F, + 0xBD11, 0xBD2B, + 0xBD2D, 0xBD47, + 0xBD49, 0xBD63, + 0xBD65, 0xBD7F, + 0xBD81, 0xBD9B, + 0xBD9D, 0xBDB7, + 0xBDB9, 0xBDD3, + 0xBDD5, 0xBDEF, + 0xBDF1, 0xBE0B, + 0xBE0D, 0xBE27, + 0xBE29, 0xBE43, + 0xBE45, 0xBE5F, + 0xBE61, 0xBE7B, + 0xBE7D, 0xBE97, + 0xBE99, 0xBEB3, + 0xBEB5, 0xBECF, + 0xBED1, 0xBEEB, + 0xBEED, 0xBF07, + 0xBF09, 0xBF23, + 0xBF25, 0xBF3F, + 0xBF41, 0xBF5B, + 0xBF5D, 0xBF77, + 0xBF79, 0xBF93, + 0xBF95, 0xBFAF, + 0xBFB1, 0xBFCB, + 0xBFCD, 0xBFE7, + 0xBFE9, 0xC003, + 0xC005, 0xC01F, + 0xC021, 0xC03B, + 0xC03D, 0xC057, + 0xC059, 0xC073, + 0xC075, 0xC08F, + 0xC091, 0xC0AB, + 0xC0AD, 0xC0C7, + 0xC0C9, 0xC0E3, + 0xC0E5, 0xC0FF, + 0xC101, 0xC11B, + 0xC11D, 0xC137, + 0xC139, 0xC153, + 0xC155, 0xC16F, + 0xC171, 0xC18B, + 0xC18D, 0xC1A7, + 0xC1A9, 0xC1C3, + 0xC1C5, 0xC1DF, + 0xC1E1, 0xC1FB, + 0xC1FD, 0xC217, + 0xC219, 0xC233, + 0xC235, 0xC24F, + 0xC251, 0xC26B, + 0xC26D, 0xC287, + 0xC289, 0xC2A3, + 0xC2A5, 0xC2BF, + 0xC2C1, 0xC2DB, + 0xC2DD, 0xC2F7, + 0xC2F9, 0xC313, + 0xC315, 0xC32F, + 0xC331, 0xC34B, + 0xC34D, 0xC367, + 0xC369, 0xC383, + 0xC385, 0xC39F, + 0xC3A1, 0xC3BB, + 0xC3BD, 0xC3D7, + 0xC3D9, 0xC3F3, + 0xC3F5, 0xC40F, + 0xC411, 0xC42B, + 0xC42D, 0xC447, + 0xC449, 0xC463, + 0xC465, 0xC47F, + 0xC481, 0xC49B, + 0xC49D, 0xC4B7, + 0xC4B9, 0xC4D3, + 0xC4D5, 0xC4EF, + 0xC4F1, 0xC50B, + 0xC50D, 0xC527, + 0xC529, 0xC543, + 0xC545, 0xC55F, + 0xC561, 0xC57B, + 0xC57D, 0xC597, + 0xC599, 0xC5B3, + 0xC5B5, 0xC5CF, + 0xC5D1, 0xC5EB, + 0xC5ED, 0xC607, + 0xC609, 0xC623, + 0xC625, 0xC63F, + 0xC641, 0xC65B, + 0xC65D, 0xC677, + 0xC679, 0xC693, + 0xC695, 0xC6AF, + 0xC6B1, 0xC6CB, + 0xC6CD, 0xC6E7, + 0xC6E9, 0xC703, + 0xC705, 0xC71F, + 0xC721, 0xC73B, + 0xC73D, 0xC757, + 0xC759, 0xC773, + 0xC775, 0xC78F, + 0xC791, 0xC7AB, + 0xC7AD, 0xC7C7, + 0xC7C9, 0xC7E3, + 0xC7E5, 0xC7FF, + 0xC801, 0xC81B, + 0xC81D, 0xC837, + 0xC839, 0xC853, + 0xC855, 0xC86F, + 0xC871, 0xC88B, + 0xC88D, 
0xC8A7, + 0xC8A9, 0xC8C3, + 0xC8C5, 0xC8DF, + 0xC8E1, 0xC8FB, + 0xC8FD, 0xC917, + 0xC919, 0xC933, + 0xC935, 0xC94F, + 0xC951, 0xC96B, + 0xC96D, 0xC987, + 0xC989, 0xC9A3, + 0xC9A5, 0xC9BF, + 0xC9C1, 0xC9DB, + 0xC9DD, 0xC9F7, + 0xC9F9, 0xCA13, + 0xCA15, 0xCA2F, + 0xCA31, 0xCA4B, + 0xCA4D, 0xCA67, + 0xCA69, 0xCA83, + 0xCA85, 0xCA9F, + 0xCAA1, 0xCABB, + 0xCABD, 0xCAD7, + 0xCAD9, 0xCAF3, + 0xCAF5, 0xCB0F, + 0xCB11, 0xCB2B, + 0xCB2D, 0xCB47, + 0xCB49, 0xCB63, + 0xCB65, 0xCB7F, + 0xCB81, 0xCB9B, + 0xCB9D, 0xCBB7, + 0xCBB9, 0xCBD3, + 0xCBD5, 0xCBEF, + 0xCBF1, 0xCC0B, + 0xCC0D, 0xCC27, + 0xCC29, 0xCC43, + 0xCC45, 0xCC5F, + 0xCC61, 0xCC7B, + 0xCC7D, 0xCC97, + 0xCC99, 0xCCB3, + 0xCCB5, 0xCCCF, + 0xCCD1, 0xCCEB, + 0xCCED, 0xCD07, + 0xCD09, 0xCD23, + 0xCD25, 0xCD3F, + 0xCD41, 0xCD5B, + 0xCD5D, 0xCD77, + 0xCD79, 0xCD93, + 0xCD95, 0xCDAF, + 0xCDB1, 0xCDCB, + 0xCDCD, 0xCDE7, + 0xCDE9, 0xCE03, + 0xCE05, 0xCE1F, + 0xCE21, 0xCE3B, + 0xCE3D, 0xCE57, + 0xCE59, 0xCE73, + 0xCE75, 0xCE8F, + 0xCE91, 0xCEAB, + 0xCEAD, 0xCEC7, + 0xCEC9, 0xCEE3, + 0xCEE5, 0xCEFF, + 0xCF01, 0xCF1B, + 0xCF1D, 0xCF37, + 0xCF39, 0xCF53, + 0xCF55, 0xCF6F, + 0xCF71, 0xCF8B, + 0xCF8D, 0xCFA7, + 0xCFA9, 0xCFC3, + 0xCFC5, 0xCFDF, + 0xCFE1, 0xCFFB, + 0xCFFD, 0xD017, + 0xD019, 0xD033, + 0xD035, 0xD04F, + 0xD051, 0xD06B, + 0xD06D, 0xD087, + 0xD089, 0xD0A3, + 0xD0A5, 0xD0BF, + 0xD0C1, 0xD0DB, + 0xD0DD, 0xD0F7, + 0xD0F9, 0xD113, + 0xD115, 0xD12F, + 0xD131, 0xD14B, + 0xD14D, 0xD167, + 0xD169, 0xD183, + 0xD185, 0xD19F, + 0xD1A1, 0xD1BB, + 0xD1BD, 0xD1D7, + 0xD1D9, 0xD1F3, + 0xD1F5, 0xD20F, + 0xD211, 0xD22B, + 0xD22D, 0xD247, + 0xD249, 0xD263, + 0xD265, 0xD27F, + 0xD281, 0xD29B, + 0xD29D, 0xD2B7, + 0xD2B9, 0xD2D3, + 0xD2D5, 0xD2EF, + 0xD2F1, 0xD30B, + 0xD30D, 0xD327, + 0xD329, 0xD343, + 0xD345, 0xD35F, + 0xD361, 0xD37B, + 0xD37D, 0xD397, + 0xD399, 0xD3B3, + 0xD3B5, 0xD3CF, + 0xD3D1, 0xD3EB, + 0xD3ED, 0xD407, + 0xD409, 0xD423, + 0xD425, 0xD43F, + 0xD441, 0xD45B, + 0xD45D, 0xD477, + 0xD479, 0xD493, + 0xD495, 0xD4AF, + 0xD4B1, 
0xD4CB, + 0xD4CD, 0xD4E7, + 0xD4E9, 0xD503, + 0xD505, 0xD51F, + 0xD521, 0xD53B, + 0xD53D, 0xD557, + 0xD559, 0xD573, + 0xD575, 0xD58F, + 0xD591, 0xD5AB, + 0xD5AD, 0xD5C7, + 0xD5C9, 0xD5E3, + 0xD5E5, 0xD5FF, + 0xD601, 0xD61B, + 0xD61D, 0xD637, + 0xD639, 0xD653, + 0xD655, 0xD66F, + 0xD671, 0xD68B, + 0xD68D, 0xD6A7, + 0xD6A9, 0xD6C3, + 0xD6C5, 0xD6DF, + 0xD6E1, 0xD6FB, + 0xD6FD, 0xD717, + 0xD719, 0xD733, + 0xD735, 0xD74F, + 0xD751, 0xD76B, + 0xD76D, 0xD787, + 0xD789, 0xD7A3, +}; + +static const int32_t ucg_indic_conjunct_break_consonant_ranges[] = { + 0x0915, 0x0939, + 0x0958, 0x095F, + 0x0978, 0x097F, + 0x0995, 0x09A8, + 0x09AA, 0x09B0, + 0x09B2, 0x09B2, + 0x09B6, 0x09B9, + 0x09DC, 0x09DD, + 0x09DF, 0x09DF, + 0x09F0, 0x09F1, + 0x0A95, 0x0AA8, + 0x0AAA, 0x0AB0, + 0x0AB2, 0x0AB3, + 0x0AB5, 0x0AB9, + 0x0AF9, 0x0AF9, + 0x0B15, 0x0B28, + 0x0B2A, 0x0B30, + 0x0B32, 0x0B33, + 0x0B35, 0x0B39, + 0x0B5C, 0x0B5D, + 0x0B5F, 0x0B5F, + 0x0B71, 0x0B71, + 0x0C15, 0x0C28, + 0x0C2A, 0x0C39, + 0x0C58, 0x0C5A, + 0x0D15, 0x0D3A, +}; + +static const int32_t ucg_indic_conjunct_break_extend_ranges[] = { + 0x0300, 0x034E, + 0x0350, 0x036F, + 0x0483, 0x0487, + 0x0591, 0x05BD, + 0x05BF, 0x05BF, + 0x05C1, 0x05C2, + 0x05C4, 0x05C5, + 0x05C7, 0x05C7, + 0x0610, 0x061A, + 0x064B, 0x065F, + 0x0670, 0x0670, + 0x06D6, 0x06DC, + 0x06DF, 0x06E4, + 0x06E7, 0x06E8, + 0x06EA, 0x06ED, + 0x0711, 0x0711, + 0x0730, 0x074A, + 0x07EB, 0x07F3, + 0x07FD, 0x07FD, + 0x0816, 0x0819, + 0x081B, 0x0823, + 0x0825, 0x0827, + 0x0829, 0x082D, + 0x0859, 0x085B, + 0x0898, 0x089F, + 0x08CA, 0x08E1, + 0x08E3, 0x08FF, + 0x093C, 0x093C, + 0x0951, 0x0954, + 0x09BC, 0x09BC, + 0x09FE, 0x09FE, + 0x0A3C, 0x0A3C, + 0x0ABC, 0x0ABC, + 0x0B3C, 0x0B3C, + 0x0C3C, 0x0C3C, + 0x0C55, 0x0C56, + 0x0CBC, 0x0CBC, + 0x0D3B, 0x0D3C, + 0x0E38, 0x0E3A, + 0x0E48, 0x0E4B, + 0x0EB8, 0x0EBA, + 0x0EC8, 0x0ECB, + 0x0F18, 0x0F19, + 0x0F35, 0x0F35, + 0x0F37, 0x0F37, + 0x0F39, 0x0F39, + 0x0F71, 0x0F72, + 0x0F74, 0x0F74, + 0x0F7A, 0x0F7D, + 0x0F80, 0x0F80, + 
0x0F82, 0x0F84, + 0x0F86, 0x0F87, + 0x0FC6, 0x0FC6, + 0x1037, 0x1037, + 0x1039, 0x103A, + 0x108D, 0x108D, + 0x135D, 0x135F, + 0x1714, 0x1714, + 0x17D2, 0x17D2, + 0x17DD, 0x17DD, + 0x18A9, 0x18A9, + 0x1939, 0x193B, + 0x1A17, 0x1A18, + 0x1A60, 0x1A60, + 0x1A75, 0x1A7C, + 0x1A7F, 0x1A7F, + 0x1AB0, 0x1ABD, + 0x1ABF, 0x1ACE, + 0x1B34, 0x1B34, + 0x1B6B, 0x1B73, + 0x1BAB, 0x1BAB, + 0x1BE6, 0x1BE6, + 0x1C37, 0x1C37, + 0x1CD0, 0x1CD2, + 0x1CD4, 0x1CE0, + 0x1CE2, 0x1CE8, + 0x1CED, 0x1CED, + 0x1CF4, 0x1CF4, + 0x1CF8, 0x1CF9, + 0x1DC0, 0x1DFF, + 0x200D, 0x200D, + 0x20D0, 0x20DC, + 0x20E1, 0x20E1, + 0x20E5, 0x20F0, + 0x2CEF, 0x2CF1, + 0x2D7F, 0x2D7F, + 0x2DE0, 0x2DFF, + 0x302A, 0x302D, + 0x302E, 0x302F, + 0x3099, 0x309A, + 0xA66F, 0xA66F, + 0xA674, 0xA67D, + 0xA69E, 0xA69F, + 0xA6F0, 0xA6F1, + 0xA82C, 0xA82C, + 0xA8E0, 0xA8F1, + 0xA92B, 0xA92D, + 0xA9B3, 0xA9B3, + 0xAAB0, 0xAAB0, + 0xAAB2, 0xAAB4, + 0xAAB7, 0xAAB8, + 0xAABE, 0xAABF, + 0xAAC1, 0xAAC1, + 0xAAF6, 0xAAF6, + 0xABED, 0xABED, + 0xFB1E, 0xFB1E, + 0xFE20, 0xFE2F, + 0x101FD, 0x101FD, + 0x102E0, 0x102E0, + 0x10376, 0x1037A, + 0x10A0D, 0x10A0D, + 0x10A0F, 0x10A0F, + 0x10A38, 0x10A3A, + 0x10A3F, 0x10A3F, + 0x10AE5, 0x10AE6, + 0x10D24, 0x10D27, + 0x10EAB, 0x10EAC, + 0x10EFD, 0x10EFF, + 0x10F46, 0x10F50, + 0x10F82, 0x10F85, + 0x11070, 0x11070, + 0x1107F, 0x1107F, + 0x110BA, 0x110BA, + 0x11100, 0x11102, + 0x11133, 0x11134, + 0x11173, 0x11173, + 0x111CA, 0x111CA, + 0x11236, 0x11236, + 0x112E9, 0x112EA, + 0x1133B, 0x1133C, + 0x11366, 0x1136C, + 0x11370, 0x11374, + 0x11446, 0x11446, + 0x1145E, 0x1145E, + 0x114C3, 0x114C3, + 0x115C0, 0x115C0, + 0x116B7, 0x116B7, + 0x1172B, 0x1172B, + 0x1183A, 0x1183A, + 0x1193E, 0x1193E, + 0x11943, 0x11943, + 0x11A34, 0x11A34, + 0x11A47, 0x11A47, + 0x11A99, 0x11A99, + 0x11D42, 0x11D42, + 0x11D44, 0x11D45, + 0x11D97, 0x11D97, + 0x11F42, 0x11F42, + 0x16AF0, 0x16AF4, + 0x16B30, 0x16B36, + 0x1BC9E, 0x1BC9E, + 0x1D165, 0x1D165, + 0x1D167, 0x1D169, + 0x1D16E, 0x1D172, + 0x1D17B, 0x1D182, + 0x1D185, 
0x1D18B, + 0x1D1AA, 0x1D1AD, + 0x1D242, 0x1D244, + 0x1E000, 0x1E006, + 0x1E008, 0x1E018, + 0x1E01B, 0x1E021, + 0x1E023, 0x1E024, + 0x1E026, 0x1E02A, + 0x1E08F, 0x1E08F, + 0x1E130, 0x1E136, + 0x1E2AE, 0x1E2AE, + 0x1E2EC, 0x1E2EF, + 0x1E4EC, 0x1E4EF, + 0x1E8D0, 0x1E8D6, + 0x1E944, 0x1E94A, +}; + +// Fullwidth (F) and Wide (W) are counted as 2. +// Everything else is 1. +// +// Derived from: https://unicode.org/Public/15.1.0/ucd/EastAsianWidth.txt +static const int32_t ucg_normalized_east_asian_width_ranges[] = { + 0x0000, 0x10FF, 1, + 0x1100, 0x115F, 2, + 0x1160, 0x2319, 1, + 0x231A, 0x231B, 2, + 0x231C, 0x2328, 1, + 0x2329, 0x232A, 2, + 0x232B, 0x23E8, 1, + 0x23E9, 0x23EC, 2, + 0x23ED, 0x23EF, 1, + 0x23F0, 0x23F0, 2, + 0x23F1, 0x23F2, 1, + 0x23F3, 0x23F3, 2, + 0x23F4, 0x25FC, 1, + 0x25FD, 0x25FE, 2, + 0x25FF, 0x2613, 1, + 0x2614, 0x2615, 2, + 0x2616, 0x2647, 1, + 0x2648, 0x2653, 2, + 0x2654, 0x267E, 1, + 0x267F, 0x267F, 2, + 0x2680, 0x2692, 1, + 0x2693, 0x2693, 2, + 0x2694, 0x26A0, 1, + 0x26A1, 0x26A1, 2, + 0x26A2, 0x26A9, 1, + 0x26AA, 0x26AB, 2, + 0x26AC, 0x26BC, 1, + 0x26BD, 0x26BE, 2, + 0x26BF, 0x26C3, 1, + 0x26C4, 0x26C5, 2, + 0x26C6, 0x26CD, 1, + 0x26CE, 0x26CE, 2, + 0x26CF, 0x26D3, 1, + 0x26D4, 0x26D4, 2, + 0x26D5, 0x26E9, 1, + 0x26EA, 0x26EA, 2, + 0x26EB, 0x26F1, 1, + 0x26F2, 0x26F3, 2, + 0x26F4, 0x26F4, 1, + 0x26F5, 0x26F5, 2, + 0x26F6, 0x26F9, 1, + 0x26FA, 0x26FA, 2, + 0x26FB, 0x26FC, 1, + 0x26FD, 0x26FD, 2, + 0x26FE, 0x2704, 1, + 0x2705, 0x2705, 2, + 0x2706, 0x2709, 1, + 0x270A, 0x270B, 2, + 0x270C, 0x2727, 1, + 0x2728, 0x2728, 2, + 0x2729, 0x274B, 1, + 0x274C, 0x274C, 2, + 0x274D, 0x274D, 1, + 0x274E, 0x274E, 2, + 0x274F, 0x2752, 1, + 0x2753, 0x2755, 2, + 0x2756, 0x2756, 1, + 0x2757, 0x2757, 2, + 0x2758, 0x2794, 1, + 0x2795, 0x2797, 2, + 0x2798, 0x27AF, 1, + 0x27B0, 0x27B0, 2, + 0x27B1, 0x27BE, 1, + 0x27BF, 0x27BF, 2, + 0x27C0, 0x2B1A, 1, + 0x2B1B, 0x2B1C, 2, + 0x2B1D, 0x2B4F, 1, + 0x2B50, 0x2B50, 2, + 0x2B51, 0x2B54, 1, + 0x2B55, 0x2B55, 2, + 0x2B56, 
0x2E5D, 1, + 0x2E80, 0x303E, 2, + 0x303F, 0x303F, 1, + 0x3041, 0x3247, 2, + 0x3248, 0x324F, 1, + 0x3250, 0x4DBF, 2, + 0x4DC0, 0x4DFF, 1, + 0x4E00, 0xA4C6, 2, + 0xA4D0, 0xA95F, 1, + 0xA960, 0xA97C, 2, + 0xA980, 0xABF9, 1, + 0xAC00, 0xD7A3, 2, + 0xD7B0, 0xF8FF, 1, + 0xF900, 0xFAFF, 2, + 0xFB00, 0xFE0F, 1, + 0xFE10, 0xFE19, 2, + 0xFE20, 0xFE2F, 1, + 0xFE30, 0xFE6B, 2, + 0xFE70, 0xFEFF, 1, + 0xFF01, 0xFF60, 2, + 0xFF61, 0xFFDC, 1, + 0xFFE0, 0xFFE6, 2, + 0xFFE8, 0x16F9F, 1, + 0x16FE0, 0x1B2FB, 2, + 0x1BC00, 0x1F003, 1, + 0x1F004, 0x1F004, 2, + 0x1F005, 0x1F0CE, 1, + 0x1F0CF, 0x1F0CF, 2, + 0x1F0D1, 0x1F18D, 1, + 0x1F18E, 0x1F18E, 2, + 0x1F18F, 0x1F190, 1, + 0x1F191, 0x1F19A, 2, + 0x1F19B, 0x1F1FF, 1, + 0x1F200, 0x1F320, 2, + 0x1F321, 0x1F32C, 1, + 0x1F32D, 0x1F335, 2, + 0x1F336, 0x1F336, 1, + 0x1F337, 0x1F37C, 2, + 0x1F37D, 0x1F37D, 1, + 0x1F37E, 0x1F393, 2, + 0x1F394, 0x1F39F, 1, + 0x1F3A0, 0x1F3CA, 2, + 0x1F3CB, 0x1F3CE, 1, + 0x1F3CF, 0x1F3D3, 2, + 0x1F3D4, 0x1F3DF, 1, + 0x1F3E0, 0x1F3F0, 2, + 0x1F3F1, 0x1F3F3, 1, + 0x1F3F4, 0x1F3F4, 2, + 0x1F3F5, 0x1F3F7, 1, + 0x1F3F8, 0x1F43E, 2, + 0x1F43F, 0x1F43F, 1, + 0x1F440, 0x1F440, 2, + 0x1F441, 0x1F441, 1, + 0x1F442, 0x1F4FC, 2, + 0x1F4FD, 0x1F4FE, 1, + 0x1F4FF, 0x1F53D, 2, + 0x1F53E, 0x1F54A, 1, + 0x1F54B, 0x1F54E, 2, + 0x1F54F, 0x1F54F, 1, + 0x1F550, 0x1F567, 2, + 0x1F568, 0x1F579, 1, + 0x1F57A, 0x1F57A, 2, + 0x1F57B, 0x1F594, 1, + 0x1F595, 0x1F596, 2, + 0x1F597, 0x1F5A3, 1, + 0x1F5A4, 0x1F5A4, 2, + 0x1F5A5, 0x1F5FA, 1, + 0x1F5FB, 0x1F64F, 2, + 0x1F650, 0x1F67F, 1, + 0x1F680, 0x1F6C5, 2, + 0x1F6C6, 0x1F6CB, 1, + 0x1F6CC, 0x1F6CC, 2, + 0x1F6CD, 0x1F6CF, 1, + 0x1F6D0, 0x1F6D2, 2, + 0x1F6D3, 0x1F6D4, 1, + 0x1F6D5, 0x1F6DF, 2, + 0x1F6E0, 0x1F6EA, 1, + 0x1F6EB, 0x1F6EC, 2, + 0x1F6F0, 0x1F6F3, 1, + 0x1F6F4, 0x1F6FC, 2, + 0x1F700, 0x1F7D9, 1, + 0x1F7E0, 0x1F7F0, 2, + 0x1F800, 0x1F90B, 1, + 0x1F90C, 0x1F93A, 2, + 0x1F93B, 0x1F93B, 1, + 0x1F93C, 0x1F945, 2, + 0x1F946, 0x1F946, 1, + 0x1F947, 0x1F9FF, 2, + 0x1FA00, 0x1FA6D, 1, + 
0x1FA70, 0x1FAF8, 2, + 0x1FB00, 0x1FBF9, 1, + 0x20000, 0x3FFFD, 2, + 0xE0001, 0x10FFFD, 1, +}; + +// +// End of Unicode 15.1.0 block. +// + +#ifdef __cplusplus +} +#endif + +#endif /* _UCG_TABLES_INCLUDED */ diff --git a/src/unicode.cpp b/src/unicode.cpp index c244a323c..665d5b182 100644 --- a/src/unicode.cpp +++ b/src/unicode.cpp @@ -162,3 +162,8 @@ end: if (codepoint_out) *codepoint_out = codepoint; return width; } + +// NOTE(Feoramund): It's down here because I made UCG use the utf8_decode above to avoid duplicating code. +extern "C" { +#include "ucg/ucg.c" +}