mirror of
https://github.com/odin-lang/Odin.git
synced 2025-12-29 17:34:34 +00:00
196 lines
4.4 KiB
Odin
196 lines
4.4 KiB
Odin
package unicode
|
|
|
|
MAX_RUNE :: '\U00010fff'; // Maximum valid unicode code point
|
|
REPLACEMENT_CHAR :: '\ufffd'; // Represented an invalid code point
|
|
MAX_ASCII :: '\u007f'; // Maximum ASCII value
|
|
MAX_LATIN1 :: '\u00ff'; // Maximum Latin-1 value
|
|
|
|
binary_search :: proc(c: i32, table: []i32, length, stride: int) -> int {
|
|
n := length;
|
|
t := 0;
|
|
for n > 1 {
|
|
m := n / 2;
|
|
p := t + m*stride;
|
|
if c >= table[p] {
|
|
t = p;
|
|
n = n-m;
|
|
} else {
|
|
n = m;
|
|
}
|
|
}
|
|
if n != 0 && c >= table[t] {
|
|
return t;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
to_lower :: proc(r: rune) -> rune {
|
|
c := i32(r);
|
|
p := binary_search(c, to_lower_ranges[:], len(to_lower_ranges)/3, 3);
|
|
if p >= 0 && to_lower_ranges[p] <= c && c <= to_lower_ranges[p+1] {
|
|
return rune(c + to_lower_ranges[p+2] - 500);
|
|
}
|
|
p = binary_search(c, to_lower_singlets[:], len(to_lower_singlets)/2, 2);
|
|
if p >= 0 && c == to_lower_singlets[p] {
|
|
return rune(c + to_lower_singlets[p+1] - 500);
|
|
}
|
|
return rune(c);
|
|
}
|
|
to_upper :: proc(r: rune) -> rune {
|
|
c := i32(r);
|
|
p := binary_search(c, to_upper_ranges[:], len(to_upper_ranges)/3, 3);
|
|
if p >= 0 && to_upper_ranges[p] <= c && c <= to_upper_ranges[p+1] {
|
|
return rune(c + to_upper_ranges[p+2] - 500);
|
|
}
|
|
p = binary_search(c, to_upper_singlets[:], len(to_upper_singlets)/2, 2);
|
|
if p >= 0 && c == to_upper_singlets[p] {
|
|
return rune(c + to_upper_singlets[p+1] - 500);
|
|
}
|
|
return rune(c);
|
|
}
|
|
to_title :: proc(r: rune) -> rune {
|
|
c := i32(r);
|
|
p := binary_search(c, to_upper_singlets[:], len(to_title_singlets)/2, 2);
|
|
if p >= 0 && c == to_upper_singlets[p] {
|
|
return rune(c + to_title_singlets[p+1] - 500);
|
|
}
|
|
return rune(c);
|
|
}
|
|
|
|
|
|
is_lower :: proc(r: rune) -> bool {
|
|
if r <= MAX_ASCII {
|
|
return u32(r)-'a' < 26;
|
|
}
|
|
c := i32(r);
|
|
p := binary_search(c, to_upper_ranges[:], len(to_upper_ranges)/3, 3);
|
|
if p >= 0 && to_upper_ranges[p] <= c && c <= to_upper_ranges[p+1] {
|
|
return true;
|
|
}
|
|
p = binary_search(c, to_upper_singlets[:], len(to_upper_singlets)/2, 2);
|
|
if p >= 0 && c == to_upper_singlets[p] {
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
is_upper :: proc(r: rune) -> bool {
|
|
if r <= MAX_ASCII {
|
|
return u32(r)-'A' < 26;
|
|
}
|
|
c := i32(r);
|
|
p := binary_search(c, to_lower_ranges[:], len(to_lower_ranges)/3, 3);
|
|
if p >= 0 && to_lower_ranges[p] <= c && c <= to_lower_ranges[p+1] {
|
|
return true;
|
|
}
|
|
p = binary_search(c, to_lower_singlets[:], len(to_lower_singlets)/2, 2);
|
|
if p >= 0 && c == to_lower_singlets[p] {
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
is_alpha :: is_letter;
|
|
is_letter :: proc(r: rune) -> bool {
|
|
if u32(r) <= MAX_LATIN1 {
|
|
return char_properties[u8(r)]&pLmask != 0;
|
|
}
|
|
if is_upper(r) || is_lower(r) {
|
|
return true;
|
|
}
|
|
|
|
c := i32(r);
|
|
p := binary_search(c, alpha_ranges[:], len(alpha_ranges)/2, 2);
|
|
if p >= 0 && alpha_ranges[p] <= c && c <= alpha_ranges[p+1] {
|
|
return true;
|
|
}
|
|
p = binary_search(c, alpha_singlets[:], len(alpha_singlets), 1);
|
|
if p >= 0 && c == alpha_singlets[p] {
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
is_title :: proc(r: rune) -> bool {
|
|
return is_upper(r) && is_lower(r);
|
|
}
|
|
|
|
is_digit :: proc(r: rune) -> bool {
|
|
if r <= MAX_LATIN1 {
|
|
return '0' <= r && r <= '9';
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
is_white_space :: is_space;
|
|
is_space :: proc(r: rune) -> bool {
|
|
if u32(r) <= MAX_LATIN1 {
|
|
switch r {
|
|
case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xa0:
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
c := i32(r);
|
|
p := binary_search(c, space_ranges[:], len(space_ranges)/2, 2);
|
|
if p >= 0 && space_ranges[p] <= c && c <= space_ranges[p+1] {
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
is_combining :: proc(r: rune) -> bool {
|
|
c := i32(r);
|
|
|
|
return c >= 0x0300 && (c <= 0x036f ||
|
|
(c >= 0x1ab0 && c <= 0x1aff) ||
|
|
(c >= 0x1dc0 && c <= 0x1dff) ||
|
|
(c >= 0x20d0 && c <= 0x20ff) ||
|
|
(c >= 0xfe20 && c <= 0xfe2f));
|
|
}
|
|
|
|
|
|
|
|
is_graphic :: proc(r: rune) -> bool {
|
|
if u32(r) <= MAX_LATIN1 {
|
|
return char_properties[u8(r)]&pg != 0;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
is_print :: proc(r: rune) -> bool {
|
|
if u32(r) <= MAX_LATIN1 {
|
|
return char_properties[u8(r)]&pp != 0;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
is_control :: proc(r: rune) -> bool {
|
|
if u32(r) <= MAX_LATIN1 {
|
|
return char_properties[u8(r)]&pC != 0;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
is_number :: proc(r: rune) -> bool {
|
|
if u32(r) <= MAX_LATIN1 {
|
|
return char_properties[u8(r)]&pN != 0;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
is_punct :: proc(r: rune) -> bool {
|
|
if u32(r) <= MAX_LATIN1 {
|
|
return char_properties[u8(r)]&pP != 0;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
is_symbol :: proc(r: rune) -> bool {
|
|
if u32(r) <= MAX_LATIN1 {
|
|
return char_properties[u8(r)]&pS != 0;
|
|
}
|
|
return false;
|
|
}
|