Add rune_is_letter_or_digit for tokenizer

This commit is contained in:
gingerBill
2020-05-27 12:54:11 +01:00
parent 4e21a4d46a
commit 876820789e
2 changed files with 24 additions and 1 deletions

View File

@@ -32,6 +32,29 @@ bool rune_is_digit(Rune r) {
return utf8proc_category(r) == UTF8PROC_CATEGORY_ND;
}
// Reports whether `r` is an underscore, a letter, or a decimal digit.
//
// Runes below 0x80 are classified with plain comparisons and never reach
// utf8proc. Everything else is classified by its utf8proc category:
// the letter categories (LU, LL, LT, LM, LO) and decimal digits (ND)
// are accepted, all other categories are rejected.
bool rune_is_letter_or_digit(Rune r) {
	if (r >= 0x80) {
		switch (utf8proc_category(r)) {
		case UTF8PROC_CATEGORY_LU:
		case UTF8PROC_CATEGORY_LL:
		case UTF8PROC_CATEGORY_LT:
		case UTF8PROC_CATEGORY_LM:
		case UTF8PROC_CATEGORY_LO:
		case UTF8PROC_CATEGORY_ND:
			return true;
		}
		return false;
	}

	if (r == '_') {
		return true;
	}

	// Work on the unsigned value so that anything outside the ASCII
	// letter/digit ranges simply fails the comparisons below.
	u32 code = cast(u32)r;

	// Setting bit 0x20 maps 'A'..'Z' onto 'a'..'z', so one range test
	// covers both cases.
	u32 lowered = code | 0x20;
	if ('a' <= lowered && lowered <= 'z') {
		return true;
	}

	return '0' <= code && code <= '9';
}
bool rune_is_whitespace(Rune r) {
switch (r) {
case ' ':