Add rune_is_letter_or_digit for tokenizer

This commit is contained in:
gingerBill
2020-05-27 12:54:11 +01:00
parent 4e21a4d46a
commit 876820789e
2 changed files with 24 additions and 1 deletions
+1 -1
View File
@@ -948,7 +948,7 @@ Token tokenizer_get_token(Tokenizer *t) {
Rune curr_rune = t->curr_rune;
if (rune_is_letter(curr_rune)) {
token.kind = Token_Ident;
while (rune_is_letter(t->curr_rune) || rune_is_digit(t->curr_rune)) {
while (rune_is_letter_or_digit(t->curr_rune)) {
advance_to_next_rune(t);
}
+23
View File
@@ -32,6 +32,29 @@ bool rune_is_digit(Rune r) {
return utf8proc_category(r) == UTF8PROC_CATEGORY_ND;
}
// Reports whether `r` is accepted as a letter or a digit in a single test.
//
// Runes at or above 0x80 are classified through utf8proc_category(): the
// categories LU, LL, LT, LM, LO and ND are accepted, anything else is rejected.
// Runes below 0x80 are handled without a table lookup: '_', 'A'-'Z', 'a'-'z'
// and '0'-'9' are accepted.
bool rune_is_letter_or_digit(Rune r) {
	if (r >= 0x80) {
		switch (utf8proc_category(r)) {
		case UTF8PROC_CATEGORY_LU:
		case UTF8PROC_CATEGORY_LL:
		case UTF8PROC_CATEGORY_LT:
		case UTF8PROC_CATEGORY_LM:
		case UTF8PROC_CATEGORY_LO:
		case UTF8PROC_CATEGORY_ND:
			return true;
		default:
			return false;
		}
	}

	// The range checks below lean on unsigned wraparound: a value under the
	// lower bound wraps to a huge number and fails the `<` comparison, so
	// each range needs only one compare.
	u32 c = cast(u32)r;

	bool is_underscore = (r == '_');
	// OR-ing in 0x20 maps 'A'-'Z' onto 'a'-'z' (0x61 is 'a').
	bool is_ascii_letter = ((c | 0x20) - 0x61) < 26;
	bool is_ascii_digit = (c - '0') < 10;

	return is_underscore || is_ascii_letter || is_ascii_digit;
}
bool rune_is_whitespace(Rune r) {
switch (r) {
case ' ':