Add rune_is_letter_or_digit for tokenizer

2026-07-29 19:00:06 +00:00 · 2020-05-27 12:54:11 +01:00
parent 4e21a4d46a
commit 876820789e
2 changed files with 24 additions and 1 deletion
@@ -948,7 +948,7 @@ Token tokenizer_get_token(Tokenizer *t) {
 	Rune curr_rune = t->curr_rune;
 	if (rune_is_letter(curr_rune)) {
 		token.kind = Token_Ident;
-		while (rune_is_letter(t->curr_rune) || rune_is_digit(t->curr_rune)) {
+		while (rune_is_letter_or_digit(t->curr_rune)) {
 			advance_to_next_rune(t);
 		}

@@ -32,6 +32,29 @@ bool rune_is_digit(Rune r) {
 	return utf8proc_category(r) == UTF8PROC_CATEGORY_ND;
 }

+bool rune_is_letter_or_digit(Rune r) { // Returns true when r is '_', a letter, or a decimal digit; false otherwise.
+	if (r < 0x80) { // values below 0x80 are classified with plain arithmetic, without calling utf8proc
+		if (r == '_') { // underscore is accepted alongside letters
+			return true;
+		}
+		if (((cast(u32)r | 0x20) - 0x61) < 26) { // OR-ing 0x20 maps 'A'..'Z' onto 'a'..'z' (0x61 is 'a'); assuming u32 is unsigned, values below 'a' wrap to a large number, so one compare checks both bounds
+			return true;
+		}
+		return (cast(u32)r - '0') < 10; // same single-compare range check for '0'..'9'
+	}
+	switch (utf8proc_category(r)) { // values of 0x80 and above are classified by their utf8proc category
+	case UTF8PROC_CATEGORY_LU:
+	case UTF8PROC_CATEGORY_LL:
+	case UTF8PROC_CATEGORY_LT:
+	case UTF8PROC_CATEGORY_LM:
+	case UTF8PROC_CATEGORY_LO: // any of the five L* (letter) categories counts as a letter
+		return true;
+	case UTF8PROC_CATEGORY_ND: // the same category that rune_is_digit tests for
+		return true;
+	}
+	return false; // every other category is neither a letter nor a digit
+}
+
 bool rune_is_whitespace(Rune r) {
 	switch (r) {
 	case ' ':