Fix 2-byte UTF-8 decoding issues; fix UTF-8 lexing issues

This commit is contained in:
Ryan Fleury
2025-05-20 14:46:46 -07:00
parent 2f5082fb61
commit 206ec346cf
2 changed files with 5 additions and 5 deletions
+1 -1
View File
@@ -1517,7 +1517,7 @@ utf8_decode(U8 *str, U64 max){
}break;
case 2:
{
if (2 < max)
if (1 < max)
{
U8 cont_byte = str[1];
if (utf8_class[cont_byte >> 3] == 0)
+4 -4
View File
@@ -262,7 +262,7 @@ txt_token_array_from_string__c_cpp(Arena *arena, U64 *bytes_processed_counter, S
}break;
case TXT_TokenKind_Identifier:
{
ender_found = (!char_is_alpha(byte) && !char_is_digit(byte, 10) && byte != '_' && byte != '$');
ender_found = (!char_is_alpha(byte) && !char_is_digit(byte, 10) && byte != '_' && byte != '$' && byte < 128);
}break;
case TXT_TokenKind_Numeric:
{
@@ -606,7 +606,7 @@ txt_token_array_from_string__odin(Arena *arena, U64 *bytes_processed_counter, St
}break;
case TXT_TokenKind_Identifier:
{
ender_found = (!char_is_alpha(byte) && !char_is_digit(byte, 10) && byte != '_' && byte != '$');
ender_found = (!char_is_alpha(byte) && !char_is_digit(byte, 10) && byte != '_' && byte != '$' && byte < 128);
}break;
case TXT_TokenKind_Numeric:
{
@@ -892,7 +892,7 @@ txt_token_array_from_string__jai(Arena *arena, U64 *bytes_processed_counter, Str
}break;
case TXT_TokenKind_Identifier:
{
ender_found = (!char_is_alpha(byte) && !char_is_digit(byte, 10) && byte != '_' && byte != '$');
ender_found = (!char_is_alpha(byte) && !char_is_digit(byte, 10) && byte != '_' && byte != '$' && byte < 128);
}break;
case TXT_TokenKind_Numeric:
{
@@ -1177,7 +1177,7 @@ txt_token_array_from_string__zig(Arena *arena, U64 *bytes_processed_counter, Str
}break;
case TXT_TokenKind_Identifier:
{
ender_found = (!char_is_alpha(byte) && !char_is_digit(byte, 10) && byte != '_' && byte != '$');
ender_found = (!char_is_alpha(byte) && !char_is_digit(byte, 10) && byte != '_' && byte != '$' && byte < 128);
}break;
case TXT_TokenKind_Numeric:
{