mirror of
https://github.com/Ed94/raddebugger.git
synced 2026-06-13 07:32:23 -07:00
deduplicate txti/text_cache language stuff; port over odin lexing
This commit is contained in:
+247
-10
@@ -26,9 +26,27 @@ txt_lang_kind_from_extension(String8 extension)
|
||||
{
|
||||
kind = TXT_LangKind_CPlusPlus;
|
||||
}
|
||||
else if(str8_match(extension, str8_lit("odin"), StringMatchFlag_CaseInsensitive))
|
||||
{
|
||||
kind = TXT_LangKind_Odin;
|
||||
}
|
||||
return kind;
|
||||
}
|
||||
|
||||
// Returns the lexing function used for `kind`, or 0 when `kind` has no
// associated lexer. C and C++ share a single lexer.
internal TXT_LangLexFunctionType *
txt_lex_function_from_lang_kind(TXT_LangKind kind)
{
  TXT_LangLexFunctionType *lex_function = 0;
  if(kind == TXT_LangKind_C || kind == TXT_LangKind_CPlusPlus)
  {
    lex_function = txt_token_array_from_string__c_cpp;
  }
  else if(kind == TXT_LangKind_Odin)
  {
    lex_function = txt_token_array_from_string__odin;
  }
  return lex_function;
}
|
||||
|
||||
////////////////////////////////
|
||||
//~ rjf: Token Type Functions
|
||||
|
||||
@@ -378,6 +396,234 @@ txt_token_array_from_string__c_cpp(Arena *arena, U64 *bytes_processed_counter, S
|
||||
return result;
|
||||
}
|
||||
|
||||
internal TXT_TokenArray
|
||||
txt_token_array_from_string__odin(Arena *arena, U64 *bytes_processed_counter, String8 string)
|
||||
{
|
||||
Temp scratch = scratch_begin(&arena, 1);
|
||||
|
||||
//- rjf: generate token list
|
||||
TXT_TokenChunkList tokens = {0};
|
||||
{
|
||||
B32 comment_is_single_line = 0;
|
||||
B32 string_is_char = 0;
|
||||
TXT_TokenKind active_token_kind = TXT_TokenKind_Null;
|
||||
U64 active_token_start_idx = 0;
|
||||
B32 escaped = 0;
|
||||
B32 next_escaped = 0;
|
||||
U64 byte_process_start_idx = 0;
|
||||
for(U64 idx = 0; idx <= string.size;)
|
||||
{
|
||||
U8 byte = (idx+0 < string.size) ? (string.str[idx+0]) : 0;
|
||||
U8 next_byte = (idx+1 < string.size) ? (string.str[idx+1]) : 0;
|
||||
|
||||
// rjf: update counter
|
||||
if(bytes_processed_counter != 0 && ((idx-byte_process_start_idx) >= 1000 || idx == string.size))
|
||||
{
|
||||
ins_atomic_u64_add_eval(bytes_processed_counter, (idx-byte_process_start_idx));
|
||||
byte_process_start_idx = idx;
|
||||
}
|
||||
|
||||
// rjf: escaping
|
||||
if(escaped && (byte != '\r' && byte != '\n'))
|
||||
{
|
||||
next_escaped = 0;
|
||||
}
|
||||
else if(!escaped && byte == '\\')
|
||||
{
|
||||
next_escaped = 1;
|
||||
}
|
||||
|
||||
// rjf: take starter, determine active token kind
|
||||
if(active_token_kind == TXT_TokenKind_Null)
|
||||
{
|
||||
// rjf: use next bytes to start a new token
|
||||
if(0){}
|
||||
else if(char_is_space(byte)) { active_token_kind = TXT_TokenKind_Whitespace; }
|
||||
else if(byte == '_' ||
|
||||
byte == '$' ||
|
||||
char_is_alpha(byte)) { active_token_kind = TXT_TokenKind_Identifier; }
|
||||
else if(char_is_digit(byte, 10) ||
|
||||
(byte == '.' &&
|
||||
char_is_digit(next_byte, 10))) { active_token_kind = TXT_TokenKind_Numeric; }
|
||||
else if(byte == '"') { active_token_kind = TXT_TokenKind_String; string_is_char = 0; }
|
||||
else if(byte == '\'') { active_token_kind = TXT_TokenKind_String; string_is_char = 1; }
|
||||
else if(byte == '/' && next_byte == '/') { active_token_kind = TXT_TokenKind_Comment; comment_is_single_line = 1; }
|
||||
else if(byte == '/' && next_byte == '*') { active_token_kind = TXT_TokenKind_Comment; comment_is_single_line = 0; }
|
||||
else if(byte == '~' || byte == '!' ||
|
||||
byte == '%' || byte == '^' ||
|
||||
byte == '&' || byte == '*' ||
|
||||
byte == '(' || byte == ')' ||
|
||||
byte == '-' || byte == '=' ||
|
||||
byte == '+' || byte == '[' ||
|
||||
byte == ']' || byte == '{' ||
|
||||
byte == '}' || byte == ':' ||
|
||||
byte == ';' || byte == ',' ||
|
||||
byte == '.' || byte == '<' ||
|
||||
byte == '>' || byte == '/' ||
|
||||
byte == '?' || byte == '|') { active_token_kind = TXT_TokenKind_Symbol; }
|
||||
else if(byte == '#') { active_token_kind = TXT_TokenKind_Meta; }
|
||||
|
||||
// rjf: start new token
|
||||
if(active_token_kind != TXT_TokenKind_Null)
|
||||
{
|
||||
active_token_start_idx = idx;
|
||||
}
|
||||
|
||||
// rjf: invalid token kind -> emit error
|
||||
else
|
||||
{
|
||||
TXT_Token token = {TXT_TokenKind_Error, r1u64(idx, idx+1)};
|
||||
txt_token_chunk_list_push(scratch.arena, &tokens, 4096, &token);
|
||||
}
|
||||
}
|
||||
|
||||
// rjf: look for ender
|
||||
U64 ender_pad = 0;
|
||||
B32 ender_found = 0;
|
||||
if(active_token_kind != TXT_TokenKind_Null && idx>active_token_start_idx)
|
||||
{
|
||||
if(idx == string.size)
|
||||
{
|
||||
ender_pad = 0;
|
||||
ender_found = 1;
|
||||
}
|
||||
else switch(active_token_kind)
|
||||
{
|
||||
default:break;
|
||||
case TXT_TokenKind_Whitespace:
|
||||
{
|
||||
ender_found = !char_is_space(byte);
|
||||
}break;
|
||||
case TXT_TokenKind_Identifier:
|
||||
{
|
||||
ender_found = (!char_is_alpha(byte) && !char_is_digit(byte, 10) && byte != '_' && byte != '$');
|
||||
}break;
|
||||
case TXT_TokenKind_Numeric:
|
||||
{
|
||||
ender_found = (!char_is_alpha(byte) && !char_is_digit(byte, 10) && byte != '_' && byte != '.' && byte != '\'');
|
||||
}break;
|
||||
case TXT_TokenKind_String:
|
||||
{
|
||||
ender_found = (!escaped && ((!string_is_char && byte == '"') || (string_is_char && byte == '\'')));
|
||||
ender_pad += 1;
|
||||
}break;
|
||||
case TXT_TokenKind_Symbol:
|
||||
{
|
||||
ender_found = (byte != '~' && byte != '!' &&
|
||||
byte != '%' && byte != '^' &&
|
||||
byte != '&' && byte != '*' &&
|
||||
byte != '(' && byte != ')' &&
|
||||
byte != '-' && byte != '=' &&
|
||||
byte != '+' && byte != '[' &&
|
||||
byte != ']' && byte != '{' &&
|
||||
byte != '}' && byte != ':' &&
|
||||
byte != ';' && byte != ',' &&
|
||||
byte != '.' && byte != '<' &&
|
||||
byte != '>' && byte != '/' &&
|
||||
byte != '?' && byte != '|');
|
||||
}break;
|
||||
case TXT_TokenKind_Comment:
|
||||
{
|
||||
if(comment_is_single_line)
|
||||
{
|
||||
ender_found = (!escaped && (byte == '\r' || byte == '\n'));
|
||||
}
|
||||
else
|
||||
{
|
||||
ender_found = (active_token_start_idx+1 < idx && byte == '*' && next_byte == '/');
|
||||
ender_pad += 2;
|
||||
}
|
||||
}break;
|
||||
case TXT_TokenKind_Meta:
|
||||
{
|
||||
ender_found = (!char_is_alpha(byte) && !char_is_digit(byte, 10) && byte != '_' && byte != '$');
|
||||
}break;
|
||||
}
|
||||
}
|
||||
|
||||
// rjf: next byte is ender => emit token
|
||||
if(ender_found)
|
||||
{
|
||||
TXT_Token token = {active_token_kind, r1u64(active_token_start_idx, idx+ender_pad)};
|
||||
active_token_kind = TXT_TokenKind_Null;
|
||||
|
||||
// rjf: identifier -> keyword in special cases
|
||||
if(token.kind == TXT_TokenKind_Identifier)
|
||||
{
|
||||
read_only local_persist String8 odin_keywords[] =
|
||||
{
|
||||
str8_lit_comp("asm"),
|
||||
str8_lit_comp("auto_cast"),
|
||||
str8_lit_comp("bit_set"),
|
||||
str8_lit_comp("break"),
|
||||
str8_lit_comp("case"),
|
||||
str8_lit_comp("cast"),
|
||||
str8_lit_comp("context"),
|
||||
str8_lit_comp("continue"),
|
||||
str8_lit_comp("defer"),
|
||||
str8_lit_comp("distinct"),
|
||||
str8_lit_comp("do"),
|
||||
str8_lit_comp("dynamic"),
|
||||
str8_lit_comp("else"),
|
||||
str8_lit_comp("enum"),
|
||||
str8_lit_comp("fallthrough"),
|
||||
str8_lit_comp("for"),
|
||||
str8_lit_comp("foreign"),
|
||||
str8_lit_comp("if"),
|
||||
str8_lit_comp("in"),
|
||||
str8_lit_comp("map"),
|
||||
str8_lit_comp("matrix"),
|
||||
str8_lit_comp("not_in"),
|
||||
str8_lit_comp("or_break"),
|
||||
str8_lit_comp("or_continue"),
|
||||
str8_lit_comp("or_else"),
|
||||
str8_lit_comp("or_return"),
|
||||
str8_lit_comp("package"),
|
||||
str8_lit_comp("proc"),
|
||||
str8_lit_comp("return"),
|
||||
str8_lit_comp("struct"),
|
||||
str8_lit_comp("switch"),
|
||||
str8_lit_comp("transmute"),
|
||||
str8_lit_comp("typeid"),
|
||||
str8_lit_comp("union"),
|
||||
str8_lit_comp("using"),
|
||||
str8_lit_comp("when"),
|
||||
str8_lit_comp("where"),
|
||||
str8_lit_comp("import"),
|
||||
};
|
||||
String8 token_string = str8_substr(string, r1u64(active_token_start_idx, idx+ender_pad));
|
||||
for(U64 keyword_idx = 0; keyword_idx < ArrayCount(odin_keywords); keyword_idx += 1)
|
||||
{
|
||||
if(str8_match(odin_keywords[keyword_idx], token_string, 0))
|
||||
{
|
||||
token.kind = TXT_TokenKind_Keyword;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// rjf: push
|
||||
txt_token_chunk_list_push(scratch.arena, &tokens, 4096, &token);
|
||||
|
||||
// rjf: increment by ender padding
|
||||
idx += ender_pad;
|
||||
}
|
||||
|
||||
// rjf: advance by 1 byte if we haven't found an ender
|
||||
if(!ender_found)
|
||||
{
|
||||
idx += 1;
|
||||
}
|
||||
escaped = next_escaped;
|
||||
}
|
||||
}
|
||||
|
||||
//- rjf: token list -> token array
|
||||
TXT_TokenArray result = txt_token_array_from_chunk_list(arena, &tokens);
|
||||
scratch_end(scratch);
|
||||
return result;
|
||||
}
|
||||
|
||||
////////////////////////////////
|
||||
//~ rjf: Main Layer Initialization
|
||||
|
||||
@@ -784,16 +1030,7 @@ txt_parse_thread__entry_point(void *p)
|
||||
}
|
||||
|
||||
//- rjf: lang -> lex function
|
||||
TXT_LangLexFunctionType *lex_function = 0;
|
||||
switch(lang)
|
||||
{
|
||||
default:{}break;
|
||||
case TXT_LangKind_C:
|
||||
case TXT_LangKind_CPlusPlus:
|
||||
{
|
||||
lex_function = txt_token_array_from_string__c_cpp;
|
||||
}break;
|
||||
}
|
||||
TXT_LangLexFunctionType *lex_function = txt_lex_function_from_lang_kind(lang);
|
||||
|
||||
//- rjf: lex function * data -> tokens
|
||||
TXT_TokenArray tokens = {0};
|
||||
|
||||
@@ -32,15 +32,6 @@ typedef enum TXT_TokenKind
|
||||
}
|
||||
TXT_TokenKind;
|
||||
|
||||
// Source languages distinguished by the text layer.
typedef enum TXT_LangKind
{
  TXT_LangKind_Null = 0,   // zero value
  TXT_LangKind_C,
  TXT_LangKind_CPlusPlus,
  TXT_LangKind_COUNT       // number of enumerators above
}
TXT_LangKind;
|
||||
|
||||
typedef struct TXT_Token TXT_Token;
|
||||
struct TXT_Token
|
||||
{
|
||||
@@ -105,6 +96,19 @@ struct TXT_TextInfo
|
||||
TXT_TokenArray tokens;
|
||||
};
|
||||
|
||||
////////////////////////////////
|
||||
//~ rjf: Language Kind Types
|
||||
|
||||
// Source languages distinguished by the text layer.
typedef enum TXT_LangKind
{
  TXT_LangKind_Null = 0,   // zero value
  TXT_LangKind_C,
  TXT_LangKind_CPlusPlus,
  TXT_LangKind_Odin,
  TXT_LangKind_COUNT       // number of enumerators above
}
TXT_LangKind;
|
||||
|
||||
// Function type for a language lexer: takes an arena, a U64 counter pointer, and the text to lex; returns a token array.
typedef TXT_TokenArray TXT_LangLexFunctionType(Arena *arena, U64 *bytes_processed_counter, String8 string);
|
||||
|
||||
////////////////////////////////
|
||||
@@ -234,6 +238,7 @@ global TXT_Shared *txt_shared = 0;
|
||||
//~ rjf: Basic Helpers
|
||||
|
||||
internal TXT_LangKind txt_lang_kind_from_extension(String8 extension);
|
||||
internal TXT_LangLexFunctionType *txt_lex_function_from_lang_kind(TXT_LangKind kind);
|
||||
|
||||
////////////////////////////////
|
||||
//~ rjf: Token Type Functions
|
||||
@@ -247,6 +252,7 @@ internal TXT_TokenArray txt_token_array_from_list(Arena *arena, TXT_TokenList *l
|
||||
//~ rjf: Lexing Functions
|
||||
|
||||
internal TXT_TokenArray txt_token_array_from_string__c_cpp(Arena *arena, U64 *bytes_processed_counter, String8 string);
|
||||
internal TXT_TokenArray txt_token_array_from_string__odin(Arena *arena, U64 *bytes_processed_counter, String8 string);
|
||||
|
||||
////////////////////////////////
|
||||
//~ rjf: Main Layer Initialization
|
||||
|
||||
+4
-38
@@ -47,31 +47,6 @@ txti_hash_from_string(String8 string)
|
||||
return result;
|
||||
}
|
||||
|
||||
internal TXTI_LangKind
|
||||
txti_lang_kind_from_extension(String8 extension)
|
||||
{
|
||||
TXTI_LangKind kind = TXTI_LangKind_Null;
|
||||
if(str8_match(extension, str8_lit("c"), 0) ||
|
||||
str8_match(extension, str8_lit("h"), 0))
|
||||
{
|
||||
kind = TXTI_LangKind_C;
|
||||
}
|
||||
else if(str8_match(extension, str8_lit("cpp"), StringMatchFlag_CaseInsensitive) ||
|
||||
str8_match(extension, str8_lit("cxx"), StringMatchFlag_CaseInsensitive) ||
|
||||
str8_match(extension, str8_lit("cc"), StringMatchFlag_CaseInsensitive) ||
|
||||
str8_match(extension, str8_lit("c++"), StringMatchFlag_CaseInsensitive) ||
|
||||
str8_match(extension, str8_lit("C"), 0) ||
|
||||
str8_match(extension, str8_lit("hpp"), StringMatchFlag_CaseInsensitive) ||
|
||||
str8_match(extension, str8_lit("hxx"), StringMatchFlag_CaseInsensitive) ||
|
||||
str8_match(extension, str8_lit("hh"), StringMatchFlag_CaseInsensitive) ||
|
||||
str8_match(extension, str8_lit("h++"), StringMatchFlag_CaseInsensitive) ||
|
||||
str8_match(extension, str8_lit("H"), 0))
|
||||
{
|
||||
kind = TXTI_LangKind_CPlusPlus;
|
||||
}
|
||||
return kind;
|
||||
}
|
||||
|
||||
////////////////////////////////
|
||||
//~ rjf: Message Type Functions
|
||||
|
||||
@@ -628,14 +603,14 @@ txti_mut_thread_entry_point(void *p)
|
||||
//- rjf: load file if we need it
|
||||
B32 load_valid = 0;
|
||||
String8 file_contents = {0};
|
||||
TXTI_LangKind lang_kind = TXTI_LangKind_Null;
|
||||
TXT_LangKind lang_kind = TXT_LangKind_Null;
|
||||
U64 timestamp = 0;
|
||||
if(msg->kind == TXTI_MsgKind_Reload) ProfScope("reload file")
|
||||
{
|
||||
FileProperties pre_load_props = os_properties_from_file_path(msg->string);
|
||||
OS_Handle file = os_file_open(OS_AccessFlag_Read|OS_AccessFlag_ShareRead|OS_AccessFlag_ShareWrite, msg->string);
|
||||
file_contents = os_string_from_file_range(scratch.arena, file, r1u64(0, pre_load_props.size));
|
||||
lang_kind = txti_lang_kind_from_extension(str8_skip_last_dot(msg->string));
|
||||
lang_kind = txt_lang_kind_from_extension(str8_skip_last_dot(msg->string));
|
||||
os_file_close(file);
|
||||
FileProperties post_load_props = os_properties_from_file_path(msg->string);
|
||||
load_valid = (post_load_props.modified == pre_load_props.modified);
|
||||
@@ -646,16 +621,7 @@ txti_mut_thread_entry_point(void *p)
|
||||
}
|
||||
|
||||
//- rjf: nonzero lang kind -> unpack lang info
|
||||
TXTI_LangLexFunctionType *lex_function = 0;
|
||||
switch(lang_kind)
|
||||
{
|
||||
default:{}break;
|
||||
case TXTI_LangKind_C:
|
||||
case TXTI_LangKind_CPlusPlus:
|
||||
{
|
||||
lex_function = txt_token_array_from_string__c_cpp;
|
||||
}break;
|
||||
}
|
||||
TXT_LangLexFunctionType *lex_function = txt_lex_function_from_lang_kind(lang_kind);
|
||||
|
||||
//- rjf: detect line end kind
|
||||
TXT_LineEndKind line_end_kind = TXT_LineEndKind_Null;
|
||||
@@ -739,7 +705,7 @@ txti_mut_thread_entry_point(void *p)
|
||||
{
|
||||
entity->line_end_kind = line_end_kind;
|
||||
}
|
||||
if(lang_kind != TXTI_LangKind_Null)
|
||||
if(lang_kind != TXT_LangKind_Null)
|
||||
{
|
||||
entity->lang_kind = lang_kind;
|
||||
}
|
||||
|
||||
+2
-17
@@ -56,20 +56,6 @@ struct TXTI_Handle
|
||||
U64 u64[2];
|
||||
};
|
||||
|
||||
////////////////////////////////
|
||||
//~ rjf: Language Kinds
|
||||
|
||||
// Source languages distinguished by the txti layer.
typedef enum TXTI_LangKind
{
  TXTI_LangKind_Null = 0,   // zero value
  TXTI_LangKind_C,
  TXTI_LangKind_CPlusPlus,
  TXTI_LangKind_COUNT       // number of enumerators above
}
TXTI_LangKind;
|
||||
|
||||
// Function type for a language lexer: takes an arena, a U64 counter pointer, and the text to lex; returns a token array.
typedef TXT_TokenArray TXTI_LangLexFunctionType(Arena *arena, U64 *bytes_processed_counter, String8 string);
|
||||
|
||||
////////////////////////////////
|
||||
//~ rjf: Buffer Entity Types
|
||||
|
||||
@@ -106,7 +92,7 @@ struct TXTI_Entity
|
||||
|
||||
// rjf: metadata
|
||||
TXT_LineEndKind line_end_kind;
|
||||
TXTI_LangKind lang_kind;
|
||||
TXT_LangKind lang_kind;
|
||||
U64 bytes_processed;
|
||||
U64 bytes_to_process;
|
||||
U64 working_count;
|
||||
@@ -157,7 +143,7 @@ struct TXTI_BufferInfo
|
||||
String8 path;
|
||||
U64 timestamp;
|
||||
TXT_LineEndKind line_end_kind;
|
||||
TXTI_LangKind lang_kind;
|
||||
TXT_LangKind lang_kind;
|
||||
U64 total_line_count;
|
||||
U64 last_line_size;
|
||||
U64 max_line_size;
|
||||
@@ -258,7 +244,6 @@ internal void txti_init(void);
|
||||
//~ rjf: Basic Helpers
|
||||
|
||||
internal U64 txti_hash_from_string(String8 string);
|
||||
internal TXTI_LangKind txti_lang_kind_from_extension(String8 extension);
|
||||
|
||||
////////////////////////////////
|
||||
//~ rjf: Message Type Functions
|
||||
|
||||
Reference in New Issue
Block a user