diff --git a/src/df/gfx/df_gfx.c b/src/df/gfx/df_gfx.c index 9d98f8f6..bb2d11d1 100644 --- a/src/df/gfx/df_gfx.c +++ b/src/df/gfx/df_gfx.c @@ -8056,18 +8056,18 @@ df_rgba_from_theme_color(DF_ThemeColor color) } internal DF_ThemeColor -df_theme_color_from_txti_token_kind(TXTI_TokenKind kind) +df_theme_color_from_txt_token_kind(TXT_TokenKind kind) { DF_ThemeColor color = DF_ThemeColor_CodeDefault; switch(kind) { default:break; - case TXTI_TokenKind_Keyword:{color = DF_ThemeColor_CodeKeyword;}break; - case TXTI_TokenKind_Numeric:{color = DF_ThemeColor_CodeNumeric;}break; - case TXTI_TokenKind_String: {color = DF_ThemeColor_CodeString;}break; - case TXTI_TokenKind_Meta: {color = DF_ThemeColor_CodeMeta;}break; - case TXTI_TokenKind_Comment:{color = DF_ThemeColor_CodeComment;}break; - case TXTI_TokenKind_Symbol: {color = DF_ThemeColor_CodeSymbol;}break; + case TXT_TokenKind_Keyword:{color = DF_ThemeColor_CodeKeyword;}break; + case TXT_TokenKind_Numeric:{color = DF_ThemeColor_CodeNumeric;}break; + case TXT_TokenKind_String: {color = DF_ThemeColor_CodeString;}break; + case TXT_TokenKind_Meta: {color = DF_ThemeColor_CodeMeta;}break; + case TXT_TokenKind_Comment:{color = DF_ThemeColor_CodeComment;}break; + case TXT_TokenKind_Symbol: {color = DF_ThemeColor_CodeSymbol;}break; } return color; } @@ -10063,7 +10063,7 @@ df_code_slice(DF_Window *ws, DF_CtrlCtx *ctrl_ctx, EVAL_ParseCtx *parse_ctx, DF_ { U64 line_slice_idx = mouse_pt.line-params->line_num_range.min; String8 line_text = params->line_text[line_slice_idx]; - TXTI_TokenArray line_tokens = params->line_tokens[line_slice_idx]; + TXT_TokenArray line_tokens = params->line_tokens[line_slice_idx]; Rng1U64 line_range = params->line_ranges[line_slice_idx]; U64 mouse_pt_off = line_range.min + (mouse_pt.column-1); Rng1U64 expr_off_rng = txti_expr_range_from_line_off_range_string_tokens(mouse_pt_off, line_range, line_text, &line_tokens); @@ -10261,7 +10261,7 @@ df_code_slice(DF_Window *ws, DF_CtrlCtx *ctrl_ctx, EVAL_ParseCtx 
*parse_ctx, DF_ { String8 line_string = params->line_text[line_idx]; Rng1U64 line_range = params->line_ranges[line_idx]; - TXTI_TokenArray *line_tokens = &params->line_tokens[line_idx]; + TXT_TokenArray *line_tokens = &params->line_tokens[line_idx]; ui_set_next_text_padding(-2); UI_Key line_key = ui_key_from_stringf(top_container_box->key, "ln_%I64x", line_num); Vec4F32 line_bg_color = line_bg_colors[line_idx]; @@ -10292,9 +10292,9 @@ df_code_slice(DF_Window *ws, DF_CtrlCtx *ctrl_ctx, EVAL_ParseCtx *parse_ctx, DF_ } else { - TXTI_Token *line_tokens_first = line_tokens->v; - TXTI_Token *line_tokens_opl = line_tokens->v + line_tokens->count; - for(TXTI_Token *token = line_tokens_first; token < line_tokens_opl; token += 1) + TXT_Token *line_tokens_first = line_tokens->v; + TXT_Token *line_tokens_opl = line_tokens->v + line_tokens->count; + for(TXT_Token *token = line_tokens_first; token < line_tokens_opl; token += 1) { // rjf: token -> token string String8 token_string = {0}; @@ -10314,9 +10314,9 @@ df_code_slice(DF_Window *ws, DF_CtrlCtx *ctrl_ctx, EVAL_ParseCtx *parse_ctx, DF_ // rjf: token -> token color Vec4F32 token_color = df_rgba_from_theme_color(DF_ThemeColor_CodeDefault); { - DF_ThemeColor new_color_kind = df_theme_color_from_txti_token_kind(token->kind); + DF_ThemeColor new_color_kind = df_theme_color_from_txt_token_kind(token->kind); F32 mix_t = 1.f; - if(token->kind == TXTI_TokenKind_Identifier) + if(token->kind == TXT_TokenKind_Identifier) { B32 mapped_special = 0; for(DF_EntityNode *n = params->relevant_binaries.first; n != 0; n = n->next) @@ -10808,12 +10808,12 @@ df_fancy_string_list_from_code_string(Arena *arena, F32 alpha, B32 indirection_s { Temp scratch = scratch_begin(&arena, 1); D_FancyStringList fancy_strings = {0}; - TXTI_TokenArray tokens = txti_token_array_from_string__cpp(scratch.arena, 0, string); - TXTI_Token *tokens_opl = tokens.v+tokens.count; + TXT_TokenArray tokens = txt_token_array_from_string__c_cpp(scratch.arena, 0, string); + TXT_Token
*tokens_opl = tokens.v+tokens.count; S32 indirection_counter = 0; - for(TXTI_Token *token = tokens.v; token < tokens_opl; token += 1) + for(TXT_Token *token = tokens.v; token < tokens_opl; token += 1) { - DF_ThemeColor token_color = df_theme_color_from_txti_token_kind(token->kind); + DF_ThemeColor token_color = df_theme_color_from_txt_token_kind(token->kind); Vec4F32 token_color_rgba = df_rgba_from_theme_color(token_color); token_color_rgba.w *= alpha; String8 token_string = str8_substr(string, token->range); @@ -10833,7 +10833,7 @@ df_fancy_string_list_from_code_string(Arena *arena, F32 alpha, B32 indirection_s }; d_fancy_string_list_push(arena, &fancy_strings, &fancy_string); }break; - case TXTI_TokenKind_Identifier: + case TXT_TokenKind_Identifier: { D_FancyString fancy_string = { @@ -10844,7 +10844,7 @@ df_fancy_string_list_from_code_string(Arena *arena, F32 alpha, B32 indirection_s }; d_fancy_string_list_push(arena, &fancy_strings, &fancy_string); }break; - case TXTI_TokenKind_Numeric: + case TXT_TokenKind_Numeric: { Vec4F32 token_color_rgba_alt = token_color_rgba; token_color_rgba_alt.x *= 0.7f; diff --git a/src/df/gfx/df_gfx.h b/src/df/gfx/df_gfx.h index 48dc090b..421084b5 100644 --- a/src/df/gfx/df_gfx.h +++ b/src/df/gfx/df_gfx.h @@ -453,7 +453,7 @@ struct DF_CodeSliceParams Rng1S64 line_num_range; String8 *line_text; Rng1U64 *line_ranges; - TXTI_TokenArray *line_tokens; + TXT_TokenArray *line_tokens; DF_EntityList *line_bps; DF_EntityList *line_ips; DF_EntityList *line_pins; @@ -1010,7 +1010,7 @@ internal DF_CmdSpecList df_cmd_spec_list_from_event_flags(Arena *arena, OS_Event //- rjf: colors internal Vec4F32 df_rgba_from_theme_color(DF_ThemeColor color); -internal DF_ThemeColor df_theme_color_from_txti_token_kind(TXTI_TokenKind kind); +internal DF_ThemeColor df_theme_color_from_txt_token_kind(TXT_TokenKind kind); //- rjf: fonts/sizes internal F_Tag df_font_from_slot(DF_FontSlot slot); diff --git a/src/df/gfx/df_view_rule_hooks.c 
b/src/df/gfx/df_view_rule_hooks.c index da3d8842..2c63c1d4 100644 --- a/src/df/gfx/df_view_rule_hooks.c +++ b/src/df/gfx/df_view_rule_hooks.c @@ -715,7 +715,7 @@ DF_GFX_VIEW_RULE_BLOCK_UI_FUNCTION_DEF(text) code_slice_params.line_num_range = r1s64(1, info.lines_count); code_slice_params.line_text = push_array(scratch.arena, String8, info.lines_count); code_slice_params.line_ranges = push_array(scratch.arena, Rng1U64, info.lines_count); - code_slice_params.line_tokens = push_array(scratch.arena, TXTI_TokenArray, info.lines_count); + code_slice_params.line_tokens = push_array(scratch.arena, TXT_TokenArray, info.lines_count); code_slice_params.line_bps = push_array(scratch.arena, DF_EntityList, info.lines_count); code_slice_params.line_ips = push_array(scratch.arena, DF_EntityList, info.lines_count); code_slice_params.line_pins = push_array(scratch.arena, DF_EntityList, info.lines_count); diff --git a/src/df/gfx/df_views.c b/src/df/gfx/df_views.c index a6f1eacb..8084be2c 100644 --- a/src/df/gfx/df_views.c +++ b/src/df/gfx/df_views.c @@ -372,13 +372,13 @@ df_entity_lister_item_array_sort_by_strength__in_place(DF_EntityListerItemArray //////////////////////////////// //~ rjf: Disassembly View -internal TXTI_TokenArray -df_txti_token_array_from_dasm_arch_string(Arena *arena, Architecture arch, String8 string) +internal TXT_TokenArray +df_txt_token_array_from_dasm_arch_string(Arena *arena, Architecture arch, String8 string) { Temp scratch = scratch_begin(&arena, 1); - TXTI_TokenChunkList tokens = {0}; + TXT_TokenChunkList tokens = {0}; { - TXTI_TokenKind active_token_kind = TXTI_TokenKind_Null; + TXT_TokenKind active_token_kind = TXT_TokenKind_Null; U64 active_token_start_off = 0; U64 off = 0; B32 escaped = 0; @@ -388,8 +388,8 @@ df_txti_token_array_from_dasm_arch_string(Arena *arena, Architecture arch, Strin U8 byte = (off+0 < string.size) ? string.str[off+0] : 0; U8 next_byte = (off+1 < string.size) ? 
string.str[off+1] : 0; B32 ender_found = 0; - advance = (active_token_kind != TXTI_TokenKind_Null ? 1 : 0); - if(off == string.size && active_token_kind != TXTI_TokenKind_Null) + advance = (active_token_kind != TXT_TokenKind_Null ? 1 : 0); + if(off == string.size && active_token_kind != TXT_TokenKind_Null) { ender_found = 1; advance = 1; @@ -397,38 +397,38 @@ df_txti_token_array_from_dasm_arch_string(Arena *arena, Architecture arch, Strin switch(active_token_kind) { default: - case TXTI_TokenKind_Null: + case TXT_TokenKind_Null: { if(byte == ' ' || byte == '\t' || byte == '\v' || byte == '\f' || byte == '\r' || byte == '\n') { active_token_start_off = off; - active_token_kind = TXTI_TokenKind_Whitespace; + active_token_kind = TXT_TokenKind_Whitespace; advance = 1; } else if(('a' <= byte && byte <= 'z') || ('A' <= byte && byte <= 'Z') || byte == '_') { active_token_start_off = off; - active_token_kind = TXTI_TokenKind_Identifier; + active_token_kind = TXT_TokenKind_Identifier; advance = 1; } else if(byte == '\'') { active_token_start_off = off; - active_token_kind = TXTI_TokenKind_String; + active_token_kind = TXT_TokenKind_String; advance = 1; string_is_char = 1; } else if(byte == '"') { active_token_start_off = off; - active_token_kind = TXTI_TokenKind_String; + active_token_kind = TXT_TokenKind_String; advance = 1; string_is_char = 0; } else if(('0' <= byte && byte <= '9') || (byte == '.' && '0' <= next_byte && next_byte <= '9')) { active_token_start_off = off; - active_token_kind = TXTI_TokenKind_Numeric; + active_token_kind = TXT_TokenKind_Numeric; advance = 1; } else if(byte == '~' || byte == '!' 
|| byte == '%' || byte == '^' || @@ -439,29 +439,29 @@ df_txti_token_array_from_dasm_arch_string(Arena *arena, Architecture arch, Strin byte == '>' || byte == ',' || byte == '.') { active_token_start_off = off; - active_token_kind = TXTI_TokenKind_Symbol; + active_token_kind = TXT_TokenKind_Symbol; advance = 1; } else { active_token_start_off = off; - active_token_kind = TXTI_TokenKind_Error; + active_token_kind = TXT_TokenKind_Error; advance = 1; } }break; - case TXTI_TokenKind_Whitespace: + case TXT_TokenKind_Whitespace: if(byte != ' ' && byte != '\t' && byte != '\v' && byte != '\f') { ender_found = 1; advance = 0; }break; - case TXTI_TokenKind_Identifier: + case TXT_TokenKind_Identifier: if((byte < 'a' || 'z' < byte) && (byte < 'A' || 'Z' < byte) && (byte < '0' || '9' < byte) && byte != '_') { ender_found = 1; advance = 0; }break; - case TXTI_TokenKind_String: + case TXT_TokenKind_String: { U8 ender_byte = string_is_char ? '\'' : '"'; if(!escaped && byte == ender_byte) @@ -488,13 +488,13 @@ df_txti_token_array_from_dasm_arch_string(Arena *arena, Architecture arch, Strin } } }break; - case TXTI_TokenKind_Numeric: + case TXT_TokenKind_Numeric: if((byte < 'a' || 'z' < byte) && (byte < 'A' || 'Z' < byte) && (byte < '0' || '9' < byte) && byte != '.') { ender_found = 1; advance = 0; }break; - case TXTI_TokenKind_Symbol: + case TXT_TokenKind_Symbol: if(1) { // NOTE(rjf): avoiding maximum munch rule for now @@ -511,7 +511,7 @@ df_txti_token_array_from_dasm_arch_string(Arena *arena, Architecture arch, Strin ender_found = 1; advance = 0; }break; - case TXTI_TokenKind_Error: + case TXT_TokenKind_Error: { ender_found = 1; advance = 0; @@ -519,22 +519,22 @@ df_txti_token_array_from_dasm_arch_string(Arena *arena, Architecture arch, Strin } if(ender_found != 0) { - TXTI_Token token = {active_token_kind, r1u64(active_token_start_off, off+advance)}; - if(active_token_kind == TXTI_TokenKind_Identifier) + TXT_Token token = {active_token_kind, r1u64(active_token_start_off, 
off+advance)}; + if(active_token_kind == TXT_TokenKind_Identifier) { String8 token_string = str8_substr(string, token.range); if(df_info_summary_from_string(arch, token_string).size != 0) { - token.kind = TXTI_TokenKind_Keyword; + token.kind = TXT_TokenKind_Keyword; } } - txti_token_chunk_list_push(arena, &tokens, 1024, &token); - active_token_kind = TXTI_TokenKind_Null; + txt_token_chunk_list_push(arena, &tokens, 1024, &token); + active_token_kind = TXT_TokenKind_Null; active_token_start_off = token.range.max; } } } - TXTI_TokenArray result = txti_token_array_from_chunk_list(arena, &tokens); + TXT_TokenArray result = txt_token_array_from_chunk_list(arena, &tokens); scratch_end(scratch); return result; } @@ -5844,8 +5844,8 @@ DF_VIEW_UI_FUNCTION_DEF(Code) ui_spacer(ui_pct(1, 0)); ui_labelf("(read only)"); ui_labelf("%s", - info.line_end_kind == TXTI_LineEndKind_LF ? "lf" : - info.line_end_kind == TXTI_LineEndKind_CRLF ? "crlf" : + info.line_end_kind == TXT_LineEndKind_LF ? "lf" : + info.line_end_kind == TXT_LineEndKind_CRLF ? 
"crlf" : "bin"); } } @@ -6185,7 +6185,7 @@ DF_VIEW_UI_FUNCTION_DEF(Disassembly) code_slice_params.line_num_range = visible_line_num_range; code_slice_params.line_text = push_array(scratch.arena, String8, visible_line_count); code_slice_params.line_ranges = push_array(scratch.arena, Rng1U64, visible_line_count); - code_slice_params.line_tokens = push_array(scratch.arena, TXTI_TokenArray, visible_line_count); + code_slice_params.line_tokens = push_array(scratch.arena, TXT_TokenArray, visible_line_count); code_slice_params.line_bps = push_array(scratch.arena, DF_EntityList, visible_line_count); code_slice_params.line_ips = push_array(scratch.arena, DF_EntityList, visible_line_count); code_slice_params.line_pins = push_array(scratch.arena, DF_EntityList, visible_line_count); @@ -6230,7 +6230,7 @@ DF_VIEW_UI_FUNCTION_DEF(Disassembly) for(S64 line_num = visible_line_num_range.min; line_num < visible_line_num_range.max; line_num += 1) { U64 idx = line_num-visible_line_num_range.min; - TXTI_TokenArray tokens = df_txti_token_array_from_dasm_arch_string(scratch.arena, df_architecture_from_entity(process), code_slice_params.line_text[idx]); + TXT_TokenArray tokens = df_txt_token_array_from_dasm_arch_string(scratch.arena, df_architecture_from_entity(process), code_slice_params.line_text[idx]); code_slice_params.line_tokens[idx] = tokens; } diff --git a/src/df/gfx/df_views.h b/src/df/gfx/df_views.h index 064609d8..e11c9e7d 100644 --- a/src/df/gfx/df_views.h +++ b/src/df/gfx/df_views.h @@ -438,7 +438,7 @@ internal void df_entity_lister_item_array_sort_by_strength__in_place(DF_EntityLi //////////////////////////////// //~ rjf: Disassembly View -internal TXTI_TokenArray df_txti_token_array_from_dasm_arch_string(Arena *arena, Architecture arch, String8 string); +internal TXT_TokenArray df_txt_token_array_from_dasm_arch_string(Arena *arena, Architecture arch, String8 string); //////////////////////////////// //~ rjf: Eval/Watch Views diff --git a/src/txti/txti.c b/src/txti/txti.c 
index e7d9da25..d58e0fc4 100644 --- a/src/txti/txti.c +++ b/src/txti/txti.c @@ -72,355 +72,6 @@ txti_lang_kind_from_extension(String8 extension) return kind; } -//////////////////////////////// -//~ rjf: Token Type Functions - -internal void -txti_token_chunk_list_push(Arena *arena, TXTI_TokenChunkList *list, U64 cap, TXTI_Token *token) -{ - TXTI_TokenChunkNode *node = list->last; - if(node == 0 || node->count >= node->cap) - { - node = push_array(arena, TXTI_TokenChunkNode, 1); - SLLQueuePush(list->first, list->last, node); - node->cap = cap; - node->v = push_array_no_zero(arena, TXTI_Token, node->cap); - list->chunk_count += 1; - } - MemoryCopyStruct(&node->v[node->count], token); - node->count += 1; - list->token_count += 1; -} - -internal void -txti_token_list_push(Arena *arena, TXTI_TokenList *list, TXTI_Token *token) -{ - TXTI_TokenNode *node = push_array(arena, TXTI_TokenNode, 1); - MemoryCopyStruct(&node->v, token); - SLLQueuePush(list->first, list->last, node); - list->count += 1; -} - -internal TXTI_TokenArray -txti_token_array_from_chunk_list(Arena *arena, TXTI_TokenChunkList *list) -{ - TXTI_TokenArray array = {0}; - array.count = list->token_count; - array.v = push_array_no_zero(arena, TXTI_Token, array.count); - U64 idx = 0; - for(TXTI_TokenChunkNode *n = list->first; n != 0; n = n->next) - { - MemoryCopy(array.v+idx, n->v, n->count*sizeof(TXTI_Token)); - idx += n->count; - } - return array; -} - -internal TXTI_TokenArray -txti_token_array_from_list(Arena *arena, TXTI_TokenList *list) -{ - TXTI_TokenArray array = {0}; - array.count = list->count; - array.v = push_array_no_zero(arena, TXTI_Token, array.count); - U64 idx = 0; - for(TXTI_TokenNode *n = list->first; n != 0; n = n->next) - { - MemoryCopyStruct(array.v+idx, &n->v); - idx += 1; - } - return array; -} - -//////////////////////////////// -//~ rjf: Lexing Functions - -internal TXTI_TokenArray -txti_token_array_from_string__cpp(Arena *arena, U64 *bytes_processed_counter, String8 string) -{ - 
Temp scratch = scratch_begin(&arena, 1); - - //- rjf: generate token list - TXTI_TokenChunkList tokens = {0}; - { - B32 comment_is_single_line = 0; - B32 string_is_char = 0; - TXTI_TokenKind active_token_kind = TXTI_TokenKind_Null; - U64 active_token_start_idx = 0; - B32 escaped = 0; - B32 next_escaped = 0; - U64 byte_process_start_idx = 0; - for(U64 idx = 0; idx <= string.size;) - { - U8 byte = (idx+0 < string.size) ? (string.str[idx+0]) : 0; - U8 next_byte = (idx+1 < string.size) ? (string.str[idx+1]) : 0; - - // rjf: update counter - if(bytes_processed_counter != 0 && ((idx-byte_process_start_idx) >= 1000 || idx == string.size)) - { - ins_atomic_u64_add_eval(bytes_processed_counter, (idx-byte_process_start_idx)); - byte_process_start_idx = idx; - } - - // rjf: escaping - if(escaped && (byte != '\r' && byte != '\n')) - { - next_escaped = 0; - } - else if(!escaped && byte == '\\') - { - next_escaped = 1; - } - - // rjf: take starter, determine active token kind - if(active_token_kind == TXTI_TokenKind_Null) - { - // rjf: use next bytes to start a new token - if(0){} - else if(char_is_space(byte)) { active_token_kind = TXTI_TokenKind_Whitespace; } - else if(byte == '_' || - byte == '$' || - char_is_alpha(byte)) { active_token_kind = TXTI_TokenKind_Identifier; } - else if(char_is_digit(byte, 10) || - (byte == '.' && - char_is_digit(next_byte, 10))) { active_token_kind = TXTI_TokenKind_Numeric; } - else if(byte == '"') { active_token_kind = TXTI_TokenKind_String; string_is_char = 0; } - else if(byte == '\'') { active_token_kind = TXTI_TokenKind_String; string_is_char = 1; } - else if(byte == '/' && next_byte == '/') { active_token_kind = TXTI_TokenKind_Comment; comment_is_single_line = 1; } - else if(byte == '/' && next_byte == '*') { active_token_kind = TXTI_TokenKind_Comment; comment_is_single_line = 0; } - else if(byte == '~' || byte == '!' 
|| - byte == '%' || byte == '^' || - byte == '&' || byte == '*' || - byte == '(' || byte == ')' || - byte == '-' || byte == '=' || - byte == '+' || byte == '[' || - byte == ']' || byte == '{' || - byte == '}' || byte == ':' || - byte == ';' || byte == ',' || - byte == '.' || byte == '<' || - byte == '>' || byte == '/' || - byte == '?' || byte == '|') { active_token_kind = TXTI_TokenKind_Symbol; } - else if(byte == '#') { active_token_kind = TXTI_TokenKind_Meta; } - - // rjf: start new token - if(active_token_kind != TXTI_TokenKind_Null) - { - active_token_start_idx = idx; - } - - // rjf: invalid token kind -> emit error - else - { - TXTI_Token token = {TXTI_TokenKind_Error, r1u64(idx, idx+1)}; - txti_token_chunk_list_push(scratch.arena, &tokens, 4096, &token); - } - } - - // rjf: look for ender - U64 ender_pad = 0; - B32 ender_found = 0; - if(active_token_kind != TXTI_TokenKind_Null && idx>active_token_start_idx) - { - if(idx == string.size) - { - ender_pad = 0; - ender_found = 1; - } - else switch(active_token_kind) - { - default:break; - case TXTI_TokenKind_Whitespace: - { - ender_found = !char_is_space(byte); - }break; - case TXTI_TokenKind_Identifier: - { - ender_found = (!char_is_alpha(byte) && !char_is_digit(byte, 10) && byte != '_' && byte != '$'); - }break; - case TXTI_TokenKind_Numeric: - { - ender_found = (!char_is_alpha(byte) && !char_is_digit(byte, 10) && byte != '_' && byte != '.'); - }break; - case TXTI_TokenKind_String: - { - ender_found = (!escaped && ((!string_is_char && byte == '"') || (string_is_char && byte == '\''))); - ender_pad += 1; - }break; - case TXTI_TokenKind_Symbol: - { - ender_found = (byte != '~' && byte != '!' && - byte != '%' && byte != '^' && - byte != '&' && byte != '*' && - byte != '(' && byte != ')' && - byte != '-' && byte != '=' && - byte != '+' && byte != '[' && - byte != ']' && byte != '{' && - byte != '}' && byte != ':' && - byte != ';' && byte != ',' && - byte != '.' 
&& byte != '<' && - byte != '>' && byte != '/' && - byte != '?' && byte != '|'); - }break; - case TXTI_TokenKind_Comment: - { - if(comment_is_single_line) - { - ender_found = (!escaped && (byte == '\r' || byte == '\n')); - } - else - { - ender_found = (active_token_start_idx+1 < idx && byte == '*' && next_byte == '/'); - ender_pad += 2; - } - }break; - case TXTI_TokenKind_Meta: - { - ender_found = (!escaped && (byte == '\r' || byte == '\n')); - }break; - } - } - - // rjf: next byte is ender => emit token - if(ender_found) - { - TXTI_Token token = {active_token_kind, r1u64(active_token_start_idx, idx+ender_pad)}; - active_token_kind = TXTI_TokenKind_Null; - - // rjf: identifier -> keyword in special cases - if(token.kind == TXTI_TokenKind_Identifier) - { - read_only local_persist String8 cpp_keywords[] = - { - str8_lit_comp("alignas"), - str8_lit_comp("alignof"), - str8_lit_comp("and"), - str8_lit_comp("and_eq"), - str8_lit_comp("asm"), - str8_lit_comp("atomic_cancel"), - str8_lit_comp("atomic_commit"), - str8_lit_comp("atomic_noexcept"), - str8_lit_comp("auto"), - str8_lit_comp("bitand"), - str8_lit_comp("bitor"), - str8_lit_comp("bool"), - str8_lit_comp("break"), - str8_lit_comp("case"), - str8_lit_comp("catch"), - str8_lit_comp("char"), - str8_lit_comp("char8_t"), - str8_lit_comp("char16_t"), - str8_lit_comp("char32_t"), - str8_lit_comp("class"), - str8_lit_comp("compl"), - str8_lit_comp("concept"), - str8_lit_comp("const"), - str8_lit_comp("consteval"), - str8_lit_comp("constexpr"), - str8_lit_comp("constinit"), - str8_lit_comp("const_cast"), - str8_lit_comp("continue"), - str8_lit_comp("co_await"), - str8_lit_comp("co_return"), - str8_lit_comp("co_yield"), - str8_lit_comp("decltype"), - str8_lit_comp("default"), - str8_lit_comp("delete"), - str8_lit_comp("do"), - str8_lit_comp("double"), - str8_lit_comp("dynamic_cast"), - str8_lit_comp("else"), - str8_lit_comp("enum"), - str8_lit_comp("explicit"), - str8_lit_comp("export"), - str8_lit_comp("extern"), - 
str8_lit_comp("false"), - str8_lit_comp("float"), - str8_lit_comp("for"), - str8_lit_comp("friend"), - str8_lit_comp("goto"), - str8_lit_comp("if"), - str8_lit_comp("inline"), - str8_lit_comp("int"), - str8_lit_comp("long"), - str8_lit_comp("mutable"), - str8_lit_comp("namespace"), - str8_lit_comp("new"), - str8_lit_comp("noexcept"), - str8_lit_comp("not"), - str8_lit_comp("not_eq"), - str8_lit_comp("nullptr"), - str8_lit_comp("operator"), - str8_lit_comp("or"), - str8_lit_comp("or_eq"), - str8_lit_comp("private"), - str8_lit_comp("protected"), - str8_lit_comp("public"), - str8_lit_comp("reflexpr"), - str8_lit_comp("register"), - str8_lit_comp("reinterpret_cast"), - str8_lit_comp("requires"), - str8_lit_comp("return"), - str8_lit_comp("short"), - str8_lit_comp("signed"), - str8_lit_comp("sizeof"), - str8_lit_comp("static"), - str8_lit_comp("static_assert"), - str8_lit_comp("static_cast"), - str8_lit_comp("struct"), - str8_lit_comp("switch"), - str8_lit_comp("synchronized"), - str8_lit_comp("template"), - str8_lit_comp("this"), - str8_lit_comp("thread_local"), - str8_lit_comp("throw"), - str8_lit_comp("true"), - str8_lit_comp("try"), - str8_lit_comp("typedef"), - str8_lit_comp("typeid"), - str8_lit_comp("typename"), - str8_lit_comp("union"), - str8_lit_comp("unsigned"), - str8_lit_comp("using"), - str8_lit_comp("virtual"), - str8_lit_comp("void"), - str8_lit_comp("volatile"), - str8_lit_comp("wchar_t"), - str8_lit_comp("while"), - str8_lit_comp("xor"), - str8_lit_comp("xor_eq"), - }; - String8 token_string = str8_substr(string, r1u64(active_token_start_idx, idx+ender_pad)); - for(U64 keyword_idx = 0; keyword_idx < ArrayCount(cpp_keywords); keyword_idx += 1) - { - if(str8_match(cpp_keywords[keyword_idx], token_string, 0)) - { - token.kind = TXTI_TokenKind_Keyword; - break; - } - } - } - - // rjf: push - txti_token_chunk_list_push(scratch.arena, &tokens, 4096, &token); - - // rjf: increment by ender padding - idx += ender_pad; - } - - // rjf: advance by 1 byte if we 
haven't found an ender - if(!ender_found) - { - idx += 1; - } - escaped = next_escaped; - } - } - - //- rjf: token list -> token array - TXTI_TokenArray result = txti_token_array_from_chunk_list(arena, &tokens); - scratch_end(scratch); - return result; -} - //////////////////////////////// //~ rjf: Message Type Functions @@ -607,7 +258,7 @@ txti_slice_from_handle_line_range(Arena *arena, TXTI_Handle handle, Rng1S64 line result.line_count = (U64)dim_1s64(line_range_clamped)+1; result.line_text = push_array(arena, String8, result.line_count); result.line_ranges = push_array(arena, Rng1U64, result.line_count); - result.line_tokens = push_array(arena, TXTI_TokenArray, result.line_count); + result.line_tokens = push_array(arena, TXT_TokenArray, result.line_count); // rjf: fill line ranges & text U64 line_slice_idx = 0; @@ -626,7 +277,7 @@ txti_slice_from_handle_line_range(Arena *arena, TXTI_Handle handle, Rng1S64 line } // rjf: binary search to find first token - TXTI_Token *tokens_first = 0; + TXT_Token *tokens_first = 0; ProfScope("binary search to find first token") { Rng1U64 slice_range = r1u64(result.line_ranges[0].min, result.line_ranges[result.line_count-1].max); @@ -639,7 +290,7 @@ txti_slice_from_handle_line_range(Arena *arena, TXTI_Handle handle, Rng1S64 line { break; } - TXTI_Token *mid_token = &buffer->tokens.v[mid_idx]; + TXT_Token *mid_token = &buffer->tokens.v[mid_idx]; if(mid_token->range.min > slice_range.max) { opl_idx = mid_idx; @@ -661,18 +312,18 @@ txti_slice_from_handle_line_range(Arena *arena, TXTI_Handle handle, Rng1S64 line } // rjf: grab per-line tokens - TXTI_TokenList *line_tokens_lists = push_array(scratch.arena, TXTI_TokenList, result.line_count); + TXT_TokenList *line_tokens_lists = push_array(scratch.arena, TXT_TokenList, result.line_count); if(tokens_first != 0) ProfScope("grab per-line tokens") { - TXTI_Token *tokens_opl = buffer->tokens.v+buffer->tokens.count; + TXT_Token *tokens_opl = buffer->tokens.v+buffer->tokens.count; U64 
line_slice_idx = 0; - for(TXTI_Token *token = tokens_first; token < tokens_opl && line_slice_idx < result.line_count;) + for(TXT_Token *token = tokens_first; token < tokens_opl && line_slice_idx < result.line_count;) { if(token->range.min < result.line_ranges[line_slice_idx].max) { if(token->range.max > result.line_ranges[line_slice_idx].min) { - txti_token_list_push(scratch.arena, &line_tokens_lists[line_slice_idx], token); + txt_token_list_push(scratch.arena, &line_tokens_lists[line_slice_idx], token); } B32 need_token_advance = 0; B32 need_line_advance = 0; @@ -697,7 +348,7 @@ txti_slice_from_handle_line_range(Arena *arena, TXTI_Handle handle, Rng1S64 line // rjf: bake per-line tokens to arrays for(U64 line_slice_idx = 0; line_slice_idx < result.line_count; line_slice_idx += 1) { - result.line_tokens[line_slice_idx] = txti_token_array_from_list(arena, &line_tokens_lists[line_slice_idx]); + result.line_tokens[line_slice_idx] = txt_token_array_from_list(arena, &line_tokens_lists[line_slice_idx]); } } } @@ -733,8 +384,8 @@ txti_string_from_handle_txt_rng(Arena *arena, TXTI_Handle handle, TxtRng range) switch(info.line_end_kind) { default: - case TXTI_LineEndKind_LF:{join.sep = str8_lit("\n");}break; - case TXTI_LineEndKind_CRLF:{join.sep = str8_lit("\r\n");}break; + case TXT_LineEndKind_LF:{join.sep = str8_lit("\n");}break; + case TXT_LineEndKind_CRLF:{join.sep = str8_lit("\r\n");}break; } result = str8_list_join(arena, &line_strings, &join); } @@ -755,18 +406,18 @@ txti_string_from_handle_line_num(Arena *arena, TXTI_Handle handle, S64 line_num) } internal Rng1U64 -txti_expr_range_from_line_off_range_string_tokens(U64 off, Rng1U64 line_range, String8 line_text, TXTI_TokenArray *line_tokens) +txti_expr_range_from_line_off_range_string_tokens(U64 off, Rng1U64 line_range, String8 line_text, TXT_TokenArray *line_tokens) { Rng1U64 result = {0}; Temp scratch = scratch_begin(0, 0); { // rjf: unpack line info - TXTI_Token *line_tokens_first = line_tokens->v; - TXTI_Token 
*line_tokens_opl = line_tokens->v+line_tokens->count; + TXT_Token *line_tokens_first = line_tokens->v; + TXT_Token *line_tokens_opl = line_tokens->v+line_tokens->count; // rjf: find token containing `off` - TXTI_Token *pt_token = 0; - for(TXTI_Token *token = line_tokens_first; + TXT_Token *pt_token = 0; + for(TXT_Token *token = line_tokens_first; token < line_tokens_opl; token += 1) { @@ -778,14 +429,14 @@ txti_expr_range_from_line_off_range_string_tokens(U64 off, Rng1U64 line_range, S switch(token->kind) { default:{}break; - case TXTI_TokenKind_Symbol: + case TXT_TokenKind_Symbol: { token_ender = (str8_match(token_string, str8_lit("]"), 0)); }break; - case TXTI_TokenKind_Identifier: - case TXTI_TokenKind_Keyword: - case TXTI_TokenKind_String: - case TXTI_TokenKind_Meta: + case TXT_TokenKind_Identifier: + case TXT_TokenKind_Keyword: + case TXT_TokenKind_String: + case TXT_TokenKind_Meta: { token_ender = 1; }break; @@ -809,7 +460,7 @@ txti_expr_range_from_line_off_range_string_tokens(U64 off, Rng1U64 line_range, S { B32 walkback_done = 0; S32 nest = 0; - for(TXTI_Token *wb_token = pt_token; + for(TXT_Token *wb_token = pt_token; wb_token >= line_tokens_first && walkback_done == 0; wb_token -= 1) { @@ -819,7 +470,7 @@ txti_expr_range_from_line_off_range_string_tokens(U64 off, Rng1U64 line_range, S switch(wb_token->kind) { default:{}break; - case TXTI_TokenKind_Symbol: + case TXT_TokenKind_Symbol: { B32 is_scope_resolution = str8_match(wb_token_string, str8_lit("::"), 0); B32 is_dot = str8_match(wb_token_string, str8_lit("."), 0); @@ -837,7 +488,7 @@ txti_expr_range_from_line_off_range_string_tokens(U64 off, Rng1U64 line_range, S include_wb_token = 1; } }break; - case TXTI_TokenKind_Identifier: + case TXT_TokenKind_Identifier: { include_wb_token = 1; }break; @@ -868,9 +519,9 @@ txti_expr_range_from_handle_pt(TXTI_Handle handle, TxtPt pt) // rjf: unpack line info String8 line_text = slice.line_text[0]; Rng1U64 line_range = slice.line_ranges[0]; - TXTI_TokenArray 
line_tokens = slice.line_tokens[0]; - TXTI_Token *line_tokens_first = line_tokens.v; - TXTI_Token *line_tokens_opl = line_tokens.v+line_tokens.count; + TXT_TokenArray line_tokens = slice.line_tokens[0]; + TXT_Token *line_tokens_first = line_tokens.v; + TXT_Token *line_tokens_opl = line_tokens.v+line_tokens.count; U64 pt_off = line_range.min + (pt.column-1); // rjf: grab offset range of expression @@ -1002,12 +653,12 @@ txti_mut_thread_entry_point(void *p) case TXTI_LangKind_C: case TXTI_LangKind_CPlusPlus: { - lex_function = txti_token_array_from_string__cpp; + lex_function = txt_token_array_from_string__c_cpp; }break; } //- rjf: detect line end kind - TXTI_LineEndKind line_end_kind = TXTI_LineEndKind_Null; + TXT_LineEndKind line_end_kind = TXT_LineEndKind_Null; if(load_valid) { U64 lf_count = 0; @@ -1025,11 +676,11 @@ txti_mut_thread_entry_point(void *p) } if(cr_count >= lf_count/2 && lf_count >= 1) { - line_end_kind = TXTI_LineEndKind_CRLF; + line_end_kind = TXT_LineEndKind_CRLF; } else if(lf_count >= 1) { - line_end_kind = TXTI_LineEndKind_LF; + line_end_kind = TXT_LineEndKind_LF; } } @@ -1084,7 +735,7 @@ txti_mut_thread_entry_point(void *p) if(entity != 0) { entity->buffer_apply_gen += 1; - if(line_end_kind != TXTI_LineEndKind_Null) + if(line_end_kind != TXT_LineEndKind_Null) { entity->line_end_kind = line_end_kind; } diff --git a/src/txti/txti.h b/src/txti/txti.h index c43b3b27..18df67e3 100644 --- a/src/txti/txti.h +++ b/src/txti/txti.h @@ -56,88 +56,6 @@ struct TXTI_Handle U64 u64[2]; }; -//////////////////////////////// -//~ rjf: Parsed Text Info Types - -typedef enum TXTI_LineEndKind -{ - TXTI_LineEndKind_Null, - TXTI_LineEndKind_LF, - TXTI_LineEndKind_CRLF, - TXTI_LineEndKind_COUNT -} -TXTI_LineEndKind; - -typedef enum TXTI_TokenKind -{ - TXTI_TokenKind_Null, - TXTI_TokenKind_Error, - TXTI_TokenKind_Whitespace, - TXTI_TokenKind_Keyword, - TXTI_TokenKind_Identifier, - TXTI_TokenKind_Numeric, - TXTI_TokenKind_String, - TXTI_TokenKind_Symbol, - 
TXTI_TokenKind_Comment, - TXTI_TokenKind_Meta, // preprocessor, etc. - TXTI_TokenKind_COUNT -} -TXTI_TokenKind; - -typedef struct TXTI_Token TXTI_Token; -struct TXTI_Token -{ - TXTI_TokenKind kind; - Rng1U64 range; -}; - -typedef struct TXTI_TokenChunkNode TXTI_TokenChunkNode; -struct TXTI_TokenChunkNode -{ - TXTI_TokenChunkNode *next; - U64 count; - U64 cap; - TXTI_Token *v; -}; - -typedef struct TXTI_TokenChunkList TXTI_TokenChunkList; -struct TXTI_TokenChunkList -{ - TXTI_TokenChunkNode *first; - TXTI_TokenChunkNode *last; - U64 chunk_count; - U64 token_count; -}; - -typedef struct TXTI_TokenNode TXTI_TokenNode; -struct TXTI_TokenNode -{ - TXTI_TokenNode *next; - TXTI_Token v; -}; - -typedef struct TXTI_TokenList TXTI_TokenList; -struct TXTI_TokenList -{ - TXTI_TokenNode *first; - TXTI_TokenNode *last; - U64 count; -}; - -typedef struct TXTI_TokenArray TXTI_TokenArray; -struct TXTI_TokenArray -{ - U64 count; - TXTI_Token *v; -}; - -typedef struct TXTI_TokenArrayArray TXTI_TokenArrayArray; -struct TXTI_TokenArrayArray -{ - U64 count; - TXTI_TokenArray *v; -}; - //////////////////////////////// //~ rjf: Language Kinds @@ -150,7 +68,7 @@ typedef enum TXTI_LangKind } TXTI_LangKind; -typedef TXTI_TokenArray TXTI_LangLexFunctionType(Arena *arena, U64 *bytes_processed_counter, String8 string); +typedef TXT_TokenArray TXTI_LangLexFunctionType(Arena *arena, U64 *bytes_processed_counter, String8 string); //////////////////////////////// //~ rjf: Buffer Entity Types @@ -173,7 +91,7 @@ struct TXTI_Buffer U64 lines_max_size; // rjf: tokens - TXTI_TokenArray tokens; + TXT_TokenArray tokens; }; typedef struct TXTI_Entity TXTI_Entity; @@ -187,7 +105,7 @@ struct TXTI_Entity U64 mut_gen; // rjf: metadata - TXTI_LineEndKind line_end_kind; + TXT_LineEndKind line_end_kind; TXTI_LangKind lang_kind; U64 bytes_processed; U64 bytes_to_process; @@ -238,7 +156,7 @@ struct TXTI_BufferInfo { String8 path; U64 timestamp; - TXTI_LineEndKind line_end_kind; + TXT_LineEndKind line_end_kind; 
TXTI_LangKind lang_kind; U64 total_line_count; U64 last_line_size; @@ -255,7 +173,7 @@ struct TXTI_Slice U64 line_count; String8 *line_text; Rng1U64 *line_ranges; - TXTI_TokenArray *line_tokens; + TXT_TokenArray *line_tokens; }; //////////////////////////////// @@ -342,19 +260,6 @@ internal void txti_init(void); internal U64 txti_hash_from_string(String8 string); internal TXTI_LangKind txti_lang_kind_from_extension(String8 extension); -//////////////////////////////// -//~ rjf: Token Type Functions - -internal void txti_token_chunk_list_push(Arena *arena, TXTI_TokenChunkList *list, U64 cap, TXTI_Token *token); -internal void txti_token_list_push(Arena *arena, TXTI_TokenList *list, TXTI_Token *token); -internal TXTI_TokenArray txti_token_array_from_chunk_list(Arena *arena, TXTI_TokenChunkList *list); -internal TXTI_TokenArray txti_token_array_from_list(Arena *arena, TXTI_TokenList *list); - -//////////////////////////////// -//~ rjf: Lexing Functions - -internal TXTI_TokenArray txti_token_array_from_string__cpp(Arena *arena, U64 *bytes_processed_counter, String8 string); - //////////////////////////////// //~ rjf: Message Type Functions @@ -373,7 +278,7 @@ internal TXTI_BufferInfo txti_buffer_info_from_handle(Arena *arena, TXTI_Handle internal TXTI_Slice txti_slice_from_handle_line_range(Arena *arena, TXTI_Handle handle, Rng1S64 line_range); internal String8 txti_string_from_handle_txt_rng(Arena *arena, TXTI_Handle handle, TxtRng range); internal String8 txti_string_from_handle_line_num(Arena *arena, TXTI_Handle handle, S64 line_num); -internal Rng1U64 txti_expr_range_from_line_off_range_string_tokens(U64 off, Rng1U64 line_range, String8 line_text, TXTI_TokenArray *line_tokens); +internal Rng1U64 txti_expr_range_from_line_off_range_string_tokens(U64 off, Rng1U64 line_range, String8 line_text, TXT_TokenArray *line_tokens); internal TxtRng txti_expr_range_from_handle_pt(TXTI_Handle handle, TxtPt pt); //- rjf: buffer mutations