deduplicate txti/txt layer lexing

This commit is contained in:
Ryan Fleury
2024-03-26 10:31:20 -07:00
parent 206d48b363
commit 6393766f82
7 changed files with 91 additions and 535 deletions
+20 -20
View File
@@ -8056,18 +8056,18 @@ df_rgba_from_theme_color(DF_ThemeColor color)
}
// Maps a lexer token kind to the theme color used to draw that token.
// Token kinds without an explicit case below keep DF_ThemeColor_CodeDefault.
internal DF_ThemeColor
df_theme_color_from_txti_token_kind(TXTI_TokenKind kind)
df_theme_color_from_txt_token_kind(TXT_TokenKind kind)
{
DF_ThemeColor color = DF_ThemeColor_CodeDefault;
switch(kind)
{
default:break;
case TXTI_TokenKind_Keyword:{color = DF_ThemeColor_CodeKeyword;}break;
case TXTI_TokenKind_Numeric:{color = DF_ThemeColor_CodeNumeric;}break;
case TXTI_TokenKind_String: {color = DF_ThemeColor_CodeString;}break;
case TXTI_TokenKind_Meta: {color = DF_ThemeColor_CodeMeta;}break;
case TXTI_TokenKind_Comment:{color = DF_ThemeColor_CodeComment;}break;
case TXTI_TokenKind_Symbol: {color = DF_ThemeColor_CodeSymbol;}break;
case TXT_TokenKind_Keyword:{color = DF_ThemeColor_CodeKeyword;}break;
case TXT_TokenKind_Numeric:{color = DF_ThemeColor_CodeNumeric;}break;
case TXT_TokenKind_String: {color = DF_ThemeColor_CodeString;}break;
case TXT_TokenKind_Meta: {color = DF_ThemeColor_CodeMeta;}break;
case TXT_TokenKind_Comment:{color = DF_ThemeColor_CodeComment;}break;
case TXT_TokenKind_Symbol: {color = DF_ThemeColor_CodeSymbol;}break;
}
return color;
}
@@ -10063,7 +10063,7 @@ df_code_slice(DF_Window *ws, DF_CtrlCtx *ctrl_ctx, EVAL_ParseCtx *parse_ctx, DF_
{
U64 line_slice_idx = mouse_pt.line-params->line_num_range.min;
String8 line_text = params->line_text[line_slice_idx];
TXTI_TokenArray line_tokens = params->line_tokens[line_slice_idx];
TXT_TokenArray line_tokens = params->line_tokens[line_slice_idx];
Rng1U64 line_range = params->line_ranges[line_slice_idx];
U64 mouse_pt_off = line_range.min + (mouse_pt.column-1);
Rng1U64 expr_off_rng = txti_expr_range_from_line_off_range_string_tokens(mouse_pt_off, line_range, line_text, &line_tokens);
@@ -10261,7 +10261,7 @@ df_code_slice(DF_Window *ws, DF_CtrlCtx *ctrl_ctx, EVAL_ParseCtx *parse_ctx, DF_
{
String8 line_string = params->line_text[line_idx];
Rng1U64 line_range = params->line_ranges[line_idx];
TXTI_TokenArray *line_tokens = &params->line_tokens[line_idx];
TXT_TokenArray *line_tokens = &params->line_tokens[line_idx];
ui_set_next_text_padding(-2);
UI_Key line_key = ui_key_from_stringf(top_container_box->key, "ln_%I64x", line_num);
Vec4F32 line_bg_color = line_bg_colors[line_idx];
@@ -10292,9 +10292,9 @@ df_code_slice(DF_Window *ws, DF_CtrlCtx *ctrl_ctx, EVAL_ParseCtx *parse_ctx, DF_
}
else
{
TXTI_Token *line_tokens_first = line_tokens->v;
TXTI_Token *line_tokens_opl = line_tokens->v + line_tokens->count;
for(TXTI_Token *token = line_tokens_first; token < line_tokens_opl; token += 1)
TXT_Token *line_tokens_first = line_tokens->v;
TXT_Token *line_tokens_opl = line_tokens->v + line_tokens->count;
for(TXT_Token *token = line_tokens_first; token < line_tokens_opl; token += 1)
{
// rjf: token -> token string
String8 token_string = {0};
@@ -10314,9 +10314,9 @@ df_code_slice(DF_Window *ws, DF_CtrlCtx *ctrl_ctx, EVAL_ParseCtx *parse_ctx, DF_
// rjf: token -> token color
Vec4F32 token_color = df_rgba_from_theme_color(DF_ThemeColor_CodeDefault);
{
DF_ThemeColor new_color_kind = df_theme_color_from_txti_token_kind(token->kind);
DF_ThemeColor new_color_kind = df_theme_color_from_txt_token_kind(token->kind);
F32 mix_t = 1.f;
if(token->kind == TXTI_TokenKind_Identifier)
if(token->kind == TXT_TokenKind_Identifier)
{
B32 mapped_special = 0;
for(DF_EntityNode *n = params->relevant_binaries.first; n != 0; n = n->next)
@@ -10808,12 +10808,12 @@ df_fancy_string_list_from_code_string(Arena *arena, F32 alpha, B32 indirection_s
{
Temp scratch = scratch_begin(&arena, 1);
D_FancyStringList fancy_strings = {0};
TXTI_TokenArray tokens = txti_token_array_from_string__cpp(scratch.arena, 0, string);
TXTI_Token *tokens_opl = tokens.v+tokens.count;
TXT_TokenArray tokens = txt_token_array_from_string__c_cpp(scratch.arena, 0, string);
TXT_Token *tokens_opl = tokens.v+tokens.count;
S32 indirection_counter = 0;
for(TXTI_Token *token = tokens.v; token < tokens_opl; token += 1)
for(TXT_Token *token = tokens.v; token < tokens_opl; token += 1)
{
DF_ThemeColor token_color = df_theme_color_from_txti_token_kind(token->kind);
DF_ThemeColor token_color = df_theme_color_from_txt_token_kind(token->kind);
Vec4F32 token_color_rgba = df_rgba_from_theme_color(token_color);
token_color_rgba.w *= alpha;
String8 token_string = str8_substr(string, token->range);
@@ -10833,7 +10833,7 @@ df_fancy_string_list_from_code_string(Arena *arena, F32 alpha, B32 indirection_s
};
d_fancy_string_list_push(arena, &fancy_strings, &fancy_string);
}break;
case TXTI_TokenKind_Identifier:
case TXT_TokenKind_Identifier:
{
D_FancyString fancy_string =
{
@@ -10844,7 +10844,7 @@ df_fancy_string_list_from_code_string(Arena *arena, F32 alpha, B32 indirection_s
};
d_fancy_string_list_push(arena, &fancy_strings, &fancy_string);
}break;
case TXTI_TokenKind_Numeric:
case TXT_TokenKind_Numeric:
{
Vec4F32 token_color_rgba_alt = token_color_rgba;
token_color_rgba_alt.x *= 0.7f;
+2 -2
View File
@@ -453,7 +453,7 @@ struct DF_CodeSliceParams
Rng1S64 line_num_range;
String8 *line_text;
Rng1U64 *line_ranges;
TXTI_TokenArray *line_tokens;
TXT_TokenArray *line_tokens;
DF_EntityList *line_bps;
DF_EntityList *line_ips;
DF_EntityList *line_pins;
@@ -1010,7 +1010,7 @@ internal DF_CmdSpecList df_cmd_spec_list_from_event_flags(Arena *arena, OS_Event
//- rjf: colors
internal Vec4F32 df_rgba_from_theme_color(DF_ThemeColor color);
internal DF_ThemeColor df_theme_color_from_txti_token_kind(TXTI_TokenKind kind);
internal DF_ThemeColor df_theme_color_from_txt_token_kind(TXT_TokenKind kind);
//- rjf: fonts/sizes
internal F_Tag df_font_from_slot(DF_FontSlot slot);
+1 -1
View File
@@ -715,7 +715,7 @@ DF_GFX_VIEW_RULE_BLOCK_UI_FUNCTION_DEF(text)
code_slice_params.line_num_range = r1s64(1, info.lines_count);
code_slice_params.line_text = push_array(scratch.arena, String8, info.lines_count);
code_slice_params.line_ranges = push_array(scratch.arena, Rng1U64, info.lines_count);
code_slice_params.line_tokens = push_array(scratch.arena, TXTI_TokenArray, info.lines_count);
code_slice_params.line_tokens = push_array(scratch.arena, TXT_TokenArray, info.lines_count);
code_slice_params.line_bps = push_array(scratch.arena, DF_EntityList, info.lines_count);
code_slice_params.line_ips = push_array(scratch.arena, DF_EntityList, info.lines_count);
code_slice_params.line_pins = push_array(scratch.arena, DF_EntityList, info.lines_count);
+30 -30
View File
@@ -372,13 +372,13 @@ df_entity_lister_item_array_sort_by_strength__in_place(DF_EntityListerItemArray
////////////////////////////////
//~ rjf: Disassembly View
internal TXTI_TokenArray
df_txti_token_array_from_dasm_arch_string(Arena *arena, Architecture arch, String8 string)
internal TXT_TokenArray
df_txt_token_array_from_dasm_arch_string(Arena *arena, Architecture arch, String8 string)
{
Temp scratch = scratch_begin(&arena, 1);
TXTI_TokenChunkList tokens = {0};
TXT_TokenChunkList tokens = {0};
{
TXTI_TokenKind active_token_kind = TXTI_TokenKind_Null;
TXT_TokenKind active_token_kind = TXT_TokenKind_Null;
U64 active_token_start_off = 0;
U64 off = 0;
B32 escaped = 0;
@@ -388,8 +388,8 @@ df_txti_token_array_from_dasm_arch_string(Arena *arena, Architecture arch, Strin
U8 byte = (off+0 < string.size) ? string.str[off+0] : 0;
U8 next_byte = (off+1 < string.size) ? string.str[off+1] : 0;
B32 ender_found = 0;
advance = (active_token_kind != TXTI_TokenKind_Null ? 1 : 0);
if(off == string.size && active_token_kind != TXTI_TokenKind_Null)
advance = (active_token_kind != TXT_TokenKind_Null ? 1 : 0);
if(off == string.size && active_token_kind != TXT_TokenKind_Null)
{
ender_found = 1;
advance = 1;
@@ -397,38 +397,38 @@ df_txti_token_array_from_dasm_arch_string(Arena *arena, Architecture arch, Strin
switch(active_token_kind)
{
default:
case TXTI_TokenKind_Null:
case TXT_TokenKind_Null:
{
if(byte == ' ' || byte == '\t' || byte == '\v' || byte == '\f' || byte == '\r' || byte == '\n')
{
active_token_start_off = off;
active_token_kind = TXTI_TokenKind_Whitespace;
active_token_kind = TXT_TokenKind_Whitespace;
advance = 1;
}
else if(('a' <= byte && byte <= 'z') || ('A' <= byte && byte <= 'Z') || byte == '_')
{
active_token_start_off = off;
active_token_kind = TXTI_TokenKind_Identifier;
active_token_kind = TXT_TokenKind_Identifier;
advance = 1;
}
else if(byte == '\'')
{
active_token_start_off = off;
active_token_kind = TXTI_TokenKind_String;
active_token_kind = TXT_TokenKind_String;
advance = 1;
string_is_char = 1;
}
else if(byte == '"')
{
active_token_start_off = off;
active_token_kind = TXTI_TokenKind_String;
active_token_kind = TXT_TokenKind_String;
advance = 1;
string_is_char = 0;
}
else if(('0' <= byte && byte <= '9') || (byte == '.' && '0' <= next_byte && next_byte <= '9'))
{
active_token_start_off = off;
active_token_kind = TXTI_TokenKind_Numeric;
active_token_kind = TXT_TokenKind_Numeric;
advance = 1;
}
else if(byte == '~' || byte == '!' || byte == '%' || byte == '^' ||
@@ -439,29 +439,29 @@ df_txti_token_array_from_dasm_arch_string(Arena *arena, Architecture arch, Strin
byte == '>' || byte == ',' || byte == '.')
{
active_token_start_off = off;
active_token_kind = TXTI_TokenKind_Symbol;
active_token_kind = TXT_TokenKind_Symbol;
advance = 1;
}
else
{
active_token_start_off = off;
active_token_kind = TXTI_TokenKind_Error;
active_token_kind = TXT_TokenKind_Error;
advance = 1;
}
}break;
case TXTI_TokenKind_Whitespace:
case TXT_TokenKind_Whitespace:
if(byte != ' ' && byte != '\t' && byte != '\v' && byte != '\f')
{
ender_found = 1;
advance = 0;
}break;
case TXTI_TokenKind_Identifier:
case TXT_TokenKind_Identifier:
if((byte < 'a' || 'z' < byte) && (byte < 'A' || 'Z' < byte) && (byte < '0' || '9' < byte) && byte != '_')
{
ender_found = 1;
advance = 0;
}break;
case TXTI_TokenKind_String:
case TXT_TokenKind_String:
{
U8 ender_byte = string_is_char ? '\'' : '"';
if(!escaped && byte == ender_byte)
@@ -488,13 +488,13 @@ df_txti_token_array_from_dasm_arch_string(Arena *arena, Architecture arch, Strin
}
}
}break;
case TXTI_TokenKind_Numeric:
case TXT_TokenKind_Numeric:
if((byte < 'a' || 'z' < byte) && (byte < 'A' || 'Z' < byte) && (byte < '0' || '9' < byte) && byte != '.')
{
ender_found = 1;
advance = 0;
}break;
case TXTI_TokenKind_Symbol:
case TXT_TokenKind_Symbol:
if(1)
{
// NOTE(rjf): avoiding maximum munch rule for now
@@ -511,7 +511,7 @@ df_txti_token_array_from_dasm_arch_string(Arena *arena, Architecture arch, Strin
ender_found = 1;
advance = 0;
}break;
case TXTI_TokenKind_Error:
case TXT_TokenKind_Error:
{
ender_found = 1;
advance = 0;
@@ -519,22 +519,22 @@ df_txti_token_array_from_dasm_arch_string(Arena *arena, Architecture arch, Strin
}
if(ender_found != 0)
{
TXTI_Token token = {active_token_kind, r1u64(active_token_start_off, off+advance)};
if(active_token_kind == TXTI_TokenKind_Identifier)
TXT_Token token = {active_token_kind, r1u64(active_token_start_off, off+advance)};
if(active_token_kind == TXT_TokenKind_Identifier)
{
String8 token_string = str8_substr(string, token.range);
if(df_info_summary_from_string(arch, token_string).size != 0)
{
token.kind = TXTI_TokenKind_Keyword;
token.kind = TXT_TokenKind_Keyword;
}
}
txti_token_chunk_list_push(arena, &tokens, 1024, &token);
active_token_kind = TXTI_TokenKind_Null;
txt_token_chunk_list_push(arena, &tokens, 1024, &token);
active_token_kind = TXT_TokenKind_Null;
active_token_start_off = token.range.max;
}
}
}
TXTI_TokenArray result = txti_token_array_from_chunk_list(arena, &tokens);
TXT_TokenArray result = txt_token_array_from_chunk_list(arena, &tokens);
scratch_end(scratch);
return result;
}
@@ -5844,8 +5844,8 @@ DF_VIEW_UI_FUNCTION_DEF(Code)
ui_spacer(ui_pct(1, 0));
ui_labelf("(read only)");
ui_labelf("%s",
info.line_end_kind == TXTI_LineEndKind_LF ? "lf" :
info.line_end_kind == TXTI_LineEndKind_CRLF ? "crlf" :
info.line_end_kind == TXT_LineEndKind_LF ? "lf" :
info.line_end_kind == TXT_LineEndKind_CRLF ? "crlf" :
"bin");
}
}
@@ -6185,7 +6185,7 @@ DF_VIEW_UI_FUNCTION_DEF(Disassembly)
code_slice_params.line_num_range = visible_line_num_range;
code_slice_params.line_text = push_array(scratch.arena, String8, visible_line_count);
code_slice_params.line_ranges = push_array(scratch.arena, Rng1U64, visible_line_count);
code_slice_params.line_tokens = push_array(scratch.arena, TXTI_TokenArray, visible_line_count);
code_slice_params.line_tokens = push_array(scratch.arena, TXT_TokenArray, visible_line_count);
code_slice_params.line_bps = push_array(scratch.arena, DF_EntityList, visible_line_count);
code_slice_params.line_ips = push_array(scratch.arena, DF_EntityList, visible_line_count);
code_slice_params.line_pins = push_array(scratch.arena, DF_EntityList, visible_line_count);
@@ -6230,7 +6230,7 @@ DF_VIEW_UI_FUNCTION_DEF(Disassembly)
for(S64 line_num = visible_line_num_range.min; line_num < visible_line_num_range.max; line_num += 1)
{
U64 idx = line_num-visible_line_num_range.min;
TXTI_TokenArray tokens = df_txti_token_array_from_dasm_arch_string(scratch.arena, df_architecture_from_entity(process), code_slice_params.line_text[idx]);
TXT_TokenArray tokens = df_txt_token_array_from_dasm_arch_string(scratch.arena, df_architecture_from_entity(process), code_slice_params.line_text[idx]);
code_slice_params.line_tokens[idx] = tokens;
}
+1 -1
View File
@@ -438,7 +438,7 @@ internal void df_entity_lister_item_array_sort_by_strength__in_place(DF_EntityLi
////////////////////////////////
//~ rjf: Disassembly View
internal TXTI_TokenArray df_txti_token_array_from_dasm_arch_string(Arena *arena, Architecture arch, String8 string);
internal TXT_TokenArray df_txt_token_array_from_dasm_arch_string(Arena *arena, Architecture arch, String8 string);
////////////////////////////////
//~ rjf: Eval/Watch Views
+31 -380
View File
@@ -72,355 +72,6 @@ txti_lang_kind_from_extension(String8 extension)
return kind;
}
////////////////////////////////
//~ rjf: Token Type Functions
// Appends a copy of `*token` to the chunked token list `list`.
//
// Tokens are stored in fixed-capacity chunks allocated from `arena`. A new
// chunk with room for `cap` tokens is started when the list has no chunk yet,
// or when the last chunk is full. A `cap` of 0 is treated as 1, so the chunk
// that was just allocated always has room for the token stored right after.
internal void
txti_token_chunk_list_push(Arena *arena, TXTI_TokenChunkList *list, U64 cap, TXTI_Token *token)
{
  TXTI_TokenChunkNode *node = list->last;
  if(node == 0 || node->count >= node->cap)
  {
    // a zero-capacity chunk would make the store below write past the end of `v`
    U64 chunk_cap = (cap != 0) ? cap : 1;
    node = push_array(arena, TXTI_TokenChunkNode, 1);
    SLLQueuePush(list->first, list->last, node);
    node->cap = chunk_cap;
    node->v = push_array_no_zero(arena, TXTI_Token, node->cap);
    list->chunk_count += 1;
  }
  MemoryCopyStruct(&node->v[node->count], token);
  node->count += 1;
  list->token_count += 1;
}
// Appends a copy of `*token` to the linked token list `list`; the new node
// is allocated from `arena`.
internal void
txti_token_list_push(Arena *arena, TXTI_TokenList *list, TXTI_Token *token)
{
  TXTI_TokenNode *new_node = push_array(arena, TXTI_TokenNode, 1);
  SLLQueuePush(list->first, list->last, new_node);
  MemoryCopyStruct(&new_node->v, token);
  list->count += 1;
}
// Flattens the chunked token list `list` into one contiguous token array
// allocated from `arena`. Chunks are copied in list order, so the relative
// order of the tokens is preserved.
internal TXTI_TokenArray
txti_token_array_from_chunk_list(Arena *arena, TXTI_TokenChunkList *list)
{
  TXTI_TokenArray result = {0};
  result.count = list->token_count;
  result.v = push_array_no_zero(arena, TXTI_Token, result.count);

  // walk the chunks, copying each chunk's used portion behind the previous one
  TXTI_Token *dst = result.v;
  TXTI_TokenChunkNode *chunk = list->first;
  while(chunk != 0)
  {
    MemoryCopy(dst, chunk->v, chunk->count*sizeof(TXTI_Token));
    dst += chunk->count;
    chunk = chunk->next;
  }
  return result;
}
// Bakes the linked token list `list` into a contiguous token array allocated
// from `arena`, keeping the tokens in list order.
internal TXTI_TokenArray
txti_token_array_from_list(Arena *arena, TXTI_TokenList *list)
{
  TXTI_TokenArray result = {0};
  result.count = list->count;
  result.v = push_array_no_zero(arena, TXTI_Token, result.count);

  // one destination slot per list node
  TXTI_Token *dst = result.v;
  for(TXTI_TokenNode *node = list->first; node != 0; node = node->next, dst += 1)
  {
    MemoryCopyStruct(dst, &node->v);
  }
  return result;
}
////////////////////////////////
//~ rjf: Lexing Functions
// Lexes `string` as C/C++ source text and returns the resulting tokens as a
// contiguous array allocated from `arena`.
//
// - Token ranges are byte offsets into `string`, built as [min, max) pairs via r1u64.
// - Tokens are first gathered in a chunk list on a scratch arena, then baked
//   into the result array on `arena`.
// - `bytes_processed_counter` is optional (may be 0). When provided, it is
//   atomically incremented by the number of bytes stepped over, in batches of
//   at least 1000 bytes, plus a final update when the end of `string` is reached.
// - Identifiers that exactly match an entry of the keyword table are emitted
//   as TXTI_TokenKind_Keyword.
internal TXTI_TokenArray
txti_token_array_from_string__cpp(Arena *arena, U64 *bytes_processed_counter, String8 string)
{
Temp scratch = scratch_begin(&arena, 1);
//- rjf: generate token list
TXTI_TokenChunkList tokens = {0};
{
// lexer state: which kind of token is currently open (Null = none), where it
// started, and which flavor of comment/string it is
B32 comment_is_single_line = 0;
B32 string_is_char = 0;
TXTI_TokenKind active_token_kind = TXTI_TokenKind_Null;
U64 active_token_start_idx = 0;
// `escaped` describes the byte currently being examined; `next_escaped` is
// computed during this iteration and becomes `escaped` for the next one
B32 escaped = 0;
B32 next_escaped = 0;
U64 byte_process_start_idx = 0;
// the loop deliberately also visits idx == string.size, so that a token which
// is still open at the end of the input gets terminated and emitted
for(U64 idx = 0; idx <= string.size;)
{
// reads past the end of the string yield 0
U8 byte = (idx+0 < string.size) ? (string.str[idx+0]) : 0;
U8 next_byte = (idx+1 < string.size) ? (string.str[idx+1]) : 0;
// rjf: update counter
if(bytes_processed_counter != 0 && ((idx-byte_process_start_idx) >= 1000 || idx == string.size))
{
ins_atomic_u64_add_eval(bytes_processed_counter, (idx-byte_process_start_idx));
byte_process_start_idx = idx;
}
// rjf: escaping
// an escaped byte other than '\r'/'\n' ends the escape; an escaped '\r' or
// '\n' leaves next_escaped untouched, so the escape carries over to the
// following byte (presumably so a backslash before "\r\n" covers both
// bytes -- TODO confirm)
if(escaped && (byte != '\r' && byte != '\n'))
{
next_escaped = 0;
}
else if(!escaped && byte == '\\')
{
next_escaped = 1;
}
// rjf: take starter, determine active token kind
if(active_token_kind == TXTI_TokenKind_Null)
{
// rjf: use next bytes to start a new token
if(0){}
else if(char_is_space(byte)) { active_token_kind = TXTI_TokenKind_Whitespace; }
else if(byte == '_' ||
byte == '$' ||
char_is_alpha(byte)) { active_token_kind = TXTI_TokenKind_Identifier; }
else if(char_is_digit(byte, 10) ||
(byte == '.' &&
char_is_digit(next_byte, 10))) { active_token_kind = TXTI_TokenKind_Numeric; }
else if(byte == '"') { active_token_kind = TXTI_TokenKind_String; string_is_char = 0; }
else if(byte == '\'') { active_token_kind = TXTI_TokenKind_String; string_is_char = 1; }
else if(byte == '/' && next_byte == '/') { active_token_kind = TXTI_TokenKind_Comment; comment_is_single_line = 1; }
else if(byte == '/' && next_byte == '*') { active_token_kind = TXTI_TokenKind_Comment; comment_is_single_line = 0; }
else if(byte == '~' || byte == '!' ||
byte == '%' || byte == '^' ||
byte == '&' || byte == '*' ||
byte == '(' || byte == ')' ||
byte == '-' || byte == '=' ||
byte == '+' || byte == '[' ||
byte == ']' || byte == '{' ||
byte == '}' || byte == ':' ||
byte == ';' || byte == ',' ||
byte == '.' || byte == '<' ||
byte == '>' || byte == '/' ||
byte == '?' || byte == '|') { active_token_kind = TXTI_TokenKind_Symbol; }
else if(byte == '#') { active_token_kind = TXTI_TokenKind_Meta; }
// rjf: start new token
if(active_token_kind != TXTI_TokenKind_Null)
{
active_token_start_idx = idx;
}
// rjf: invalid token kind -> emit error
// NOTE(review): this branch is also reachable at idx == string.size, where
// byte is 0; unless one of the starter predicates above accepts 0, an Error
// token covering [string.size, string.size+1) is emitted after the last real
// token -- confirm whether that trailing token is intended.
else
{
TXTI_Token token = {TXTI_TokenKind_Error, r1u64(idx, idx+1)};
txti_token_chunk_list_push(scratch.arena, &tokens, 4096, &token);
}
}
// rjf: look for ender
// ender_pad = number of bytes, starting at idx, that belong to the token's
// terminator and are included in the token (closing quote: 1, "*/": 2); it is
// only consumed when ender_found is set
U64 ender_pad = 0;
B32 ender_found = 0;
// idx>active_token_start_idx: the byte that started the token is never tested
// as its own terminator
if(active_token_kind != TXTI_TokenKind_Null && idx>active_token_start_idx)
{
// end of input terminates whatever token is open
if(idx == string.size)
{
ender_pad = 0;
ender_found = 1;
}
else switch(active_token_kind)
{
default:break;
case TXTI_TokenKind_Whitespace:
{
ender_found = !char_is_space(byte);
}break;
case TXTI_TokenKind_Identifier:
{
ender_found = (!char_is_alpha(byte) && !char_is_digit(byte, 10) && byte != '_' && byte != '$');
}break;
case TXTI_TokenKind_Numeric:
{
ender_found = (!char_is_alpha(byte) && !char_is_digit(byte, 10) && byte != '_' && byte != '.');
}break;
case TXTI_TokenKind_String:
{
// an unescaped quote of the same flavor that opened the literal closes it
ender_found = (!escaped && ((!string_is_char && byte == '"') || (string_is_char && byte == '\'')));
ender_pad += 1;
}break;
case TXTI_TokenKind_Symbol:
{
// a run of consecutive symbol bytes is kept together as a single token
ender_found = (byte != '~' && byte != '!' &&
byte != '%' && byte != '^' &&
byte != '&' && byte != '*' &&
byte != '(' && byte != ')' &&
byte != '-' && byte != '=' &&
byte != '+' && byte != '[' &&
byte != ']' && byte != '{' &&
byte != '}' && byte != ':' &&
byte != ';' && byte != ',' &&
byte != '.' && byte != '<' &&
byte != '>' && byte != '/' &&
byte != '?' && byte != '|');
}break;
case TXTI_TokenKind_Comment:
{
if(comment_is_single_line)
{
ender_found = (!escaped && (byte == '\r' || byte == '\n'));
}
else
{
// active_token_start_idx+1 < idx: the '*' of the opening "/*" cannot
// also act as the '*' of the closing "*/"
ender_found = (active_token_start_idx+1 < idx && byte == '*' && next_byte == '/');
ender_pad += 2;
}
}break;
case TXTI_TokenKind_Meta:
{
// runs to the first line-break byte that is not escaped
ender_found = (!escaped && (byte == '\r' || byte == '\n'));
}break;
}
}
// rjf: next byte is ender => emit token
if(ender_found)
{
TXTI_Token token = {active_token_kind, r1u64(active_token_start_idx, idx+ender_pad)};
active_token_kind = TXTI_TokenKind_Null;
// rjf: identifier -> keyword in special cases
if(token.kind == TXTI_TokenKind_Identifier)
{
read_only local_persist String8 cpp_keywords[] =
{
str8_lit_comp("alignas"),
str8_lit_comp("alignof"),
str8_lit_comp("and"),
str8_lit_comp("and_eq"),
str8_lit_comp("asm"),
str8_lit_comp("atomic_cancel"),
str8_lit_comp("atomic_commit"),
str8_lit_comp("atomic_noexcept"),
str8_lit_comp("auto"),
str8_lit_comp("bitand"),
str8_lit_comp("bitor"),
str8_lit_comp("bool"),
str8_lit_comp("break"),
str8_lit_comp("case"),
str8_lit_comp("catch"),
str8_lit_comp("char"),
str8_lit_comp("char8_t"),
str8_lit_comp("char16_t"),
str8_lit_comp("char32_t"),
str8_lit_comp("class"),
str8_lit_comp("compl"),
str8_lit_comp("concept"),
str8_lit_comp("const"),
str8_lit_comp("consteval"),
str8_lit_comp("constexpr"),
str8_lit_comp("constinit"),
str8_lit_comp("const_cast"),
str8_lit_comp("continue"),
str8_lit_comp("co_await"),
str8_lit_comp("co_return"),
str8_lit_comp("co_yield"),
str8_lit_comp("decltype"),
str8_lit_comp("default"),
str8_lit_comp("delete"),
str8_lit_comp("do"),
str8_lit_comp("double"),
str8_lit_comp("dynamic_cast"),
str8_lit_comp("else"),
str8_lit_comp("enum"),
str8_lit_comp("explicit"),
str8_lit_comp("export"),
str8_lit_comp("extern"),
str8_lit_comp("false"),
str8_lit_comp("float"),
str8_lit_comp("for"),
str8_lit_comp("friend"),
str8_lit_comp("goto"),
str8_lit_comp("if"),
str8_lit_comp("inline"),
str8_lit_comp("int"),
str8_lit_comp("long"),
str8_lit_comp("mutable"),
str8_lit_comp("namespace"),
str8_lit_comp("new"),
str8_lit_comp("noexcept"),
str8_lit_comp("not"),
str8_lit_comp("not_eq"),
str8_lit_comp("nullptr"),
str8_lit_comp("operator"),
str8_lit_comp("or"),
str8_lit_comp("or_eq"),
str8_lit_comp("private"),
str8_lit_comp("protected"),
str8_lit_comp("public"),
str8_lit_comp("reflexpr"),
str8_lit_comp("register"),
str8_lit_comp("reinterpret_cast"),
str8_lit_comp("requires"),
str8_lit_comp("return"),
str8_lit_comp("short"),
str8_lit_comp("signed"),
str8_lit_comp("sizeof"),
str8_lit_comp("static"),
str8_lit_comp("static_assert"),
str8_lit_comp("static_cast"),
str8_lit_comp("struct"),
str8_lit_comp("switch"),
str8_lit_comp("synchronized"),
str8_lit_comp("template"),
str8_lit_comp("this"),
str8_lit_comp("thread_local"),
str8_lit_comp("throw"),
str8_lit_comp("true"),
str8_lit_comp("try"),
str8_lit_comp("typedef"),
str8_lit_comp("typeid"),
str8_lit_comp("typename"),
str8_lit_comp("union"),
str8_lit_comp("unsigned"),
str8_lit_comp("using"),
str8_lit_comp("virtual"),
str8_lit_comp("void"),
str8_lit_comp("volatile"),
str8_lit_comp("wchar_t"),
str8_lit_comp("while"),
str8_lit_comp("xor"),
str8_lit_comp("xor_eq"),
};
// linear scan of the table; the first match re-tags the token as a keyword
String8 token_string = str8_substr(string, r1u64(active_token_start_idx, idx+ender_pad));
for(U64 keyword_idx = 0; keyword_idx < ArrayCount(cpp_keywords); keyword_idx += 1)
{
if(str8_match(cpp_keywords[keyword_idx], token_string, 0))
{
token.kind = TXTI_TokenKind_Keyword;
break;
}
}
}
// rjf: push
txti_token_chunk_list_push(scratch.arena, &tokens, 4096, &token);
// rjf: increment by ender padding
// with a pad of 0, idx stays on the terminating byte, so the next iteration
// re-examines that byte as the possible start of a new token
idx += ender_pad;
}
// rjf: advance by 1 byte if we haven't found an ender
if(!ender_found)
{
idx += 1;
}
escaped = next_escaped;
}
}
//- rjf: token list -> token array
TXTI_TokenArray result = txti_token_array_from_chunk_list(arena, &tokens);
scratch_end(scratch);
return result;
}
////////////////////////////////
//~ rjf: Message Type Functions
@@ -607,7 +258,7 @@ txti_slice_from_handle_line_range(Arena *arena, TXTI_Handle handle, Rng1S64 line
result.line_count = (U64)dim_1s64(line_range_clamped)+1;
result.line_text = push_array(arena, String8, result.line_count);
result.line_ranges = push_array(arena, Rng1U64, result.line_count);
result.line_tokens = push_array(arena, TXTI_TokenArray, result.line_count);
result.line_tokens = push_array(arena, TXT_TokenArray, result.line_count);
// rjf: fill line ranges & text
U64 line_slice_idx = 0;
@@ -626,7 +277,7 @@ txti_slice_from_handle_line_range(Arena *arena, TXTI_Handle handle, Rng1S64 line
}
// rjf: binary search to find first token
TXTI_Token *tokens_first = 0;
TXT_Token *tokens_first = 0;
ProfScope("binary search to find first token")
{
Rng1U64 slice_range = r1u64(result.line_ranges[0].min, result.line_ranges[result.line_count-1].max);
@@ -639,7 +290,7 @@ txti_slice_from_handle_line_range(Arena *arena, TXTI_Handle handle, Rng1S64 line
{
break;
}
TXTI_Token *mid_token = &buffer->tokens.v[mid_idx];
TXT_Token *mid_token = &buffer->tokens.v[mid_idx];
if(mid_token->range.min > slice_range.max)
{
opl_idx = mid_idx;
@@ -661,18 +312,18 @@ txti_slice_from_handle_line_range(Arena *arena, TXTI_Handle handle, Rng1S64 line
}
// rjf: grab per-line tokens
TXTI_TokenList *line_tokens_lists = push_array(scratch.arena, TXTI_TokenList, result.line_count);
TXT_TokenList *line_tokens_lists = push_array(scratch.arena, TXT_TokenList, result.line_count);
if(tokens_first != 0) ProfScope("grab per-line tokens")
{
TXTI_Token *tokens_opl = buffer->tokens.v+buffer->tokens.count;
TXT_Token *tokens_opl = buffer->tokens.v+buffer->tokens.count;
U64 line_slice_idx = 0;
for(TXTI_Token *token = tokens_first; token < tokens_opl && line_slice_idx < result.line_count;)
for(TXT_Token *token = tokens_first; token < tokens_opl && line_slice_idx < result.line_count;)
{
if(token->range.min < result.line_ranges[line_slice_idx].max)
{
if(token->range.max > result.line_ranges[line_slice_idx].min)
{
txti_token_list_push(scratch.arena, &line_tokens_lists[line_slice_idx], token);
txt_token_list_push(scratch.arena, &line_tokens_lists[line_slice_idx], token);
}
B32 need_token_advance = 0;
B32 need_line_advance = 0;
@@ -697,7 +348,7 @@ txti_slice_from_handle_line_range(Arena *arena, TXTI_Handle handle, Rng1S64 line
// rjf: bake per-line tokens to arrays
for(U64 line_slice_idx = 0; line_slice_idx < result.line_count; line_slice_idx += 1)
{
result.line_tokens[line_slice_idx] = txti_token_array_from_list(arena, &line_tokens_lists[line_slice_idx]);
result.line_tokens[line_slice_idx] = txt_token_array_from_list(arena, &line_tokens_lists[line_slice_idx]);
}
}
}
@@ -733,8 +384,8 @@ txti_string_from_handle_txt_rng(Arena *arena, TXTI_Handle handle, TxtRng range)
switch(info.line_end_kind)
{
default:
case TXTI_LineEndKind_LF:{join.sep = str8_lit("\n");}break;
case TXTI_LineEndKind_CRLF:{join.sep = str8_lit("\r\n");}break;
case TXT_LineEndKind_LF:{join.sep = str8_lit("\n");}break;
case TXT_LineEndKind_CRLF:{join.sep = str8_lit("\r\n");}break;
}
result = str8_list_join(arena, &line_strings, &join);
}
@@ -755,18 +406,18 @@ txti_string_from_handle_line_num(Arena *arena, TXTI_Handle handle, S64 line_num)
}
internal Rng1U64
txti_expr_range_from_line_off_range_string_tokens(U64 off, Rng1U64 line_range, String8 line_text, TXTI_TokenArray *line_tokens)
txti_expr_range_from_line_off_range_string_tokens(U64 off, Rng1U64 line_range, String8 line_text, TXT_TokenArray *line_tokens)
{
Rng1U64 result = {0};
Temp scratch = scratch_begin(0, 0);
{
// rjf: unpack line info
TXTI_Token *line_tokens_first = line_tokens->v;
TXTI_Token *line_tokens_opl = line_tokens->v+line_tokens->count;
TXT_Token *line_tokens_first = line_tokens->v;
TXT_Token *line_tokens_opl = line_tokens->v+line_tokens->count;
// rjf: find token containing `off`
TXTI_Token *pt_token = 0;
for(TXTI_Token *token = line_tokens_first;
TXT_Token *pt_token = 0;
for(TXT_Token *token = line_tokens_first;
token < line_tokens_opl;
token += 1)
{
@@ -778,14 +429,14 @@ txti_expr_range_from_line_off_range_string_tokens(U64 off, Rng1U64 line_range, S
switch(token->kind)
{
default:{}break;
case TXTI_TokenKind_Symbol:
case TXT_TokenKind_Symbol:
{
token_ender = (str8_match(token_string, str8_lit("]"), 0));
}break;
case TXTI_TokenKind_Identifier:
case TXTI_TokenKind_Keyword:
case TXTI_TokenKind_String:
case TXTI_TokenKind_Meta:
case TXT_TokenKind_Identifier:
case TXT_TokenKind_Keyword:
case TXT_TokenKind_String:
case TXT_TokenKind_Meta:
{
token_ender = 1;
}break;
@@ -809,7 +460,7 @@ txti_expr_range_from_line_off_range_string_tokens(U64 off, Rng1U64 line_range, S
{
B32 walkback_done = 0;
S32 nest = 0;
for(TXTI_Token *wb_token = pt_token;
for(TXT_Token *wb_token = pt_token;
wb_token >= line_tokens_first && walkback_done == 0;
wb_token -= 1)
{
@@ -819,7 +470,7 @@ txti_expr_range_from_line_off_range_string_tokens(U64 off, Rng1U64 line_range, S
switch(wb_token->kind)
{
default:{}break;
case TXTI_TokenKind_Symbol:
case TXT_TokenKind_Symbol:
{
B32 is_scope_resolution = str8_match(wb_token_string, str8_lit("::"), 0);
B32 is_dot = str8_match(wb_token_string, str8_lit("."), 0);
@@ -837,7 +488,7 @@ txti_expr_range_from_line_off_range_string_tokens(U64 off, Rng1U64 line_range, S
include_wb_token = 1;
}
}break;
case TXTI_TokenKind_Identifier:
case TXT_TokenKind_Identifier:
{
include_wb_token = 1;
}break;
@@ -868,9 +519,9 @@ txti_expr_range_from_handle_pt(TXTI_Handle handle, TxtPt pt)
// rjf: unpack line info
String8 line_text = slice.line_text[0];
Rng1U64 line_range = slice.line_ranges[0];
TXTI_TokenArray line_tokens = slice.line_tokens[0];
TXTI_Token *line_tokens_first = line_tokens.v;
TXTI_Token *line_tokens_opl = line_tokens.v+line_tokens.count;
TXT_TokenArray line_tokens = slice.line_tokens[0];
TXT_Token *line_tokens_first = line_tokens.v;
TXT_Token *line_tokens_opl = line_tokens.v+line_tokens.count;
U64 pt_off = line_range.min + (pt.column-1);
// rjf: grab offset range of expression
@@ -1002,12 +653,12 @@ txti_mut_thread_entry_point(void *p)
case TXTI_LangKind_C:
case TXTI_LangKind_CPlusPlus:
{
lex_function = txti_token_array_from_string__cpp;
lex_function = txt_token_array_from_string__c_cpp;
}break;
}
//- rjf: detect line end kind
TXTI_LineEndKind line_end_kind = TXTI_LineEndKind_Null;
TXT_LineEndKind line_end_kind = TXT_LineEndKind_Null;
if(load_valid)
{
U64 lf_count = 0;
@@ -1025,11 +676,11 @@ txti_mut_thread_entry_point(void *p)
}
if(cr_count >= lf_count/2 && lf_count >= 1)
{
line_end_kind = TXTI_LineEndKind_CRLF;
line_end_kind = TXT_LineEndKind_CRLF;
}
else if(lf_count >= 1)
{
line_end_kind = TXTI_LineEndKind_LF;
line_end_kind = TXT_LineEndKind_LF;
}
}
@@ -1084,7 +735,7 @@ txti_mut_thread_entry_point(void *p)
if(entity != 0)
{
entity->buffer_apply_gen += 1;
if(line_end_kind != TXTI_LineEndKind_Null)
if(line_end_kind != TXT_LineEndKind_Null)
{
entity->line_end_kind = line_end_kind;
}
+6 -101
View File
@@ -56,88 +56,6 @@ struct TXTI_Handle
U64 u64[2];
};
////////////////////////////////
//~ rjf: Parsed Text Info Types
// Line-ending convention recorded for a text buffer.
typedef enum TXTI_LineEndKind
{
// no line ending detected
TXTI_LineEndKind_Null,
// "\n"
TXTI_LineEndKind_LF,
// "\r\n"
TXTI_LineEndKind_CRLF,
TXTI_LineEndKind_COUNT
}
TXTI_LineEndKind;
// Classification assigned to each token produced by a lexer.
typedef enum TXTI_TokenKind
{
// no token; the lexers use this as their "no token currently open" state
TXTI_TokenKind_Null,
// input that does not start any known token
TXTI_TokenKind_Error,
TXTI_TokenKind_Whitespace,
TXTI_TokenKind_Keyword,
TXTI_TokenKind_Identifier,
TXTI_TokenKind_Numeric,
// string and character literals
TXTI_TokenKind_String,
TXTI_TokenKind_Symbol,
TXTI_TokenKind_Comment,
TXTI_TokenKind_Meta, // preprocessor, etc.
TXTI_TokenKind_COUNT
}
TXTI_TokenKind;
// A single lexed token: its classification plus the range of the lexed text
// it covers.
typedef struct TXTI_Token TXTI_Token;
struct TXTI_Token
{
TXTI_TokenKind kind;
// byte offsets into the lexed text; the lexers build it as [min, max)
Rng1U64 range;
};
// One fixed-capacity chunk of tokens inside a TXTI_TokenChunkList.
typedef struct TXTI_TokenChunkNode TXTI_TokenChunkNode;
struct TXTI_TokenChunkNode
{
// next chunk in the list, or 0 for the last chunk
TXTI_TokenChunkNode *next;
// number of tokens currently stored in `v`
U64 count;
// number of tokens `v` has room for
U64 cap;
TXTI_Token *v;
};
// Growable token container made of linked chunks; filled with
// txti_token_chunk_list_push and flattened with txti_token_array_from_chunk_list.
typedef struct TXTI_TokenChunkList TXTI_TokenChunkList;
struct TXTI_TokenChunkList
{
TXTI_TokenChunkNode *first;
TXTI_TokenChunkNode *last;
// number of chunks in the list
U64 chunk_count;
// total number of tokens across all chunks
U64 token_count;
};
// Node of a TXTI_TokenList; holds one token by value.
typedef struct TXTI_TokenNode TXTI_TokenNode;
struct TXTI_TokenNode
{
TXTI_TokenNode *next;
TXTI_Token v;
};
// Linked list of individual tokens; filled with txti_token_list_push and
// baked into an array with txti_token_array_from_list.
typedef struct TXTI_TokenList TXTI_TokenList;
struct TXTI_TokenList
{
TXTI_TokenNode *first;
TXTI_TokenNode *last;
// number of nodes in the list
U64 count;
};
// Contiguous array of `count` tokens starting at `v`.
typedef struct TXTI_TokenArray TXTI_TokenArray;
struct TXTI_TokenArray
{
U64 count;
TXTI_Token *v;
};
// Contiguous array of `count` token arrays starting at `v`.
typedef struct TXTI_TokenArrayArray TXTI_TokenArrayArray;
struct TXTI_TokenArrayArray
{
U64 count;
TXTI_TokenArray *v;
};
////////////////////////////////
//~ rjf: Language Kinds
@@ -150,7 +68,7 @@ typedef enum TXTI_LangKind
}
TXTI_LangKind;
typedef TXTI_TokenArray TXTI_LangLexFunctionType(Arena *arena, U64 *bytes_processed_counter, String8 string);
typedef TXT_TokenArray TXTI_LangLexFunctionType(Arena *arena, U64 *bytes_processed_counter, String8 string);
////////////////////////////////
//~ rjf: Buffer Entity Types
@@ -173,7 +91,7 @@ struct TXTI_Buffer
U64 lines_max_size;
// rjf: tokens
TXTI_TokenArray tokens;
TXT_TokenArray tokens;
};
typedef struct TXTI_Entity TXTI_Entity;
@@ -187,7 +105,7 @@ struct TXTI_Entity
U64 mut_gen;
// rjf: metadata
TXTI_LineEndKind line_end_kind;
TXT_LineEndKind line_end_kind;
TXTI_LangKind lang_kind;
U64 bytes_processed;
U64 bytes_to_process;
@@ -238,7 +156,7 @@ struct TXTI_BufferInfo
{
String8 path;
U64 timestamp;
TXTI_LineEndKind line_end_kind;
TXT_LineEndKind line_end_kind;
TXTI_LangKind lang_kind;
U64 total_line_count;
U64 last_line_size;
@@ -255,7 +173,7 @@ struct TXTI_Slice
U64 line_count;
String8 *line_text;
Rng1U64 *line_ranges;
TXTI_TokenArray *line_tokens;
TXT_TokenArray *line_tokens;
};
////////////////////////////////
@@ -342,19 +260,6 @@ internal void txti_init(void);
internal U64 txti_hash_from_string(String8 string);
internal TXTI_LangKind txti_lang_kind_from_extension(String8 extension);
////////////////////////////////
//~ rjf: Token Type Functions
internal void txti_token_chunk_list_push(Arena *arena, TXTI_TokenChunkList *list, U64 cap, TXTI_Token *token);
internal void txti_token_list_push(Arena *arena, TXTI_TokenList *list, TXTI_Token *token);
internal TXTI_TokenArray txti_token_array_from_chunk_list(Arena *arena, TXTI_TokenChunkList *list);
internal TXTI_TokenArray txti_token_array_from_list(Arena *arena, TXTI_TokenList *list);
////////////////////////////////
//~ rjf: Lexing Functions
internal TXTI_TokenArray txti_token_array_from_string__cpp(Arena *arena, U64 *bytes_processed_counter, String8 string);
////////////////////////////////
//~ rjf: Message Type Functions
@@ -373,7 +278,7 @@ internal TXTI_BufferInfo txti_buffer_info_from_handle(Arena *arena, TXTI_Handle
internal TXTI_Slice txti_slice_from_handle_line_range(Arena *arena, TXTI_Handle handle, Rng1S64 line_range);
internal String8 txti_string_from_handle_txt_rng(Arena *arena, TXTI_Handle handle, TxtRng range);
internal String8 txti_string_from_handle_line_num(Arena *arena, TXTI_Handle handle, S64 line_num);
internal Rng1U64 txti_expr_range_from_line_off_range_string_tokens(U64 off, Rng1U64 line_range, String8 line_text, TXTI_TokenArray *line_tokens);
internal Rng1U64 txti_expr_range_from_line_off_range_string_tokens(U64 off, Rng1U64 line_range, String8 line_text, TXT_TokenArray *line_tokens);
internal TxtRng txti_expr_range_from_handle_pt(TXTI_Handle handle, TxtPt pt);
//- rjf: buffer mutations