plug in x64 intel-syntax disassembly 'language' into text analysis cache layer, to use that layer to naturally do parser/highlighter work for disassembly

This commit is contained in:
Ryan Fleury
2024-03-28 12:01:14 -07:00
parent d3570e8147
commit bcbb4142cf
3 changed files with 179 additions and 4 deletions
+1 -1
View File
@@ -829,7 +829,7 @@ DF_GFX_VIEW_RULE_BLOCK_UI_FUNCTION_DEF(disasm)
{
U128 dasm_text_hash = hs_hash_from_key(dasm_info.text_key, rewind_idx);
dasm_text_data = hs_data_from_hash(hs_scope, dasm_text_hash);
dasm_text_info = txt_text_info_from_hash_lang(txt_scope, dasm_text_hash, TXT_LangKind_Null);
dasm_text_info = txt_text_info_from_hash_lang(txt_scope, dasm_text_hash, TXT_LangKind_DisasmX64Intel);
if(dasm_text_info.lines_count != 0)
{
break;
+176 -3
View File
@@ -40,9 +40,10 @@ txt_lex_function_from_lang_kind(TXT_LangKind kind)
switch(kind)
{
default:{}break;
case TXT_LangKind_C: {fn = txt_token_array_from_string__c_cpp;}break;
case TXT_LangKind_CPlusPlus: {fn = txt_token_array_from_string__c_cpp;}break;
case TXT_LangKind_Odin: {fn = txt_token_array_from_string__odin;}break;
case TXT_LangKind_C: {fn = txt_token_array_from_string__c_cpp;}break;
case TXT_LangKind_CPlusPlus: {fn = txt_token_array_from_string__c_cpp;}break;
case TXT_LangKind_Odin: {fn = txt_token_array_from_string__odin;}break;
case TXT_LangKind_DisasmX64Intel:{fn = txt_token_array_from_string__disasm_x64_intel;}break;
}
return fn;
}
@@ -624,6 +625,178 @@ txt_token_array_from_string__odin(Arena *arena, U64 *bytes_processed_counter, St
return result;
}
//- Lexes `string` (x64 Intel-syntax disassembly text) into a flat token array.
//
// arena:                   receives the returned token array.
// bytes_processed_counter: accepted for signature parity with the other
//                          per-language lexers; this lexer does not update it.
// string:                  the text to lex; never read past `string.size`.
//
// Produced token kinds: Whitespace, Identifier, String (both '...' and "..."),
// Numeric, Symbol (always exactly one byte - no maximal munch), and Error (one
// byte that fits no other class). Each token's range holds byte offsets into
// `string`: `min` is the first byte of the token, `max` is one past its last
// byte, and `max` never exceeds `string.size`.
//
// Identifiers are always emitted as TXT_TokenKind_Identifier; classifying
// instruction mnemonics as keywords is not implemented yet.
//
// NOTE(review): the intermediate chunk list is built on the scratch arena and
// released before returning. This assumes txt_token_array_from_chunk_list
// copies the tokens into `arena` rather than aliasing chunk memory - confirm.
internal TXT_TokenArray
txt_token_array_from_string__disasm_x64_intel(Arena *arena, U64 *bytes_processed_counter, String8 string)
{
  Temp scratch = scratch_begin(&arena, 1);
  
  //- rjf: parse tokens
  TXT_TokenChunkList tokens = {0};
  {
    TXT_TokenKind active_token_kind = TXT_TokenKind_Null;
    U64 active_token_start_off = 0;
    U64 off = 0;
    B32 escaped = 0;
    B32 string_is_char = 0;
    
    // NOTE: the loop deliberately visits off == string.size once (reading a
    // zero byte) so that a token still open at end-of-input gets flushed.
    for(U64 advance = 0; off <= string.size; off += advance)
    {
      U8 byte      = (off+0 < string.size) ? string.str[off+0] : 0;
      U8 next_byte = (off+1 < string.size) ? string.str[off+1] : 0;
      B32 ender_found = 0;
      advance = (active_token_kind != TXT_TokenKind_Null ? 1 : 0);
      
      //- end-of-input always terminates whatever token is open
      if(off == string.size && active_token_kind != TXT_TokenKind_Null)
      {
        ender_found = 1;
        advance = 1;
      }
      
      switch(active_token_kind)
      {
        //- no active token -> classify this byte & open a new token
        default:
        case TXT_TokenKind_Null:
        {
          if(byte == ' ' || byte == '\t' || byte == '\v' || byte == '\f' || byte == '\r' || byte == '\n')
          {
            active_token_start_off = off;
            active_token_kind = TXT_TokenKind_Whitespace;
            advance = 1;
          }
          else if(('a' <= byte && byte <= 'z') || ('A' <= byte && byte <= 'Z') || byte == '_')
          {
            active_token_start_off = off;
            active_token_kind = TXT_TokenKind_Identifier;
            advance = 1;
          }
          else if(byte == '\'')
          {
            active_token_start_off = off;
            active_token_kind = TXT_TokenKind_String;
            advance = 1;
            string_is_char = 1;
          }
          else if(byte == '"')
          {
            active_token_start_off = off;
            active_token_kind = TXT_TokenKind_String;
            advance = 1;
            string_is_char = 0;
          }
          else if(('0' <= byte && byte <= '9') || (byte == '.' && '0' <= next_byte && next_byte <= '9'))
          {
            active_token_start_off = off;
            active_token_kind = TXT_TokenKind_Numeric;
            advance = 1;
          }
          else if(byte == '~' || byte == '!' || byte == '%' || byte == '^' ||
                  byte == '&' || byte == '*' || byte == '(' || byte == ')' ||
                  byte == '-' || byte == '=' || byte == '+' || byte == '[' ||
                  byte == ']' || byte == '{' || byte == '}' || byte == ';' ||
                  byte == ':' || byte == '?' || byte == '/' || byte == '<' ||
                  byte == '>' || byte == ',' || byte == '.')
          {
            active_token_start_off = off;
            active_token_kind = TXT_TokenKind_Symbol;
            advance = 1;
          }
          else
          {
            active_token_start_off = off;
            active_token_kind = TXT_TokenKind_Error;
            advance = 1;
          }
        }break;
        
        //- whitespace runs continue over blanks only; '\r' and '\n' can open
        // a whitespace token but never extend one
        case TXT_TokenKind_Whitespace:
        {
          if(byte != ' ' && byte != '\t' && byte != '\v' && byte != '\f')
          {
            ender_found = 1;
            advance = 0;
          }
        }break;
        
        //- identifiers: [A-Za-z0-9_]*
        case TXT_TokenKind_Identifier:
        {
          if((byte < 'a' || 'z' < byte) && (byte < 'A' || 'Z' < byte) && (byte < '0' || '9' < byte) && byte != '_')
          {
            ender_found = 1;
            advance = 0;
          }
        }break;
        
        //- strings: run until the matching unescaped quote
        case TXT_TokenKind_String:
        {
          U8 ender_byte = string_is_char ? '\'' : '"';
          if(!escaped && byte == ender_byte)
          {
            ender_found = 1;
            advance = 1;
          }
          else if(escaped)
          {
            escaped = 0;
            advance = 1;
          }
          else if(byte == '\\')
          {
            escaped = 1;
            advance = 1;
          }
          else
          {
            U8 byte_class = utf8_class[byte>>3];
            if(byte_class > 1)
            {
              // Skip the whole multi-byte sequence, but never step past the
              // end of input: a truncated sequence must still land exactly on
              // off == string.size so the open string token gets flushed
              // instead of being silently dropped.
              U64 bytes_left = string.size - off;
              advance = ((U64)byte_class <= bytes_left) ? (U64)byte_class : bytes_left;
            }
          }
        }break;
        
        //- numerics: digits, letters (hex digits / suffixes), and '.'
        case TXT_TokenKind_Numeric:
        {
          if((byte < 'a' || 'z' < byte) && (byte < 'A' || 'Z' < byte) && (byte < '0' || '9' < byte) && byte != '.')
          {
            ender_found = 1;
            advance = 0;
          }
        }break;
        
        //- symbols: always a single byte
        case TXT_TokenKind_Symbol:
        {
          // NOTE(rjf): avoiding maximum munch rule for now
          ender_found = 1;
          advance = 0;
        }break;
        
        //- errors: always a single byte
        case TXT_TokenKind_Error:
        {
          ender_found = 1;
          advance = 0;
        }break;
      }
      
      //- push token if an ender was found
      if(ender_found != 0)
      {
        // An unterminated string flushed at end-of-input arrives here with
        // off+advance == string.size+1; clamp so no token range ever points
        // outside of `string`.
        U64 token_end_off = off+advance;
        if(token_end_off > string.size)
        {
          token_end_off = string.size;
        }
        TXT_Token token = {active_token_kind, r1u64(active_token_start_off, token_end_off)};
        txt_token_chunk_list_push(scratch.arena, &tokens, 1024, &token);
        active_token_kind = TXT_TokenKind_Null;
        active_token_start_off = token.range.max;
      }
    }
  }
  
  //- rjf: token list -> token array
  TXT_TokenArray result = txt_token_array_from_chunk_list(arena, &tokens);
  scratch_end(scratch);
  return result;
}
////////////////////////////////
//~ rjf: Main Layer Initialization
+2
View File
@@ -113,6 +113,7 @@ typedef enum TXT_LangKind
TXT_LangKind_C,
TXT_LangKind_CPlusPlus,
TXT_LangKind_Odin,
TXT_LangKind_DisasmX64Intel,
TXT_LangKind_COUNT
}
TXT_LangKind;
@@ -248,6 +249,7 @@ internal TXT_TokenArray txt_token_array_from_list(Arena *arena, TXT_TokenList *l
// Per-language lexer entry points. All three share one signature: an arena,
// a pointer to a bytes-processed counter, and the text to lex; each returns
// a TXT_TokenArray.
internal TXT_TokenArray txt_token_array_from_string__c_cpp(Arena *arena, U64 *bytes_processed_counter, String8 string);
internal TXT_TokenArray txt_token_array_from_string__odin(Arena *arena, U64 *bytes_processed_counter, String8 string);
internal TXT_TokenArray txt_token_array_from_string__disasm_x64_intel(Arena *arena, U64 *bytes_processed_counter, String8 string);
////////////////////////////////
//~ rjf: Main Layer Initialization