prepped lexer and parser for c-library generation

This commit is contained in:
Edward R. Gonzalez 2024-12-09 16:45:18 -05:00
parent e6f30c7e1d
commit e786d7c3b6
6 changed files with 500 additions and 364 deletions

View File

@ -456,6 +456,12 @@ do \
CodeBody header_parsing = def_body(CT_Global_Body);
for ( Code entry = parsed_header_parsing.begin(); entry != parsed_header_parsing.end(); ++ entry ) switch (entry->Type)
{
case CT_Preprocess_IfDef:
{
ignore_preprocess_cond_block(txt("GEN_INTELLISENSE_DIRECTIVES"), entry, parsed_header_strings, header_strings );
}
break;
case CT_Preprocess_Pragma:
{
if ( entry->Content.contains(txt("ADT")) )
@ -546,6 +552,12 @@ do \
CodeBody types = def_body(CT_Global_Body);
for ( Code entry = parsed_types.begin(); entry != parsed_types.end(); ++ entry ) switch(entry->Type)
{
case CT_Preprocess_IfDef:
{
ignore_preprocess_cond_block(txt("GEN_INTELLISENSE_DIRECTIVES"), entry, parsed_header_strings, header_strings );
}
break;
case CT_Using:
{
CodeUsing using_ver = cast(CodeUsing, entry);
@ -600,6 +612,12 @@ do \
CodeBody ast = def_body(CT_Global_Body);
for ( Code entry = parsed_ast.begin(); entry != parsed_ast.end(); ++ entry ) switch (entry->Type)
{
case CT_Preprocess_IfDef:
{
ignore_preprocess_cond_block(txt("GEN_INTELLISENSE_DIRECTIVES"), entry, parsed_header_strings, header_strings );
}
break;
case CT_Preprocess_If:
{
CodePreprocessCond cond = cast(CodePreprocessCond, entry);
@ -992,6 +1010,12 @@ R"(#define <interface_name>( code ) _Generic( (code), \
CodeBody inlines = def_body(CT_Global_Body);
for ( Code entry = parsed_inlines.begin(); entry != parsed_inlines.end(); ++ entry ) switch( entry->Type )
{
case CT_Preprocess_IfDef:
{
ignore_preprocess_cond_block(txt("GEN_INTELLISENSE_DIRECTIVES"), entry, parsed_header_strings, header_strings );
}
break;
case CT_Function:
{
// We're going to wrap usage of these procedures into generic selectors in code_types.hpp section,
@ -1019,6 +1043,12 @@ R"(#define <interface_name>( code ) _Generic( (code), \
CodeBody header_end = def_body(CT_Global_Body);
for ( Code entry = parsed_header_end.begin(); entry != parsed_header_end.end(); ++ entry, ++ idx ) switch( entry->Type )
{
case CT_Preprocess_IfDef:
{
ignore_preprocess_cond_block(txt("GEN_INTELLISENSE_DIRECTIVES"), entry, parsed_header_strings, header_strings );
}
break;
case CT_Variable:
{
CodeVar var = cast(CodeVar, entry);
@ -1058,13 +1088,14 @@ R"(#define <interface_name>( code ) _Generic( (code), \
#pragma region Resolve Components
CodeBody array_arena = gen_array(txt("Arena"), txt("Array_Arena"));
CodeBody array_pool = gen_array(txt("Pool"), txt("Array_Pool"));
CodeBody array_token = gen_array(txt("Token"), txt("Array_Token"));
Code src_static_data = scan_file( project_dir "components/static_data.cpp" );
Code src_ast_case_macros = scan_file( project_dir "components/ast_case_macros.cpp" );
Code src_code_serialization = scan_file( project_dir "components/code_serialization.cpp" );
Code src_interface = scan_file( project_dir "components/interface.cpp" );
Code src_lexer = scan_file( project_dir "components/lexer.cpp" );
Code src_parser = scan_file( project_dir "components/parser.cpp" );
// Code src_lexer = scan_file( project_dir "components/lexer.cpp" );
// Code src_parser = scan_file( project_dir "components/parser.cpp" );
Code src_parsing_interface = scan_file( project_dir "components/interface.parsing.cpp" );
Code src_untyped = scan_file( project_dir "components/interface.untyped.cpp" );
@ -1072,6 +1103,12 @@ R"(#define <interface_name>( code ) _Generic( (code), \
CodeBody src_ast = def_body(CT_Global_Body);
for ( Code entry = parsed_src_ast.begin(); entry != parsed_src_ast.end(); ++ entry ) switch( entry ->Type )
{
case CT_Preprocess_IfDef:
{
ignore_preprocess_cond_block(txt("GEN_INTELLISENSE_DIRECTIVES"), entry, parsed_header_strings, header_strings );
}
break;
case CT_Function:
{
// We're going to wrap usage of these procedures into generic selectors in code_types.hpp section,
@ -1098,6 +1135,12 @@ R"(#define <interface_name>( code ) _Generic( (code), \
CodeBody src_upfront = def_body(CT_Global_Body);
for ( Code entry = parsed_src_upfront.begin(); entry != parsed_src_upfront.end(); ++ entry ) switch( entry ->Type )
{
case CT_Preprocess_IfDef:
{
ignore_preprocess_cond_block(txt("GEN_INTELLISENSE_DIRECTIVES"), entry, parsed_header_strings, header_strings );
}
break;
case CT_Enum: {
convert_cpp_enum_to_c(cast(CodeEnum, entry), src_upfront);
}
@ -1137,6 +1180,97 @@ R"(#define <interface_name>( code ) _Generic( (code), \
src_upfront.append(entry);
break;
}
// Resolve components/lexer.cpp: walk every top-level entry of the parsed file and
// build `src_lexer`, the body used for the lexer section of the generated library.
CodeBody parsed_src_lexer = parse_file( project_dir "components/lexer.cpp" );
CodeBody src_lexer = def_body(CT_Global_Body);
// Iterate the lexer's own parsed body (the previous revision walked parsed_src_ast by mistake).
for ( Code entry = parsed_src_lexer.begin(); entry != parsed_src_lexer.end(); ++ entry ) switch( entry ->Type )
{
	case CT_Preprocess_IfDef:
	{
		// NOTE(review): the body arguments are parsed_header_strings / header_strings, same as every
		// other resolve loop in this file — confirm whether parsed_src_lexer / src_lexer was intended.
		ignore_preprocess_cond_block(txt("GEN_INTELLISENSE_DIRECTIVES"), entry, parsed_header_strings, header_strings );
	}
	break;

	case CT_Enum:
	{
		// Only named enums are converted; unnamed enums are not emitted into src_lexer.
		if (entry->Name.Len)
		{
			// Same call shape as the src_upfront loop: the converted enum is written into the target body.
			convert_cpp_enum_to_c(cast(CodeEnum, entry), src_lexer);
		}
	}
	break;

	case CT_Struct:
	{
		if ( entry->Name.is_equal(txt("Token")))
		{
			// Add struct Token forward and typedef early.
			CodeStruct  token_fwd     = parse_struct(code( struct Token; ));
			CodeTypedef token_typedef = parse_typedef(code( typedef struct Token Token; ));
			header_parsing.append(token_fwd);
			header_parsing.append(token_typedef);

			// Find the first typedef inside the generated Array_Token body, unlink it from that
			// body and append it to src_lexer ahead of the struct definition.
			b32 continue_for = true;
			for (Code array_entry = array_token.begin(); continue_for && array_entry != array_token.end(); ++ array_entry) switch (array_entry->Type)
			{
				case CT_Typedef:
				{
					// pop the array entry
					array_token->NumEntries -= 1;

					Code next = array_entry->Next;
					Code prev = array_entry->Prev;
					next->Prev = array_entry->Prev;
					prev->Next = next;
					if ( array_token->Front == array_entry )
						array_token->Front = next;

					src_lexer.append(array_entry);
					continue_for = false;
				}
				break;
			}

			// Append the struct
			src_lexer.append(entry);

			// Append the token array
			src_lexer.append(array_token);

			// Token is fully handled; advance the outer loop without emitting the generic typedef below.
			continue;
		}

		// Every other struct is followed by `typedef struct <name> <name>;`.
		CodeTypedef struct_tdef = parse_typedef(token_fmt("name", entry->Name, stringize( typedef struct <name> <name>; )));
		src_lexer.append(entry);
		src_lexer.append(struct_tdef);
	}
	break;

	default:
		src_lexer.append(entry);
	break;
}
// Resolve components/parser.cpp: walk every top-level entry of the parsed file and
// build `src_parser`, the body used for the parser section of the generated library.
CodeBody parsed_src_parser = parse_file( project_dir "components/parser.cpp" );
CodeBody src_parser = def_body(CT_Global_Body);
for ( Code entry = parsed_src_parser.begin(); entry != parsed_src_parser.end(); ++ entry ) switch( entry ->Type )
{
	case CT_Preprocess_IfDef:
	{
		// NOTE(review): the body arguments are parsed_header_strings / header_strings, same as every
		// other resolve loop in this file — confirm whether parsed_src_parser / src_parser was intended.
		ignore_preprocess_cond_block(txt("GEN_INTELLISENSE_DIRECTIVES"), entry, parsed_header_strings, header_strings );
	}
	break;

	case CT_Struct:
	{
		// Emit the struct followed by `typedef struct <name> <name>;`.
		// The previous revision appended both to header_memory and then fell through to `default`,
		// which appended the struct a second time to src_parser. Both now go to src_parser only.
		// NOTE(review): confirm header_memory was not the intended destination.
		CodeTypedef tdef = parse_typedef(token_fmt("name", entry->Name, stringize( typedef struct <name> <name>; )));
		src_parser.append(entry);
		src_parser.append(tdef);
	}
	break;

	default:
		src_parser.append(entry);
	break;
}
#pragma endregion Resolve Components
// THERE SHOULD BE NO NEW GENERIC CONTAINER DEFINITIONS PAST THIS POINT (It will not have slots for the generic selection generated macros)
@ -1285,7 +1419,7 @@ R"(#define <interface_name>( code ) _Generic( (code), \
header.print( format_code_to_untyped(src_upfront) );
header.print_fmt( "\n#pragma region Parsing\n\n" );
header.print( format_code_to_untyped(etoktype) );
// header.print( lexer );
header.print( lexer );
// header.print( parser );
// header.print( parsing_interface );
header.print_fmt( "\n#pragma endregion Parsing\n" );

View File

@ -246,9 +246,7 @@ int gen_main()
CodeBody etoktype = gen_etoktype( "enums/ETokType.csv", "enums/AttributeTokens.csv" );
//CodeNS nspaced_etoktype = def_namespace( name(parser), def_namespace_body( args(etoktype)) );
CodeBody nspaced_etoktype = def_global_body( args(
untyped_str(txt("GEN_NS_PARSER_BEGIN\n")),
etoktype,
untyped_str(txt("GEN_NS_PARSER_END\n"))
etoktype
));
Builder

View File

@ -6,6 +6,7 @@
// This file was generated automatically by gencpp's bootstrap.cpp (See: https://github.com/Ed94/gencpp)
GEN_NS_PARSER_BEGIN
#define GEN_DEFINE_ATTRIBUTE_TOKENS Entry( Tok_Attribute_API_Export, "GEN_API_Export_Code" ) Entry( Tok_Attribute_API_Import, "GEN_API_Import_Code" )
enum TokType : u32
@ -112,7 +113,7 @@ enum TokType : u32
inline StrC toktype_to_str( TokType type )
{
local_persist StrC lookup[] {
local_persist StrC lookup[] = {
{ sizeof( "__invalid__" ), "__invalid__" },
{ sizeof( "private" ), "private" },
{ sizeof( "protected" ), "protected" },

View File

@ -12,7 +12,7 @@ CodeClass parse_class( StrC def )
{
GEN_USING_NS_PARSER;
check_parse_args( def );
TokArray toks = lex( def );
if ( toks.Arr == nullptr )
return InvalidCode;
@ -20,7 +20,7 @@ CodeClass parse_class( StrC def )
Context.Tokens = toks;
push_scope();
CodeClass result = (CodeClass) parse_class_struct( Tok_Decl_Class );
pop(& Context);
parser_pop(& Context);
return result;
}
@ -39,9 +39,9 @@ CodeConstructor parse_constructor( StrC def )
Specifier specs_found[ 16 ] { Spec_NumSpecifiers };
s32 NumSpecifiers = 0;
while ( left && is_specifier(currtok) )
while ( left && tok_is_specifier(currtok) )
{
Specifier spec = strc_to_specifier( to_str(currtok) );
Specifier spec = strc_to_specifier( tok_to_str(currtok) );
b32 ignore_spec = false;
@ -59,8 +59,8 @@ CodeConstructor parse_constructor( StrC def )
break;
default :
log_failure( "Invalid specifier %s for variable\n%s", spec_to_str( spec ), to_string(Context) );
pop(& Context);
log_failure( "Invalid specifier %s for variable\n%s", spec_to_str( spec ), parser_to_string(Context) );
parser_pop(& Context);
return InvalidCode;
}
@ -109,7 +109,7 @@ CodeEnum parse_enum( StrC def )
TokArray toks = lex( def );
if ( toks.Arr == nullptr )
{
pop(& Context);
parser_pop(& Context);
return InvalidCode;
}
@ -181,7 +181,7 @@ CodeBody parse_global_body( StrC def )
Context.Tokens = toks;
push_scope();
CodeBody result = parse_global_nspace( CT_Global_Body );
pop(& Context);
parser_pop(& Context);
return result;
}
@ -236,7 +236,7 @@ CodeStruct parse_struct( StrC def )
Context.Tokens = toks;
push_scope();
CodeStruct result = (CodeStruct) parse_class_struct( Tok_Decl_Struct );
pop(& Context);
parser_pop(& Context);
return result;
}

View File

@ -35,62 +35,62 @@ struct Token
constexpr Token NullToken { nullptr, 0, Tok_Invalid, false, 0, TF_Null };
AccessSpec to_access_specifier(Token tok)
AccessSpec tok_to_access_specifier(Token tok)
{
return scast(AccessSpec, tok.Type);
}
StrC to_str(Token tok)
StrC tok_to_str(Token tok)
{
return { tok.Length, tok.Text };
}
bool is_valid( Token tok )
bool tok_is_valid( Token tok )
{
return tok.Text && tok.Length && tok.Type != Tok_Invalid;
}
bool is_access_operator(Token tok)
bool tok_is_access_operator(Token tok)
{
return bitfield_is_equal( u32, tok.Flags, TF_AccessOperator );
}
bool is_access_specifier(Token tok)
bool tok_is_access_specifier(Token tok)
{
return bitfield_is_equal( u32, tok.Flags, TF_AccessSpecifier );
}
bool is_attribute(Token tok)
bool tok_is_attribute(Token tok)
{
return bitfield_is_equal( u32, tok.Flags, TF_Attribute );
}
bool is_operator(Token tok)
bool tok_is_operator(Token tok)
{
return bitfield_is_equal( u32, tok.Flags, TF_Operator );
}
bool is_preprocessor(Token tok)
bool tok_is_preprocessor(Token tok)
{
return bitfield_is_equal( u32, tok.Flags, TF_Preprocess );
}
bool is_preprocess_cond(Token tok)
bool tok_is_preprocess_cond(Token tok)
{
return bitfield_is_equal( u32, tok.Flags, TF_Preprocess_Cond );
}
bool is_specifier(Token tok)
bool tok_is_specifier(Token tok)
{
return bitfield_is_equal( u32, tok.Flags, TF_Specifier );
}
bool is_end_definition(Token tok)
bool tok_is_end_definition(Token tok)
{
return bitfield_is_equal( u32, tok.Flags, TF_EndDefinition );
}
String to_string(Token tok)
String tok_to_string(Token tok)
{
String result = string_make_reserve( GlobalAllocator, kilobytes(4) );
@ -111,9 +111,9 @@ struct TokArray
s32 Idx;
};
bool __eat( TokType type );
bool lex__eat( TokType type );
Token* current(TokArray* self, bool skip_formatting )
Token* lex_current(TokArray* self, bool skip_formatting )
{
if ( skip_formatting )
{
@ -124,7 +124,7 @@ Token* current(TokArray* self, bool skip_formatting )
return & self->Arr[self->Idx];
}
Token* peek(TokArray self, bool skip_formatting)
Token* lex_peek(TokArray self, bool skip_formatting)
{
s32 idx = self.Idx;
@ -139,7 +139,7 @@ Token* peek(TokArray self, bool skip_formatting)
return & self.Arr[idx];
}
Token* previous(TokArray self, bool skip_formatting)
Token* lex_previous(TokArray self, bool skip_formatting)
{
s32 idx = self.Idx;
@ -154,7 +154,7 @@ Token* previous(TokArray self, bool skip_formatting)
return & self.Arr[idx - 1];
}
Token* next(TokArray self, bool skip_formatting)
Token* lex_next(TokArray self, bool skip_formatting)
{
s32 idx = self.Idx;
@ -169,9 +169,9 @@ Token* next(TokArray self, bool skip_formatting)
return & self.Arr[idx + 1];
}
global Arena_256KB defines_map_arena;
global HashTable(StrC) defines;
global Array(Token) Tokens;
global Arena_256KB Lexer_defines_map_arena;
global HashTable(StrC) Lexer_defines;
global Array(Token) Lexer_Tokens;
#define current ( * ctx->scanner )
@ -190,7 +190,7 @@ global Array(Token) Tokens;
ctx->scanner++; \
}
#define SkipWhitespace() \
#define skip_whitespace() \
while ( ctx->left && char_is_space( current ) ) \
{ \
move_forward(); \
@ -237,10 +237,10 @@ s32 lex_preprocessor_directive( LexContext* ctx )
{
char const* hash = ctx->scanner;
Token hash_tok = { hash, 1, Tok_Preprocess_Hash, ctx->line, ctx->column, TF_Preprocess };
array_append( Tokens, hash_tok );
array_append( Lexer_Tokens, hash_tok );
move_forward();
SkipWhitespace();
skip_whitespace();
ctx->token.Text = ctx->scanner;
while (ctx->left && ! char_is_space(current) )
@ -249,7 +249,7 @@ s32 lex_preprocessor_directive( LexContext* ctx )
ctx->token.Length++;
}
ctx->token.Type = strc_to_toktype( to_str(ctx->token) );
ctx->token.Type = strc_to_toktype( tok_to_str(ctx->token) );
bool is_preprocessor = ctx->token.Type >= Tok_Preprocess_Define && ctx->token.Type <= Tok_Preprocess_Pragma;
if ( ! is_preprocessor )
@ -313,14 +313,14 @@ s32 lex_preprocessor_directive( LexContext* ctx )
ctx->token.Length = ctx->token.Length + ctx->token.Text - hash;
ctx->token.Text = hash;
array_append( Tokens, ctx->token );
array_append( Lexer_Tokens, ctx->token );
return Lex_Continue; // Skip found token, its all handled here.
}
if ( ctx->token.Type == Tok_Preprocess_Else || ctx->token.Type == Tok_Preprocess_EndIf )
{
ctx->token.Flags |= TF_Preprocess_Cond;
array_append( Tokens, ctx->token );
array_append( Lexer_Tokens, ctx->token );
end_line();
return Lex_Continue;
}
@ -329,9 +329,9 @@ s32 lex_preprocessor_directive( LexContext* ctx )
ctx->token.Flags |= TF_Preprocess_Cond;
}
array_append( Tokens, ctx->token );
array_append( Lexer_Tokens, ctx->token );
SkipWhitespace();
skip_whitespace();
if ( ctx->token.Type == Tok_Preprocess_Define )
{
@ -353,10 +353,10 @@ s32 lex_preprocessor_directive( LexContext* ctx )
name.Length++;
}
array_append( Tokens, name );
array_append( Lexer_Tokens, name );
u64 key = crc32( name.Text, name.Length );
hashtable_set(ctx->defines, key, to_str(name) );
hashtable_set(ctx->defines, key, tok_to_str(name) );
}
Token preprocess_content = { ctx->scanner, 0, Tok_Preprocess_Content, ctx->line, ctx->column, TF_Preprocess };
@ -399,7 +399,7 @@ s32 lex_preprocessor_directive( LexContext* ctx )
move_forward();
}
array_append( Tokens, preprocess_content );
array_append( Lexer_Tokens, preprocess_content );
return Lex_Continue; // Skip found token, its all handled here.
}
@ -462,7 +462,7 @@ s32 lex_preprocessor_directive( LexContext* ctx )
preprocess_content.Length++;
}
array_append( Tokens, preprocess_content );
array_append( Lexer_Tokens, preprocess_content );
return Lex_Continue; // Skip found token, its all handled here.
}
@ -471,11 +471,11 @@ void lex_found_token( LexContext* ctx )
{
if ( ctx->token.Type != Tok_Invalid )
{
array_append( Tokens, ctx->token );
array_append( Lexer_Tokens, ctx->token );
return;
}
TokType type = strc_to_toktype( to_str(ctx->token) );
TokType type = strc_to_toktype( tok_to_str(ctx->token) );
if (type <= Tok_Access_Public && type >= Tok_Access_Private )
{
@ -489,7 +489,7 @@ void lex_found_token( LexContext* ctx )
if ( type == Tok_Decl_Extern_Linkage )
{
SkipWhitespace();
skip_whitespace();
if ( current != '"' )
{
@ -498,7 +498,7 @@ void lex_found_token( LexContext* ctx )
}
ctx->token.Type = type;
array_append( Tokens, ctx->token );
array_append( Lexer_Tokens, ctx->token );
return;
}
@ -508,7 +508,7 @@ void lex_found_token( LexContext* ctx )
{
ctx->token.Type = type;
ctx->token.Flags |= TF_Specifier;
array_append( Tokens, ctx->token );
array_append( Lexer_Tokens, ctx->token );
return;
}
@ -516,7 +516,7 @@ void lex_found_token( LexContext* ctx )
if ( type != Tok_Invalid )
{
ctx->token.Type = type;
array_append( Tokens, ctx->token );
array_append( Lexer_Tokens, ctx->token );
return;
}
@ -570,7 +570,7 @@ void lex_found_token( LexContext* ctx )
ctx->token.Type = Tok_Identifier;
}
array_append( Tokens, ctx->token );
array_append( Lexer_Tokens, ctx->token );
}
neverinline
@ -581,7 +581,7 @@ TokArray lex( StrC content )
c.content = content;
c.left = content.Len;
c.scanner = content.Ptr;
c.defines = defines;
c.defines = Lexer_defines;
char const* word = c.scanner;
s32 word_length = 0;
@ -589,7 +589,7 @@ TokArray lex( StrC content )
c.line = 1;
c.column = 1;
SkipWhitespace();
skip_whitespace();
if ( c.left <= 0 )
{
log_failure( "gen::lex: no tokens found (only whitespace provided)" );
@ -614,7 +614,7 @@ TokArray lex( StrC content )
hashtable_set(c.defines, key, (StrC) * entry );
}
array_clear(Tokens);
array_clear(Lexer_Tokens);
while (c.left )
{
@ -644,14 +644,14 @@ TokArray lex( StrC content )
c.token.Type = Tok_NewLine;
c.token.Length++;
array_append( Tokens, c.token );
array_append( Lexer_Tokens, c.token );
continue;
}
}
c.token.Length = 0;
SkipWhitespace();
skip_whitespace();
if ( c.left <= 0 )
break;
@ -680,7 +680,7 @@ TokArray lex( StrC content )
c.token.Length++;
move_forward();
array_append( Tokens, c.token );
array_append( Lexer_Tokens, c.token );
}
}
@ -1135,7 +1135,7 @@ TokArray lex( StrC content )
move_forward();
c.token.Length++;
}
array_append( Tokens, c.token );
array_append( Lexer_Tokens, c.token );
continue;
}
else if ( current == '*' )
@ -1171,7 +1171,7 @@ TokArray lex( StrC content )
move_forward();
c.token.Length++;
}
array_append( Tokens, c.token );
array_append( Lexer_Tokens, c.token );
// end_line();
continue;
}
@ -1264,14 +1264,14 @@ TokArray lex( StrC content )
}
else
{
s32 start = max( 0, array_num(Tokens) - 100 );
s32 start = max( 0, array_num(Lexer_Tokens) - 100 );
log_fmt("\n%d\n", start);
for ( s32 idx = start; idx < array_num(Tokens); idx++ )
for ( s32 idx = start; idx < array_num(Lexer_Tokens); idx++ )
{
log_fmt( "Token %d Type: %s : %.*s\n"
, idx
, toktype_to_str( Tokens[ idx ].Type ).Ptr
, Tokens[ idx ].Length, Tokens[ idx ].Text
, toktype_to_str( Lexer_Tokens[ idx ].Type ).Ptr
, Lexer_Tokens[ idx ].Length, Lexer_Tokens[ idx ].Text
);
}
@ -1288,7 +1288,7 @@ TokArray lex( StrC content )
FoundToken:
{
lex_found_token( ctx );
TokType last_type = array_back(Tokens)->Type;
TokType last_type = array_back(Lexer_Tokens)->Type;
if ( last_type == Tok_Preprocess_Macro )
{
c.token = { c.scanner, 0, Tok_Invalid, c.line, c.column, TF_Null };
@ -1304,22 +1304,23 @@ TokArray lex( StrC content )
c.token.Length++;
move_forward();
array_append( Tokens, c.token );
array_append( Lexer_Tokens, c.token );
continue;
}
}
}
}
if ( array_num(Tokens) == 0 )
if ( array_num(Lexer_Tokens) == 0 )
{
log_failure( "Failed to lex any tokens" );
return { {}, 0 };
}
hashtable_clear(defines);
hashtable_clear(Lexer_defines);
// defines_map_arena.free();
return { Tokens, 0 };
TokArray result = { Lexer_Tokens, 0 };
return result;
}
#undef current
#undef move_forward

File diff suppressed because it is too large Load Diff