From 0f16d1131e311f1216b09f6d57504b0dcc785ab3 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Tue, 1 Aug 2023 14:02:54 -0400 Subject: [PATCH] Got past parsing, fixing serialization --- project/components/ast.cpp | 22 +-- project/components/header_end.hpp | 24 ++- project/components/interface.parsing.cpp | 239 +++++++++++++++++------ project/components/untyped.cpp | 36 ++++ test/gen/singleheader_copy.hpp | 0 test/test.singleheader_ast.cpp | 11 +- 6 files changed, 252 insertions(+), 80 deletions(-) create mode 100644 test/gen/singleheader_copy.hpp diff --git a/project/components/ast.cpp b/project/components/ast.cpp index d6e9538..50ae27e 100644 --- a/project/components/ast.cpp +++ b/project/components/ast.cpp @@ -508,39 +508,39 @@ String AST::to_string() break; case Preprocess_Define: - result.append_fmt( "#define %s %s", Name, Content ); + result.append_fmt( "#define %s %s\n", Name, Content ); break; case Preprocess_If: - result.append_fmt( "#if %s", Content ); + result.append_fmt( "#if %s\n", Content ); break; case Preprocess_IfDef: - result.append_fmt( "#ifdef %s", Content ); + result.append_fmt( "#ifdef %s\n", Content ); break; case Preprocess_IfNotDef: - result.append_fmt( "#ifndef %s", Content ); + result.append_fmt( "#ifndef %s\n", Content ); break; case Preprocess_Include: - result.append_fmt( "#include \"%s\"", Content ); + result.append_fmt( "#include \"%s\"\n", Content ); break; case Preprocess_ElIf: - result.append_fmt( "#elif %s", Content ); + result.append_fmt( "#elif %s\n", Content ); break; case Preprocess_Else: - result.append_fmt( "#else" ); + result.append_fmt( "#else\n" ); break; case Preprocess_EndIf: - result.append_fmt( "#endif" ); + result.append_fmt( "#endif\n" ); break; case Preprocess_Pragma: - result.append_fmt( "#pragma %s", Content ); + result.append_fmt( "#pragma %s\n", Content ); break; case Specifiers: @@ -653,11 +653,11 @@ String AST::to_string() if ( UnderlyingType->Type == Typename && UnderlyingType->ArrExpr ) { - result.append_fmt( "[%s];", UnderlyingType->ArrExpr->to_string() ); + result.append_fmt( "[%s];\n", UnderlyingType->ArrExpr->to_string() ); } else { - result.append( ";" ); + result.append( ";\n" ); } } break; diff --git a/project/components/header_end.hpp b/project/components/header_end.hpp index 5921178..d3b4db6 100644 --- a/project/components/header_end.hpp +++ b/project/components/header_end.hpp @@ -26,12 +26,29 @@ void AST::append( AST* other ) char const* AST::debug_str() { + if ( Parent ) + { + char const* fmt = stringize( + \nType : %s + \nParent : %s %s + \nName : %s + ); + + // These should be used immediately in a log. + // Thus if its desired to keep the debug str + // for multiple calls to bprintf, + // allocate this to proper string. + return str_fmt_buf( fmt + , type_str() + , Parent->Name + , Parent->type_str() + , Name ? Name : "" + ); + } + char const* fmt = stringize( - \nCode Debug: \nType : %s - \nParent : %s \nName : %s - \nComment : %s ); // These should be used immediately in a log. @@ -40,7 +57,6 @@ char const* AST::debug_str() // allocate this to proper string. return str_fmt_buf( fmt , type_str() - , Parent ? Parent->Name : "" , Name ? Name : "" ); } diff --git a/project/components/interface.parsing.cpp b/project/components/interface.parsing.cpp index b3b8262..82e79d5 100644 --- a/project/components/interface.parsing.cpp +++ b/project/components/interface.parsing.cpp @@ -1,6 +1,3 @@ -/* -These constructors are the most implementation intensive other than the editor or scanner. -*/ namespace Parser { @@ -331,6 +328,12 @@ namespace Parser continue; // Skip found token, its all handled here. } + if ( token.Type == TokType::Preprocess_Else || token.Type == TokType::Preprocess_EndIf ) + { + Tokens.append( token ); + continue; + } + Tokens.append( token ); SkipWhitespace(); @@ -355,12 +358,6 @@ namespace Parser defines.set( key, name ); } - if ( token.Type == TokType::Preprocess_Else || token.Type == TokType::Preprocess_EndIf ) - { - Tokens.append( token ); - continue; - } - Token content = { scanner, 0, TokType::Preprocess_Content, false, line, column }; if ( token.Type == TokType::Preprocess_Include ) @@ -1165,8 +1162,10 @@ Code parse_static_assert() Code assert = make_code(); assert->Type = ECode::Untyped; - eat( TokType::StaticAssert ); + Token content = currtok; + + eat( TokType::StaticAssert ); eat( TokType::Capture_Start ); s32 level = 0; @@ -1182,6 +1181,14 @@ Code parse_static_assert() eat( TokType::Capture_End ); eat( TokType::Statement_End ); + content.Length = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)content.Text; + + content.Text = str_fmt_buf( "%.*s\n", content.Length, content.Text ); + content.Length++; + + assert->Content = get_cached_string( content ); + assert->Name = assert->Content; + Context.pop(); return assert; } @@ -1682,7 +1689,10 @@ CodeOperator parse_operator_after_ret_type( // Parse Operator eat( TokType::Decl_Operator ); - if ( ! check( TokType::Operator ) ) + if ( ! left && currtok.Type != TokType::Operator + && currtok.Type != TokType::Star + && currtok.Type != TokType::Ampersand + && currtok.Type != TokType::Ampersand_DBL ) { log_failure( "Expected operator after 'operator' keyword\n%s", Context.to_string() ); Context.pop(); @@ -1697,12 +1707,26 @@ CodeOperator parse_operator_after_ret_type( if ( currtok.Text[1] == '=' ) op = Assign_Add; + if ( currtok.Text[1] == '+' ) + op = Increment; + else op = Add; } break; case '-': { + if ( currtok.Text[1] == '>' ) + { + if ( currtok.Text[2] == '*' ) + op = MemberOfPointer; + + else + op = MemberOfPointer; + + break; + } + if ( currtok.Text[1] == '=' ) op = Assign_Subtract; @@ -1876,11 +1900,14 @@ CodeOperator parse_operator_after_ret_type( return CodeInvalid; } - eat( TokType::Operator ); + eat( currtok.Type ); // Parse Params CodeParam params = parse_params(); + if ( params.ast == nullptr && op == EOperator::Multiply ) + op = MemberOfPointer; + while ( left && currtok.is_specifier() ) { if ( specifiers.ast == nullptr ) @@ -2273,15 +2300,11 @@ CodeBody parse_class_struct_body( Parser::TokType which ) break; case TokType::Decl_Class: - { member = parse_complicated_definition( TokType::Decl_Class ); - } break; case TokType::Decl_Enum: - { member = parse_complicated_definition( TokType::Decl_Enum ); - } break; case TokType::Decl_Friend: @@ -2293,9 +2316,7 @@ CodeBody parse_class_struct_body( Parser::TokType which ) break; case TokType::Decl_Struct: - { member = parse_complicated_definition( TokType::Decl_Struct ); - } break; case TokType::Decl_Template: @@ -2307,10 +2328,7 @@ CodeBody parse_class_struct_body( Parser::TokType which ) break; case TokType::Decl_Union: - { member = parse_complicated_definition( TokType::Decl_Union ); - } - break; case TokType::Decl_Using: @@ -2489,7 +2507,8 @@ Code parse_class_struct( Parser::TokType which, bool inplace_def = false ) attributes = parse_attributes(); - name = parse_identifier(); + if ( check( TokType::Identifier ) ) + name = parse_identifier(); local_persist char interface_arr_mem[ kilobytes(4) ] {0}; @@ -2834,7 +2853,6 @@ CodeEnum parse_enum( bool inplace_def ) Token name = { nullptr, 0, TokType::Invalid }; Code array_expr = { nullptr }; CodeType type = { nullptr }; - Token body = { nullptr, 0, TokType::Invalid }; char entries_code[ kilobytes(128) ] { 0 }; s32 entries_length = 0; @@ -2851,16 +2869,12 @@ CodeEnum parse_enum( bool inplace_def ) attributes = parse_attributes(); - if ( currtok.Type != TokType::Identifier ) + if ( check( TokType::Identifier ) ) { - log_failure( "Expected identifier for enum name\n%s", Context.to_string() ); - Context.pop(); - return CodeInvalid; + name = currtok; + eat( TokType::Identifier ); } - name = currtok; - eat( TokType::Identifier ); - if ( currtok.Type == TokType::Assign_Classifer ) { eat( TokType::Assign_Classifer ); @@ -2874,45 +2888,91 @@ CodeEnum parse_enum( bool inplace_def ) } } + CodeBody body = { nullptr }; + if ( currtok.Type == TokType::BraceCurly_Open ) { + body = (CodeBody) make_code(); + eat( TokType::BraceCurly_Open ); - body = currtok; + Code member = CodeInvalid; while ( currtok.Type != TokType::BraceCurly_Close ) { - if ( currtok.Type == TokType::Comment ) + switch ( currtok.Type ) { - eat( TokType::Comment ); - continue; - } - else if ( currtok.Type == TokType::Preprocess_Macro ) - { - eat( TokType::Preprocess_Macro ); - continue; + case TokType::Comment: + member = def_comment( currtok ); + eat( TokType::Comment ); + break; + + case TokType::Preprocess_Define: + member = parse_define(); + break; + + case TokType::Preprocess_If: + case TokType::Preprocess_IfDef: + case TokType::Preprocess_IfNotDef: + case TokType::Preprocess_ElIf: + member = parse_preprocess_cond(); + break; + + case TokType::Preprocess_Else: + member = preprocess_else; + eat( TokType::Preprocess_Else ); + break; + + case TokType::Preprocess_EndIf: + member = preprocess_endif; + eat( TokType::Preprocess_EndIf ); + break; + + case TokType::Preprocess_Macro: + member = untyped_str( currtok ); + eat( TokType::Preprocess_Macro ); + break; + + case TokType::Preprocess_Pragma: + member = parse_pragma(); + break; + + default: + Token entry = currtok; + + eat( TokType::Identifier); + + if ( currtok.Type == TokType::Operator && currtok.Text[0] == '=' ) + { + eat( TokType::Operator ); + + while ( currtok.Type != TokType::Comma && currtok.Type != TokType::BraceCurly_Close ) + { + eat( currtok.Type ); + } + } + + if ( currtok.Type == TokType::Comma ) + { + eat( TokType::Comma ); + } + + entry.Length = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)entry.Text; + + member = untyped_str( entry ); + break; } - eat( TokType::Identifier); - - if ( currtok.Type == TokType::Operator && currtok.Text[0] == '=' ) + if ( member == Code::Invalid ) { - eat( TokType::Operator ); - - while ( currtok.Type != TokType::Comma && currtok.Type != TokType::BraceCurly_Close ) - { - eat( currtok.Type ); - } + log_failure( "Failed to parse member\n%s", Context.to_string() ); + Context.pop(); + return CodeInvalid; } - if ( currtok.Type == TokType::Comma ) - { - eat( TokType::Comma ); - } + body.append( member ); } - body.Length = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)body.Text; - eat( TokType::BraceCurly_Close ); } @@ -2924,14 +2984,10 @@ CodeEnum parse_enum( bool inplace_def ) CodeEnum result = (CodeEnum) make_code(); - if ( body.Length ) + if ( body.ast ) { - // mem_copy( entries_code, body.Text, body.Length ); - - Code untyped_body = untyped_str( body ); - result->Type = is_enum_class ? Enum_Class : Enum; - result->Body = untyped_body; + result->Body = body; } else { @@ -3884,10 +3940,67 @@ CodeUnion parse_union( bool inplace_def ) while ( ! check( TokType::BraceCurly_Close ) ) { - Code entry = parse_variable(); + Code member = { nullptr }; + switch ( currtok.Type ) + { + case TokType::Comment: + member = def_comment( currtok ); + eat( TokType::Comment ); + break; - if ( entry ) - body.append( entry ); + case TokType::Decl_Class: + member = parse_complicated_definition( TokType::Decl_Class ); + break; + + case TokType::Decl_Enum: + member = parse_complicated_definition( TokType::Decl_Enum ); + break; + + case TokType::Decl_Struct: + member = parse_complicated_definition( TokType::Decl_Struct ); + break; + + case TokType::Decl_Union: + member = parse_complicated_definition( TokType::Decl_Union ); + break; + + case TokType::Preprocess_Define: + member = parse_define(); + break; + + case TokType::Preprocess_If: + case TokType::Preprocess_IfDef: + case TokType::Preprocess_IfNotDef: + case TokType::Preprocess_ElIf: + member = parse_preprocess_cond(); + break; + + case TokType::Preprocess_Else: + member = preprocess_else; + eat( TokType::Preprocess_Else ); + break; + + case TokType::Preprocess_EndIf: + member = preprocess_endif; + eat( TokType::Preprocess_EndIf ); + break; + + case TokType::Preprocess_Macro: + member = untyped_str( currtok ); + eat( TokType::Preprocess_Macro ); + break; + + case TokType::Preprocess_Pragma: + member = parse_pragma(); + break; + + default: + member = parse_variable(); + break; + } + + if ( member ) + body.append( member ); } eat( TokType::BraceCurly_Close ); diff --git a/project/components/untyped.cpp b/project/components/untyped.cpp index 12f4971..46c6e14 100644 --- a/project/components/untyped.cpp +++ b/project/components/untyped.cpp @@ -99,17 +99,35 @@ sw token_fmt_va( char* buf, uw buf_size, s32 num_tokens, va_list va ) Code untyped_str( StrC content ) { + if ( content.Len == 0 ) + { + log_failure( "untyped_str: empty string" ); + return CodeInvalid; + } + Code result = make_code(); result->Name = get_cached_string( content ); result->Type = ECode::Untyped; result->Content = result->Name; + if ( result->Name == nullptr ) + { + log_failure( "untyped_str: could not cache string" ); + return CodeInvalid; + } + return result; } Code untyped_fmt( char const* fmt, ...) { + if ( fmt == nullptr ) + { + log_failure( "untyped_fmt: null format string" ); + return CodeInvalid; + } + local_persist thread_local char buf[GEN_PRINTF_MAXLEN] = { 0 }; @@ -124,11 +142,23 @@ Code untyped_fmt( char const* fmt, ...) result->Type = ECode::Untyped; result->Content = get_cached_string( { length, buf } ); + if ( result->Name == nullptr ) + { + log_failure( "untyped_fmt: could not cache string" ); + return CodeInvalid; + } + return result; } Code untyped_token_fmt( s32 num_tokens, ... ) { + if ( num_tokens == 0 ) + { + log_failure( "untyped_token_fmt: zero tokens" ); + return CodeInvalid; + } + local_persist thread_local char buf[GEN_PRINTF_MAXLEN] = { 0 }; @@ -143,5 +173,11 @@ Code untyped_token_fmt( s32 num_tokens, ... ) result->Type = ECode::Untyped; result->Content = result->Name; + if ( result->Name == nullptr ) + { + log_failure( "untyped_fmt: could not cache string" ); + return CodeInvalid; + } + return result; } diff --git a/test/gen/singleheader_copy.hpp b/test/gen/singleheader_copy.hpp new file mode 100644 index 0000000..e69de29 diff --git a/test/test.singleheader_ast.cpp b/test/test.singleheader_ast.cpp index cf26097..143f554 100644 --- a/test/test.singleheader_ast.cpp +++ b/test/test.singleheader_ast.cpp @@ -18,11 +18,18 @@ void check_singleheader_ast() CodeBody ast = parse_global_body( { file.size, (char const*)file.data } ); - log_fmt("generated AST!!!"); + log_fmt("generated AST!!!\n"); + + s32 idx = 0; + for ( Code entry : ast ) + { + log_fmt("Entry %d: %s", idx, entry.to_string() ); + idx++; + } Builder builder; builder.open( "singleheader_copy.hpp" ); - log_fmt("serializng ast"); + log_fmt("serializng ast\n"); builder.print( ast ); builder.write();