From 528ef72a51dafb1c8f130e81bd36d199c67aa1e1 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Tue, 1 Aug 2023 05:17:24 -0400 Subject: [PATCH] More progress on parsing Made it to line 2597 of self parsing its singleheader Complex global or member definitions are now supported. --- Readme.md | 2 +- docs/Parsing.md | 43 +-- project/Readme.md | 2 +- project/components/ast.cpp | 12 +- project/components/data_structures.hpp | 12 +- project/components/interface.parsing.cpp | 323 ++++++++++++++--------- project/dependencies/macros.hpp | 3 + 7 files changed, 252 insertions(+), 145 deletions(-) diff --git a/Readme.md b/Readme.md index c6db963..ad1502c 100644 --- a/Readme.md +++ b/Readme.md @@ -17,7 +17,7 @@ A `natvis` and `natstepfilter` are provided in the scripts directory. ***The editor and scanner have not been implemented yet. The scanner will come first, then the editor.*** -A C variant is hosted [here](https://github.com/Ed94/genc); I haven't gotten headwind on it, should be easier to make than this... +A C variant is hosted [here](https://github.com/Ed94/genc); I will complete it when this library is feature complete; it should be easier to make than this... ## Usage diff --git a/docs/Parsing.md b/docs/Parsing.md index 7f81d66..518f22a 100644 --- a/docs/Parsing.md +++ b/docs/Parsing.md @@ -3,26 +3,6 @@ The library features a naive parser tailored for only what the library needs to construct the supported syntax of C++ into its AST. This parser does not, and should not do the compiler's job. By only supporting this minimal set of features, the parser is kept under 5000 loc. - -Everything is done in one pass for both the preprocessor directives and the rest of the language. -The parser performs no macro expansion as the scope of gencpp feature-set is to only support the preprocessor for the goal of having rudimentary awareness of preprocessor ***conditionals***, ***defines***, and ***includes***, and ***`pragmas`**. 
- -The keywords supported for the preprocessor are: - -* include -* define -* if -* ifdef -* elif -* endif -* undef -* pragma - -Each directive `#` line is considered one preproecessor unit, and will be treated as one Preprocessor AST. *These ASTs will be considered members or entries of braced scope they reside within*. -All keywords except *include* are suppported as members of a scope for a class/struct, global, or namespace body. - -Any preprocessor definition abuse that changes the syntax of the core language is unsupported and will fail to parse if not kept within an execution scope (function body, or expression assignment). - The parsing implementation supports the following for the user: ```cpp @@ -47,3 +27,26 @@ CodeVar parse_variable ( StrC var_def ); ***Parsing will aggregate any tokens within a function body or expression statement to an untyped Code AST.*** +Everything is done in one pass for both the preprocessor directives and the rest of the language. +The parser performs no macro expansion, as the scope of the gencpp feature-set is to only support the preprocessor for the goal of having rudimentary awareness of preprocessor ***conditionals***, ***defines***, ***includes***, and ***`pragmas`***. + +The keywords supported for the preprocessor are: + +* include +* define +* if +* ifdef +* elif +* endif +* undef +* pragma + +Each directive `#` line is considered one preprocessor unit, and will be treated as one Preprocessor AST. *These ASTs will be considered members or entries of the braced scope they reside within*. +All keywords except *include* are supported as members of a scope for a class/struct, global, or namespace body. + +Any preprocessor definition abuse that changes the syntax of the core language is unsupported and will fail to parse if not kept within an execution scope (function body, or expression assignment). 
+ +Exceptions to the above rule (if it's too hard to keep track of, just follow the above notion): + +* Typedefs allow a macro expansion to be defined after the keyword; Ex: `typedef GEN_FILE_OPEN_PROC( file_open_proc );` + diff --git a/project/Readme.md b/project/Readme.md index 5698371..cfe968f 100644 --- a/project/Readme.md +++ b/project/Readme.md @@ -12,7 +12,7 @@ Just like the `gen.` they include their components: `dependencies/.` ) They directly include `depedencies/file_handling.` as the core library does not include file processing by defualt. -**TODO : Right now the library is not finished structurally, as such the first self-hosting iteration is still WIP** +**TODO : Right now the library is not finished, as such the first self-hosting iteration is still WIP** Both libraries use *pre-generated* (self-hosting I guess) version of the library to then generate the latest version of itself. (sort of a verification that the generated version is equivalent). diff --git a/project/components/ast.cpp b/project/components/ast.cpp index c4ea141..d6e9538 100644 --- a/project/components/ast.cpp +++ b/project/components/ast.cpp @@ -707,7 +707,9 @@ String AST::to_string() ); } - if ( Parent && Parent->Type != ECode::Typedef ) + bool add_semicolon = Parent && Parent->Type != ECode::Typedef && Parent->Type != ECode::Variable; + + if ( add_semicolon ) result.append(";"); } break; @@ -756,6 +758,9 @@ String AST::to_string() if ( ValueType->ArrExpr ) result.append_fmt( "[%s]", ValueType->ArrExpr->to_string() ); + if ( BitfieldSize ) + result.append_fmt( " : %s", BitfieldSize->to_string() ); /* BitfieldSize is an AST node (untyped expression), so serialize it rather than printing the pointer */ + if ( Value ) result.append_fmt( " = %s", Value->to_string() ); @@ -764,7 +769,10 @@ String AST::to_string() break; } - if ( UnderlyingType->ArrExpr ) + if ( BitfieldSize ) + result.append_fmt( "%s %s : %s;", ValueType->to_string(), Name, BitfieldSize->to_string() ); /* same shape as the array branch below: type, name, then the terminator */ + + else if ( UnderlyingType->ArrExpr ) result.append_fmt( "%s %s[%s];", UnderlyingType->to_string(), Name, UnderlyingType->ArrExpr->to_string() ); else diff 
--git a/project/components/data_structures.hpp b/project/components/data_structures.hpp index 6fdfcc3..65cd330 100644 --- a/project/components/data_structures.hpp +++ b/project/components/data_structures.hpp @@ -231,7 +231,10 @@ struct AST AST* UnderlyingType; // Enum, Typedef AST* ValueType; // Parameter, Variable }; - AST* Params; // Function, Operator, Template + union { + AST* Params; // Function, Operator, Template + AST* BitfieldSize; // Variable (Class/Struct Data Member) + }; union { AST* ArrExpr; // Type Symbol AST* Body; // Class, Enum, Function, Namespace, Struct, Union @@ -275,7 +278,10 @@ struct AST_POD AST* UnderlyingType; // Enum, Typedef AST* ValueType; // Parameter, Variable }; - AST* Params; // Function, Operator, Template + union { + AST* Params; // Function, Operator, Template + AST* BitfieldSize; // Variable (Class/Struct Data Member) + }; union { AST* ArrExpr; // Type Symbol AST* Body; // Class, Enum, Function, Namespace, Struct, Union @@ -1064,7 +1070,7 @@ struct AST_Var CodeAttributes Attributes; CodeSpecifiers Specs; CodeType ValueType; - char _PAD_PROPERTIES_[ sizeof(AST*) ]; + Code BitfieldSize; Code Value; }; }; diff --git a/project/components/interface.parsing.cpp b/project/components/interface.parsing.cpp index b1bf188..b3b8262 100644 --- a/project/components/interface.parsing.cpp +++ b/project/components/interface.parsing.cpp @@ -81,6 +81,11 @@ namespace Parser { return Idx + 1 < Arr.num() ? 
&Arr[Idx + 1] : nullptr; } + + Token& operator []( s32 idx ) + { + return Arr[idx]; + } }; struct StackNode @@ -1015,8 +1020,8 @@ if ( def.Ptr == nullptr ) \ internal Code parse_function_body(); internal Code parse_global_nspace(); -internal CodeClass parse_class ( bool from_typedef = false ); -internal CodeEnum parse_enum ( bool from_typedef = false ); +internal CodeClass parse_class ( bool inplace_def = false ); +internal CodeEnum parse_enum ( bool inplace_def = false ); internal CodeBody parse_export_body (); internal CodeBody parse_extern_link_body(); internal CodeExtern parse_exten_link (); @@ -1024,14 +1029,16 @@ internal CodeFriend parse_friend (); internal CodeFn parse_function (); internal CodeNamespace parse_namespace (); internal CodeOpCast parse_operator_cast (); -internal CodeStruct parse_struct ( bool from_typedef = false ); +internal CodeStruct parse_struct ( bool inplace_def = false ); internal CodeVar parse_variable (); internal CodeTemplate parse_template (); internal CodeType parse_type (); internal CodeTypedef parse_typedef (); -internal CodeUnion parse_union ( bool from_typedef = false ); +internal CodeUnion parse_union ( bool inplace_def = false ); internal CodeUsing parse_using (); +constexpr bool inplace_def = true; + internal inline CodeDefine parse_define() { @@ -1925,6 +1932,8 @@ CodeVar parse_variable_after_name( Code array_expr = parse_array_decl(); Code expr = { nullptr }; + Code bitfield_expr = { nullptr }; + if ( currtok.IsAssign ) { eat( TokType::Operator ); @@ -1947,6 +1956,28 @@ CodeVar parse_variable_after_name( expr = untyped_str( expr_tok ); } + if ( currtok.Type == TokType::Assign_Classifer ) + { + eat( TokType::Assign_Classifer ); + + Token expr_tok = currtok; + + if ( currtok.Type == TokType::Statement_End ) + { + log_failure( "Expected expression after bitfield \n%s", Context.to_string() ); + Context.pop(); + return CodeInvalid; + } + + while ( left && currtok.Type != TokType::Statement_End ) + { + eat( currtok.Type ); + } 
+ + expr_tok.Length = (sptr)currtok.Text - (sptr)expr_tok.Text; /* currtok is the terminating ';' here; end the expression before it */ + bitfield_expr = untyped_str( expr_tok ); + } + eat( TokType::Statement_End ); using namespace ECode; @@ -1962,6 +1993,9 @@ CodeVar parse_variable_after_name( if (array_expr ) type->ArrExpr = array_expr; + if ( bitfield_expr ) + result->BitfieldSize = bitfield_expr; + if ( attributes ) result->Attributes = attributes; @@ -2009,6 +2043,7 @@ Code parse_variable_assignment() return expr; } + internal inline Code parse_operator_function_or_variable( bool expects_function, CodeAttributes attributes, CodeSpecifiers specifiers ) { @@ -2059,6 +2094,133 @@ Code parse_operator_function_or_variable( bool expects_function, CodeAttributes return result; } +internal inline +Code parse_complicated_definition( Parser::TokType which ) +{ + using namespace Parser; + push_scope(); + + labeled_scope_start + PARSE_FORWARD_OR_DEFINITION: + Code result = CodeInvalid; + + // Only reached via goto: parse `which type_identifier;` or `which type_identifier { ... };` with the matching declaration parser. + switch ( which ) + { + case TokType::Decl_Class: + result = parse_class(); + Context.pop(); + return result; + + case TokType::Decl_Enum: + result = parse_enum(); + Context.pop(); + return result; + + case TokType::Decl_Struct: + result = parse_struct(); + Context.pop(); + return result; + + case TokType::Decl_Union: + result = parse_union(); + Context.pop(); + return result; + + default: + log_failure( "Error, wrong token type given to parse_complicated_definition " + "(only supports class, enum, struct, union) \n%s" + , Context.to_string() ); + + Context.pop(); + return CodeInvalid; + } + labeled_scope_end + + TokArray tokens = Context.Tokens; + + s32 idx = tokens.Idx; + s32 level = 0; + for ( ; idx < tokens.Arr.num(); idx ++ ) + { + if ( tokens[idx].Type == TokType::BraceCurly_Open ) + level++; + + if ( tokens[idx].Type == TokType::BraceCurly_Close ) + level--; + + if ( level == 0 && tokens[idx].Type == TokType::Statement_End ) + break; + } + + if ( (idx - 2 ) == tokens.Idx ) + { + // It's a forward declaration only: which type_identifier ; + goto 
PARSE_FORWARD_OR_DEFINITION; + } + + Token tok = tokens[ idx - 1 ]; + if ( tok.Type == TokType::Identifier ) + { + tok = tokens[ idx - 2 ]; + + bool is_indirection = tok.Type == TokType::Ampersand + || tok.Type == TokType::Star; + + bool ok_to_parse = false; + + if ( tok.Type == TokType::BraceCurly_Close ) + { + // It's an inplace definition + // which type_identifier { ... } identifier; + ok_to_parse = true; + } + else if ( tok.Type == TokType::Identifier && tokens[ idx - 3 ].Type == which ) + { + // It's a variable whose type ID is prefixed by the `which` keyword (class, enum, struct, or union). + // which type_identifier identifier; + ok_to_parse = true; + } + else if ( is_indirection ) + { + // It's an indirection (the token before the identifier is a star or ampersand). + // which type_identifier * identifier;  or  which type_identifier & identifier; + ok_to_parse = true; + } + + if ( ! ok_to_parse ) + { + log_failure( "Unsupported or bad member definition after class, enum, struct, or union declaration\n%s", Context.to_string() ); + Context.pop(); + return CodeInvalid; + } + + Code result = parse_operator_function_or_variable( false, { nullptr }, { nullptr } ); + Context.pop(); + return result; + } + else if ( tok.Type == TokType::BraceCurly_Close ) + { + // It's a definition + // which type_identifier { ... }; + goto PARSE_FORWARD_OR_DEFINITION; + } + else if ( tok.Type == TokType::BraceSquare_Close) + { + // It's an array definition + // which type_identifier identifier[ ... 
]; + Code result = parse_operator_function_or_variable( false, { nullptr }, { nullptr } ); + Context.pop(); + return result; + } + else + { + log_failure( "Unsupported or bad member definition after class, enum, struct, or union declaration\n%s", Context.to_string() ); + Context.pop(); + return CodeInvalid; + } +} + internal CodeBody parse_class_struct_body( Parser::TokType which ) { @@ -2112,57 +2274,13 @@ CodeBody parse_class_struct_body( Parser::TokType which ) case TokType::Decl_Class: { - s32 idx = Context.Tokens.Idx; - for ( ; Context.Tokens.Arr[idx].Type != TokType::Statement_End; idx ++) - { - if ( Context.Tokens.Arr[idx].Type == TokType::BraceCurly_Open ) - { - idx = Context.Tokens.Idx; - break; - } - } - - Token tok_before_end = Context.Tokens.Arr[ idx ]; - if ( tok_before_end.Type == TokType::Identifier - || tok_before_end.Type == TokType::Capture_End - || tok_before_end.Type == TokType::Spec_Const - || tok_before_end.Type == TokType::Spec_Volatile - || tok_before_end.Type == TokType::Spec_Override - || tok_before_end.Type == TokType::Spec_Final ) - { - member = parse_operator_function_or_variable( false, attributes, specifiers ); - break; - } - - member = parse_class(); + member = parse_complicated_definition( TokType::Decl_Class ); } break; case TokType::Decl_Enum: { - s32 idx = Context.Tokens.Idx; - for ( ; Context.Tokens.Arr[idx].Type != TokType::Statement_End; idx ++) - { - if ( Context.Tokens.Arr[idx].Type == TokType::BraceCurly_Open ) - { - idx = Context.Tokens.Idx; - break; - } - } - - Token tok_before_end = Context.Tokens.Arr[ idx ]; - if ( tok_before_end.Type == TokType::Identifier - || tok_before_end.Type == TokType::Capture_End - || tok_before_end.Type == TokType::Spec_Const - || tok_before_end.Type == TokType::Spec_Volatile - || tok_before_end.Type == TokType::Spec_Override - || tok_before_end.Type == TokType::Spec_Final ) - { - member = parse_operator_function_or_variable( false, attributes, specifiers ); - break; - } - - member = parse_enum(); + member = 
parse_complicated_definition( TokType::Decl_Enum ); } break; @@ -2176,29 +2294,7 @@ CodeBody parse_class_struct_body( Parser::TokType which ) case TokType::Decl_Struct: { - s32 idx = Context.Tokens.Idx; - for ( ; Context.Tokens.Arr[idx].Type != TokType::Statement_End; idx ++) - { - if ( Context.Tokens.Arr[idx].Type == TokType::BraceCurly_Open ) - { - idx = Context.Tokens.Idx; - break; - } - } - - Token tok_before_end = Context.Tokens.Arr[ idx ]; - if ( tok_before_end.Type == TokType::Identifier - || tok_before_end.Type == TokType::Capture_End - || tok_before_end.Type == TokType::Spec_Const - || tok_before_end.Type == TokType::Spec_Volatile - || tok_before_end.Type == TokType::Spec_Override - || tok_before_end.Type == TokType::Spec_Final ) - { - member = parse_operator_function_or_variable( false, attributes, specifiers ); - break; - } - - member = parse_struct(); + member = parse_complicated_definition( TokType::Decl_Struct ); } break; @@ -2212,29 +2308,7 @@ CodeBody parse_class_struct_body( Parser::TokType which ) case TokType::Decl_Union: { - s32 idx = Context.Tokens.Idx; - for ( ; Context.Tokens.Arr[idx].Type != TokType::Statement_End; idx ++) - { - if ( Context.Tokens.Arr[idx].Type == TokType::BraceCurly_Open ) - { - idx = Context.Tokens.Idx; - break; - } - } - - Token tok_before_end = Context.Tokens.Arr[ idx ]; - if ( tok_before_end.Type == TokType::Identifier - || tok_before_end.Type == TokType::Capture_End - || tok_before_end.Type == TokType::Spec_Const - || tok_before_end.Type == TokType::Spec_Volatile - || tok_before_end.Type == TokType::Spec_Override - || tok_before_end.Type == TokType::Spec_Final ) - { - member = parse_operator_function_or_variable( false, attributes, specifiers ); - break; - } - - member = parse_union(); + member = parse_complicated_definition( TokType::Decl_Union ); } break; @@ -2269,10 +2343,12 @@ CodeBody parse_class_struct_body( Parser::TokType which ) case TokType::Preprocess_Else: member = preprocess_else; + eat( 
TokType::Preprocess_Else ); break; case TokType::Preprocess_EndIf: member = preprocess_endif; + eat( TokType::Preprocess_EndIf ); break; case TokType::Preprocess_Unsupported: @@ -2383,7 +2459,7 @@ CodeBody parse_class_struct_body( Parser::TokType which ) } internal -Code parse_class_struct( Parser::TokType which, bool from_typedef = false ) +Code parse_class_struct( Parser::TokType which, bool inplace_def = false ) { using namespace Parser; @@ -2451,7 +2527,7 @@ Code parse_class_struct( Parser::TokType which, bool from_typedef = false ) body = parse_class_struct_body( which ); } - if ( ! from_typedef ) + if ( ! inplace_def ) eat( TokType::Statement_End ); if ( which == TokType::Decl_Class ) @@ -2537,12 +2613,12 @@ CodeBody parse_global_nspace( CodeT which ) eat( TokType::Comment ); break; - case TokType::Decl_Enum: - member = parse_enum(); + case TokType::Decl_Class: + member = parse_complicated_definition( TokType::Decl_Class ); break; - case TokType::Decl_Class: - member = parse_class(); + case TokType::Decl_Enum: + member = parse_complicated_definition( TokType::Decl_Enum ); break; case TokType::Decl_Extern_Linkage: @@ -2557,12 +2633,7 @@ CodeBody parse_global_nspace( CodeT which ) break; case TokType::Decl_Struct: - if ( currtok.Line >= 2202 ) - { - log_fmt("here"); - } - - member = parse_struct(); + member = parse_complicated_definition( TokType::Decl_Struct ); break; case TokType::Decl_Template: @@ -2574,7 +2645,7 @@ CodeBody parse_global_nspace( CodeT which ) break; case TokType::Decl_Union: - member = parse_union(); + member = parse_complicated_definition( TokType::Decl_Union ); break; case TokType::Decl_Using: @@ -2723,11 +2794,11 @@ CodeBody parse_global_nspace( CodeT which ) } internal -CodeClass parse_class( bool from_typedef ) +CodeClass parse_class( bool inplace_def ) { using namespace Parser; push_scope(); - CodeClass result = (CodeClass) parse_class_struct( Parser::TokType::Decl_Class, from_typedef ); + CodeClass result = (CodeClass) 
parse_class_struct( Parser::TokType::Decl_Class, inplace_def ); Context.pop(); return result; } @@ -2749,7 +2820,7 @@ CodeClass parse_class( StrC def ) } internal -CodeEnum parse_enum( bool from_typedef ) +CodeEnum parse_enum( bool inplace_def ) { using namespace Parser; using namespace ECode; @@ -2758,6 +2829,8 @@ CodeEnum parse_enum( bool from_typedef ) SpecifierT specs_found[16] { ESpecifier::NumSpecifiers }; s32 NumSpecifiers = 0; + CodeAttributes attributes = { nullptr }; + Token name = { nullptr, 0, TokType::Invalid }; Code array_expr = { nullptr }; CodeType type = { nullptr }; @@ -2776,7 +2849,7 @@ CodeEnum parse_enum( bool from_typedef ) is_enum_class = true; } - // TODO : Parse attributes + attributes = parse_attributes(); if ( currtok.Type != TokType::Identifier ) { @@ -2809,6 +2882,17 @@ CodeEnum parse_enum( bool from_typedef ) while ( currtok.Type != TokType::BraceCurly_Close ) { + if ( currtok.Type == TokType::Comment ) + { + eat( TokType::Comment ); + continue; + } + else if ( currtok.Type == TokType::Preprocess_Macro ) + { + eat( TokType::Preprocess_Macro ); + continue; + } + eat( TokType::Identifier); if ( currtok.Type == TokType::Operator && currtok.Text[0] == '=' ) @@ -2832,7 +2916,7 @@ CodeEnum parse_enum( bool from_typedef ) eat( TokType::BraceCurly_Close ); } - if ( ! from_typedef ) + if ( ! 
inplace_def ) eat( TokType::Statement_End ); using namespace ECode; @@ -2856,6 +2940,9 @@ CodeEnum parse_enum( bool from_typedef ) result->Name = get_cached_string( name ); + if ( attributes ) + result->Attributes = attributes; + if ( type ) result->UnderlyingType = type; @@ -3324,11 +3411,11 @@ CodeOpCast parse_operator_cast( StrC def ) } internal inline -CodeStruct parse_struct( bool from_typedef ) +CodeStruct parse_struct( bool inplace_def ) { using namespace Parser; push_scope(); - CodeStruct result = (CodeStruct) parse_class_struct( TokType::Decl_Struct, from_typedef ); + CodeStruct result = (CodeStruct) parse_class_struct( TokType::Decl_Struct, inplace_def ); Context.pop(); return result; } @@ -3763,7 +3850,7 @@ CodeTypedef parse_typedef( StrC def ) } internal -CodeUnion parse_union( bool from_typedef ) +CodeUnion parse_union( bool inplace_def ) { using namespace Parser; push_scope(); @@ -3805,7 +3892,7 @@ CodeUnion parse_union( bool from_typedef ) eat( TokType::BraceCurly_Close ); - if ( ! from_typedef ) + if ( ! inplace_def ) eat( TokType::Statement_End ); CodeUnion diff --git a/project/dependencies/macros.hpp b/project/dependencies/macros.hpp index 2e3125e..752dc3c 100644 --- a/project/dependencies/macros.hpp +++ b/project/dependencies/macros.hpp @@ -86,6 +86,9 @@ } \ while(0); +#define labeled_scope_start if ( false ) { +#define labeled_scope_end } + #define clamp( x, lower, upper ) min( max( ( x ), ( lower ) ), ( upper ) ) #define count_of( x ) ( ( size_of( x ) / size_of( 0 [ x ] ) ) / ( ( sw )( ! ( size_of( x ) % size_of( 0 [ x ] ) ) ) ) ) #define is_between( x, lower, upper ) ( ( ( lower ) <= ( x ) ) && ( ( x ) <= ( upper ) ) )