From 543427dfe54ed891e371f1906b5d81048075fb00 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Sun, 3 Sep 2023 23:36:51 -0400 Subject: [PATCH] Fixes to parsing for validation Removed whitespace stripping from parse_type, prepped for doing some major changes for function signature typenames. Moved template argument parsing to its own helper function since it's used in more than one spot. Latest failure is due to stack overflow when validating parameters. (Shouldn't be hard to debug) --- project/components/ast.hpp | 6 +- project/components/ast_types.hpp | 3 +- project/components/interface.parsing.cpp | 148 ++++++++++------------- 3 files changed, 68 insertions(+), 89 deletions(-) diff --git a/project/components/ast.hpp b/project/components/ast.hpp index c920c76..c7460f9 100644 --- a/project/components/ast.hpp +++ b/project/components/ast.hpp @@ -236,7 +236,7 @@ struct AST { AST* InlineCmt; // Class, Constructor, Destructor, Enum, Friend, Functon, Operator, OpCast, Struct, Typedef, Using, Variable AST* Attributes; // Class, Enum, Function, Struct, Typedef, Union, Using, Variable - AST* Specs; // Destructor, Function, Operator, Type symbol, Variable + AST* Specs; // Destructor, Function, Operator, Typename, Variable union { AST* InitializerList; // Constructor AST* ParentType; // Class, Struct @@ -249,7 +249,7 @@ struct AST AST* Params; // Constructor, Function, Operator, Template }; union { - AST* ArrExpr; // Type Symbol + AST* ArrExpr; // Typename AST* Body; // Class, Constructr, Destructor, Enum, Function, Namespace, Struct, Union AST* Declaration; // Friend, Template AST* Value; // Parameter, Variable @@ -288,7 +288,7 @@ struct AST_POD { AST* InlineCmt; // Class, Constructor, Destructor, Enum, Friend, Functon, Operator, OpCast, Struct, Typedef, Using, Variable AST* Attributes; // Class, Enum, Function, Struct, Typename, Union, Using, Variable - AST* Specs; // Function, Operator, Type symbol, Variable + AST* Specs; // Function, Operator, Typename, Variable union { AST* 
InitializerList; // Constructor AST* ParentType; // Class, Struct diff --git a/project/components/ast_types.hpp b/project/components/ast_types.hpp index 236aab9..54e61cc 100644 --- a/project/components/ast_types.hpp +++ b/project/components/ast_types.hpp @@ -475,7 +475,8 @@ struct AST_Type char _PAD_CMT_[ sizeof(AST*) ]; CodeAttributes Attributes; CodeSpecifiers Specs; - char _PAD_PROPERTIES_[ sizeof(AST*) * 2 ]; + CodeType ReturnType; // Only used for function signatures + CodeParam Params; // Only used for function signatures Code ArrExpr; }; }; diff --git a/project/components/interface.parsing.cpp b/project/components/interface.parsing.cpp index f13c962..fd8ff84 100644 --- a/project/components/interface.parsing.cpp +++ b/project/components/interface.parsing.cpp @@ -1713,6 +1713,40 @@ CodeAttributes parse_attributes() return { nullptr }; } +/* + This is a brute-force approach: make all the arguments part of the token provided. + Can have in-place function signatures, regular identifiers, in-place typenames, compile-time expressions, parameter-pack expansion, etc. + This means that validation can only go so far, and so if there is any difference in formatting + past the basic stripping supported, it reports a soft failure. 
+*/ +internal inline +void parse_template_args( Parser::Token& token ) +{ + using namespace Parser; + + if ( currtok.Type == TokType::Operator && currtok.Text[0] == '<' && currtok.Length == 1 ) + { + eat( TokType::Operator ); + + s32 level = 0; + while ( left && ( currtok.Text[0] != '>' || level > 0 )) + { + if ( currtok.Text[0] == '<' ) + level++; + + if ( currtok.Text[0] == '>' ) + level--; + + eat( currtok.Type ); + } + + eat( TokType::Operator ); + + // Extend length of name to last token + token.Length = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)token.Text; + } +} + internal Parser::Token parse_identifier() { @@ -1721,9 +1755,10 @@ Parser::Token parse_identifier() Token name = currtok; Context.Scope->Name = name; - eat( TokType::Identifier ); + parse_template_args( name ); + while ( check( TokType::Access_StaticSymbol ) ) { eat( TokType::Access_StaticSymbol ); @@ -1745,62 +1780,7 @@ Parser::Token parse_identifier() name.Length = ( (sptr)currtok.Text + currtok.Length ) - (sptr)name.Text; eat( TokType::Identifier ); - if ( check( TokType::Operator ) && currtok.Text[0] == '<' ) - { - eat( TokType::Operator ); - - // Template arguments can be complex so were not validating if they are correct. - s32 level = 0; - while ( left && (currtok.Text[0] != '>' || level > 0 ) ) - { - if ( currtok.Text[0] == '<' ) - level++; - - else if ( currtok.Text[0] == '>' && level > 0 ) - level--; - - eat( currtok.Type ); - } - - if ( left == 0 ) - { - log_failure( "Error, unexpected end of template arguments\n%s", Context.to_string() ); - Context.pop(); - return { nullptr, 0, TokType::Invalid }; - } - - eat( TokType::Operator ); - name.Length = ( (sptr)prevtok.Text + (sptr)prevtok.Length ) - (sptr)name.Text; - } - } - - if ( check( TokType::Operator ) && currtok.Text[0] == '<' ) - { - eat( TokType::Operator ); - - // Template arguments can be complex so were not validating if they are correct. 
- s32 level = 0; - while ( left && (currtok.Text[0] != '>' || level > 0 ) ) - { - if ( currtok.Text[0] == '<' ) - level++; - - else if ( currtok.Text[0] == '>' && level > 0 ) - level--; - - eat( currtok.Type ); - } - - if ( left == 0 ) - { - log_failure( "Error, unexpected end of template arguments\n%s", Context.to_string() ); - Context.pop(); - return { nullptr, 0, TokType::Invalid }; - } - - eat( TokType::Operator ); - - name.Length = ( (sptr)prevtok.Text + (sptr)prevtok.Length ) - (sptr)name.Text; + parse_template_args( name ); } Context.pop(); @@ -4525,6 +4505,9 @@ CodeTemplate parse_template( StrC def ) return parse_template(); } +// This is a bit of a mess, but it works +// Parsing typename is arguably one of the worst aspects of C/C++. +// This is an effort to parse it without a full blown or half-blown compliant parser. internal CodeType parse_type( bool* is_function ) { @@ -4539,8 +4522,10 @@ CodeType parse_type( bool* is_function ) Token name = { nullptr, 0, TokType::Invalid }; Token brute_sig = { currtok.Text, 0, TokType::Invalid }; + // Attributes are assumed to be before the type signature CodeAttributes attributes = parse_attributes(); + // Deal with specifiers before the type signature while ( left && currtok.is_specifier() ) { SpecifierT spec = ESpecifier::to_type( currtok ); @@ -4564,9 +4549,11 @@ CodeType parse_type( bool* is_function ) return CodeInvalid; } + // All kinds of nonsense can makeup a type signature, first we check for a in-place definition of a class, enum, or struct if ( currtok.Type == TokType::Decl_Class || currtok.Type == TokType::Decl_Enum - || currtok.Type == TokType::Decl_Struct ) + || currtok.Type == TokType::Decl_Struct + || currtok.Type == TokType::Decl_Union ) { name = currtok; eat( currtok.Type ); @@ -4575,6 +4562,8 @@ CodeType parse_type( bool* is_function ) eat( TokType::Identifier ); Context.Scope->Name = name; } + + // Check if native type keywords are used, eat them for the signature. 
else if ( currtok.Type >= TokType::Type_Unsigned && currtok.Type <= TokType::Type_MS_W64 ) { name = currtok; @@ -4588,6 +4577,8 @@ CodeType parse_type( bool* is_function ) name.Length = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)name.Text; Context.Scope->Name = name; } + + // The usual Identifier type signature that may have namespace qualifiers else { name = parse_identifier(); @@ -4598,29 +4589,6 @@ CodeType parse_type( bool* is_function ) Context.pop(); return CodeInvalid; } - - // Problably dealing with a templated symbol - if ( currtok.Type == TokType::Operator && currtok.Text[0] == '<' && currtok.Length == 1 ) - { - eat( TokType::Operator ); - - s32 level = 0; - while ( left && ( currtok.Text[0] != '>' || level > 0 )) - { - if ( currtok.Text[0] == '<' ) - level++; - - if ( currtok.Text[0] == '>' ) - level--; - - eat( currtok.Type ); - } - - eat( TokType::Operator ); - - // Extend length of name to last token - name.Length = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)name.Text; - } } while ( left && currtok.is_specifier() ) @@ -4642,10 +4610,14 @@ CodeType parse_type( bool* is_function ) eat( currtok.Type ); } + // For function type signatures + CodeType return_type = NoCode; + CodeParam params = NoCode; + bool is_first_capture = true; while ( check( TokType::Capture_Start ) && context_tok.Type != TokType::Decl_Operator ) { - // Brute force capture the entire thing. + // Brute force capture the entire thing // Function typedefs are complicated and there are not worth dealing with for validation at this point... eat( TokType::Capture_Start ); @@ -4704,9 +4676,9 @@ CodeType parse_type( bool* is_function ) } } - String - name_stripped = String::make( GlobalAllocator, name ); - name_stripped.strip_space(); + // This is bad we cannot strip the name if it contains the full function signature's parameters, parameters at minimum must be separate. 
+ String name_stripped = String::make( GlobalAllocator, name ); + // name_stripped.strip_space(); result->Name = get_cached_string( name_stripped ); @@ -4716,6 +4688,12 @@ CodeType parse_type( bool* is_function ) if ( is_param_pack ) result->IsParamPack = true; + if ( return_type ) + result->ReturnType = return_type; + + if ( params ) + result->Params = params; + Context.pop(); return result; }