Fixes to parsing for validation

Removed whitespace stripping from parse_type in preparation for some major changes to function signature typenames.

Moved template argument parsing to its own helper function since it's used in more than one spot.

The latest failure is due to a stack overflow when validating parameters. (Shouldn't be hard to debug.)
This commit is contained in:
Edward R. Gonzalez 2023-09-03 23:36:51 -04:00
parent f2d4ec96f0
commit 543427dfe5
3 changed files with 68 additions and 89 deletions

View File

@ -236,7 +236,7 @@ struct AST
{ {
AST* InlineCmt; // Class, Constructor, Destructor, Enum, Friend, Functon, Operator, OpCast, Struct, Typedef, Using, Variable AST* InlineCmt; // Class, Constructor, Destructor, Enum, Friend, Functon, Operator, OpCast, Struct, Typedef, Using, Variable
AST* Attributes; // Class, Enum, Function, Struct, Typedef, Union, Using, Variable AST* Attributes; // Class, Enum, Function, Struct, Typedef, Union, Using, Variable
AST* Specs; // Destructor, Function, Operator, Type symbol, Variable AST* Specs; // Destructor, Function, Operator, Typename, Variable
union { union {
AST* InitializerList; // Constructor AST* InitializerList; // Constructor
AST* ParentType; // Class, Struct AST* ParentType; // Class, Struct
@ -249,7 +249,7 @@ struct AST
AST* Params; // Constructor, Function, Operator, Template AST* Params; // Constructor, Function, Operator, Template
}; };
union { union {
AST* ArrExpr; // Type Symbol AST* ArrExpr; // Typename
AST* Body; // Class, Constructr, Destructor, Enum, Function, Namespace, Struct, Union AST* Body; // Class, Constructr, Destructor, Enum, Function, Namespace, Struct, Union
AST* Declaration; // Friend, Template AST* Declaration; // Friend, Template
AST* Value; // Parameter, Variable AST* Value; // Parameter, Variable
@ -288,7 +288,7 @@ struct AST_POD
{ {
AST* InlineCmt; // Class, Constructor, Destructor, Enum, Friend, Functon, Operator, OpCast, Struct, Typedef, Using, Variable AST* InlineCmt; // Class, Constructor, Destructor, Enum, Friend, Functon, Operator, OpCast, Struct, Typedef, Using, Variable
AST* Attributes; // Class, Enum, Function, Struct, Typename, Union, Using, Variable AST* Attributes; // Class, Enum, Function, Struct, Typename, Union, Using, Variable
AST* Specs; // Function, Operator, Type symbol, Variable AST* Specs; // Function, Operator, Typename, Variable
union { union {
AST* InitializerList; // Constructor AST* InitializerList; // Constructor
AST* ParentType; // Class, Struct AST* ParentType; // Class, Struct

View File

@ -475,7 +475,8 @@ struct AST_Type
char _PAD_CMT_[ sizeof(AST*) ]; char _PAD_CMT_[ sizeof(AST*) ];
CodeAttributes Attributes; CodeAttributes Attributes;
CodeSpecifiers Specs; CodeSpecifiers Specs;
char _PAD_PROPERTIES_[ sizeof(AST*) * 2 ]; CodeType ReturnType; // Only used for function signatures
CodeParam Params; // Only used for function signatures
Code ArrExpr; Code ArrExpr;
}; };
}; };

View File

@ -1713,6 +1713,40 @@ CodeAttributes parse_attributes()
return { nullptr }; return { nullptr };
} }
/*
	Brute-force capture of a template argument list: every token from the opening '<' through its
	matching '>' is made part of the token provided, by extending that token's length.

	The arguments can be in-place function signatures, regular identifiers, in-place typenames,
	compile-time expressions, parameter-pack expansions, etc., so their contents are not validated here.
	This means that validation can only go so far: if there is any difference in formatting
	past the basic stripping supported, it reports a soft failure.

	Does nothing when the current token is not a lone '<' operator.
	If the tokens run out before the closing '>' is found, a failure is logged and the token is left unextended.

	NOTE(review): the length computation assumes token.Text and the consumed tokens' Text point into
	the same contiguous source buffer - confirm against the lexer.
*/
internal inline
void parse_template_args( Parser::Token& token )
{
	using namespace Parser;

	// Only a single-character '<' operator opens a template argument list.
	if ( currtok.Type != TokType::Operator || currtok.Text[0] != '<' || currtok.Length != 1 )
		return;

	eat( TokType::Operator );

	// Number of nested '<' ... '>' pairs currently open inside the argument list.
	s32 level = 0;
	while ( left && ( currtok.Text[0] != '>' || level > 0 ) )
	{
		if ( currtok.Text[0] == '<' )
			level++;

		if ( currtok.Text[0] == '>' )
			level--;

		// The contents are not validated, so whatever the token is, consume it.
		eat( currtok.Type );
	}

	// The loop can only stop with tokens remaining when it is sitting on the closing '>'.
	// Running out of tokens means the argument list was never terminated.
	if ( left == 0 )
	{
		log_failure( "Error, unexpected end of template arguments\n%s", Context.to_string() );
		return;
	}

	// Consume the closing '>'
	eat( TokType::Operator );

	// Extend length of name to last token
	token.Length = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)token.Text;
}
internal internal
Parser::Token parse_identifier() Parser::Token parse_identifier()
{ {
@ -1721,9 +1755,10 @@ Parser::Token parse_identifier()
Token name = currtok; Token name = currtok;
Context.Scope->Name = name; Context.Scope->Name = name;
eat( TokType::Identifier ); eat( TokType::Identifier );
parse_template_args( name );
while ( check( TokType::Access_StaticSymbol ) ) while ( check( TokType::Access_StaticSymbol ) )
{ {
eat( TokType::Access_StaticSymbol ); eat( TokType::Access_StaticSymbol );
@ -1745,62 +1780,7 @@ Parser::Token parse_identifier()
name.Length = ( (sptr)currtok.Text + currtok.Length ) - (sptr)name.Text; name.Length = ( (sptr)currtok.Text + currtok.Length ) - (sptr)name.Text;
eat( TokType::Identifier ); eat( TokType::Identifier );
if ( check( TokType::Operator ) && currtok.Text[0] == '<' ) parse_template_args( name );
{
eat( TokType::Operator );
// Template arguments can be complex so were not validating if they are correct.
s32 level = 0;
while ( left && (currtok.Text[0] != '>' || level > 0 ) )
{
if ( currtok.Text[0] == '<' )
level++;
else if ( currtok.Text[0] == '>' && level > 0 )
level--;
eat( currtok.Type );
}
if ( left == 0 )
{
log_failure( "Error, unexpected end of template arguments\n%s", Context.to_string() );
Context.pop();
return { nullptr, 0, TokType::Invalid };
}
eat( TokType::Operator );
name.Length = ( (sptr)prevtok.Text + (sptr)prevtok.Length ) - (sptr)name.Text;
}
}
if ( check( TokType::Operator ) && currtok.Text[0] == '<' )
{
eat( TokType::Operator );
// Template arguments can be complex so were not validating if they are correct.
s32 level = 0;
while ( left && (currtok.Text[0] != '>' || level > 0 ) )
{
if ( currtok.Text[0] == '<' )
level++;
else if ( currtok.Text[0] == '>' && level > 0 )
level--;
eat( currtok.Type );
}
if ( left == 0 )
{
log_failure( "Error, unexpected end of template arguments\n%s", Context.to_string() );
Context.pop();
return { nullptr, 0, TokType::Invalid };
}
eat( TokType::Operator );
name.Length = ( (sptr)prevtok.Text + (sptr)prevtok.Length ) - (sptr)name.Text;
} }
Context.pop(); Context.pop();
@ -4525,6 +4505,9 @@ CodeTemplate parse_template( StrC def )
return parse_template(); return parse_template();
} }
// This is a bit of a mess, but it works
// Parsing typename is arguably one of the worst aspects of C/C++.
// This is an effort to parse it without a full blown or half-blown compliant parser.
internal internal
CodeType parse_type( bool* is_function ) CodeType parse_type( bool* is_function )
{ {
@ -4539,8 +4522,10 @@ CodeType parse_type( bool* is_function )
Token name = { nullptr, 0, TokType::Invalid }; Token name = { nullptr, 0, TokType::Invalid };
Token brute_sig = { currtok.Text, 0, TokType::Invalid }; Token brute_sig = { currtok.Text, 0, TokType::Invalid };
// Attributes are assumed to be before the type signature
CodeAttributes attributes = parse_attributes(); CodeAttributes attributes = parse_attributes();
// Deal with specifiers before the type signature
while ( left && currtok.is_specifier() ) while ( left && currtok.is_specifier() )
{ {
SpecifierT spec = ESpecifier::to_type( currtok ); SpecifierT spec = ESpecifier::to_type( currtok );
@ -4564,9 +4549,11 @@ CodeType parse_type( bool* is_function )
return CodeInvalid; return CodeInvalid;
} }
// All kinds of nonsense can makeup a type signature, first we check for a in-place definition of a class, enum, or struct
if ( currtok.Type == TokType::Decl_Class if ( currtok.Type == TokType::Decl_Class
|| currtok.Type == TokType::Decl_Enum || currtok.Type == TokType::Decl_Enum
|| currtok.Type == TokType::Decl_Struct ) || currtok.Type == TokType::Decl_Struct
|| currtok.Type == TokType::Decl_Union )
{ {
name = currtok; name = currtok;
eat( currtok.Type ); eat( currtok.Type );
@ -4575,6 +4562,8 @@ CodeType parse_type( bool* is_function )
eat( TokType::Identifier ); eat( TokType::Identifier );
Context.Scope->Name = name; Context.Scope->Name = name;
} }
// Check if native type keywords are used, eat them for the signature.
else if ( currtok.Type >= TokType::Type_Unsigned && currtok.Type <= TokType::Type_MS_W64 ) else if ( currtok.Type >= TokType::Type_Unsigned && currtok.Type <= TokType::Type_MS_W64 )
{ {
name = currtok; name = currtok;
@ -4588,6 +4577,8 @@ CodeType parse_type( bool* is_function )
name.Length = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)name.Text; name.Length = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)name.Text;
Context.Scope->Name = name; Context.Scope->Name = name;
} }
// The usual Identifier type signature that may have namespace qualifiers
else else
{ {
name = parse_identifier(); name = parse_identifier();
@ -4598,29 +4589,6 @@ CodeType parse_type( bool* is_function )
Context.pop(); Context.pop();
return CodeInvalid; return CodeInvalid;
} }
// Problably dealing with a templated symbol
if ( currtok.Type == TokType::Operator && currtok.Text[0] == '<' && currtok.Length == 1 )
{
eat( TokType::Operator );
s32 level = 0;
while ( left && ( currtok.Text[0] != '>' || level > 0 ))
{
if ( currtok.Text[0] == '<' )
level++;
if ( currtok.Text[0] == '>' )
level--;
eat( currtok.Type );
}
eat( TokType::Operator );
// Extend length of name to last token
name.Length = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)name.Text;
}
} }
while ( left && currtok.is_specifier() ) while ( left && currtok.is_specifier() )
@ -4642,10 +4610,14 @@ CodeType parse_type( bool* is_function )
eat( currtok.Type ); eat( currtok.Type );
} }
// For function type signatures
CodeType return_type = NoCode;
CodeParam params = NoCode;
bool is_first_capture = true; bool is_first_capture = true;
while ( check( TokType::Capture_Start ) && context_tok.Type != TokType::Decl_Operator ) while ( check( TokType::Capture_Start ) && context_tok.Type != TokType::Decl_Operator )
{ {
// Brute force capture the entire thing. // Brute force capture the entire thing
// Function typedefs are complicated and there are not worth dealing with for validation at this point... // Function typedefs are complicated and there are not worth dealing with for validation at this point...
eat( TokType::Capture_Start ); eat( TokType::Capture_Start );
@ -4704,9 +4676,9 @@ CodeType parse_type( bool* is_function )
} }
} }
String // This is bad we cannot strip the name if it contains the full function signature's parameters, parameters at minimum must be separate.
name_stripped = String::make( GlobalAllocator, name ); String name_stripped = String::make( GlobalAllocator, name );
name_stripped.strip_space(); // name_stripped.strip_space();
result->Name = get_cached_string( name_stripped ); result->Name = get_cached_string( name_stripped );
@ -4716,6 +4688,12 @@ CodeType parse_type( bool* is_function )
if ( is_param_pack ) if ( is_param_pack )
result->IsParamPack = true; result->IsParamPack = true;
if ( return_type )
result->ReturnType = return_type;
if ( params )
result->Params = params;
Context.pop(); Context.pop();
return result; return result;
} }