From 0b03b3cd925588a816fff693292c37a5c5e61ed0 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Sun, 15 Dec 2024 13:39:00 -0500 Subject: [PATCH] Unreal parsing support: Added MF_Allow_As_Attribute & MF_Allow_As_Definition --- base/components/ast_types.hpp | 3 +- base/components/interface.cpp | 3 ++ base/components/lexer.cpp | 43 ++++++----------------- base/components/parser.cpp | 58 ++++++++++++++++++++++++++++++-- base/components/parser_types.hpp | 18 ++++++++-- 5 files changed, 88 insertions(+), 37 deletions(-) diff --git a/base/components/ast_types.hpp b/base/components/ast_types.hpp index 43a0b80..5efd39a 100644 --- a/base/components/ast_types.hpp +++ b/base/components/ast_types.hpp @@ -42,13 +42,14 @@ struct AST_Body }; static_assert( sizeof(AST_Body) == sizeof(AST), "ERROR: AST_Body is not the same size as AST"); +// TODO(Ed): Support chaining attributes (Use parameter linkage pattern) struct AST_Attributes { union { char _PAD_[ sizeof(Specifier) * AST_ArrSpecs_Cap + sizeof(AST*) ]; StrCached Content; }; - StrCached Name; + StrCached Name; Code Prev; Code Next; Token* Tok; diff --git a/base/components/interface.cpp b/base/components/interface.cpp index 74ac369..83e44e6 100644 --- a/base/components/interface.cpp +++ b/base/components/interface.cpp @@ -473,6 +473,7 @@ void register_macro( Macro macro ) { GEN_ASSERT_NOT_NULL(macro.Name.Ptr); GEN_ASSERT(macro.Name.Len > 0); u32 key = crc32( macro.Name.Ptr, macro.Name.Len ); + macro.Name = cache_str(macro.Name); hashtable_set( _ctx->Macros, key, macro ); } @@ -486,6 +487,7 @@ void register_macros( s32 num, ... 
) Macro macro = va_arg(va, Macro); GEN_ASSERT_NOT_NULL(macro.Name.Ptr); GEN_ASSERT(macro.Name.Len > 0); + macro.Name = cache_str(macro.Name); u32 key = crc32( macro.Name.Ptr, macro.Name.Len ); hashtable_set( _ctx->Macros, key, macro ); @@ -502,6 +504,7 @@ void register_macros( s32 num, Macro* macros ) Macro macro = * macros; GEN_ASSERT_NOT_NULL(macro.Name.Ptr); GEN_ASSERT(macro.Name.Len > 0); + macro.Name = cache_str(macro.Name); u32 key = crc32( macro.Name.Ptr, macro.Name.Len ); hashtable_set( _ctx->Macros, key, macro ); diff --git a/base/components/lexer.cpp b/base/components/lexer.cpp index 289983b..1d7f4ee 100644 --- a/base/components/lexer.cpp +++ b/base/components/lexer.cpp @@ -532,45 +532,24 @@ void lex_found_token( LexContext* ctx ) ctx->token.Type = macrotype_to_toktype(macro->Type); b32 is_functional = macro_is_functional(* macro); resolved_to_macro = has_args ? is_functional : ! is_functional; + if ( ! resolved_to_macro ) { + log_fmt("Info(%d, %d): %S identified as a macro but usage here does not resolve to one (interpreting as identifier)\n" + , ctx->token.Line + , ctx->token.Line + , macro->Name + ); + } } if ( resolved_to_macro ) { // TODO(Ed): When we introduce a macro AST (and expression support), we'll properly lex this section. // Want to ignore any arguments the define may have as they can be execution expressions. 
- if ( has_args ) - { + if ( has_args ) { ctx->token.Flags |= TF_Macro_Functional; - - // move_forward(); - // ctx->token.Text.Len++; - - // s32 level = 0; - // while ( ctx->left && ((* ctx->scanner) != ')' || level > 0) ) - // { - // if ( (* ctx->scanner) == '(' ) - // level++; - - // else if ( (* ctx->scanner) == ')' && level > 0 ) - // level--; - - // move_forward(); - // ctx->token.Text.Len++; - // } - - // move_forward(); - // ctx->token.Text.Len++; } - - //if ( (* ctx->scanner) == '\r' && ctx->scanner[1] == '\n' ) - //{ - // move_forward(); - // ctx->token..Text.Length++; - //} - //else if ( (* ctx->scanner) == '\n' ) - //{ - // move_forward(); - // ctx->token..Text.Length++; - //} + if ( bitfield_is_set(MacroFlags, macro->Flags, MF_Allow_As_Attribute) ) { + ctx->token.Flags |= TF_Attribute; + } } else { diff --git a/base/components/parser.cpp b/base/components/parser.cpp index 8cbd4d8..be520ba 100644 --- a/base/components/parser.cpp +++ b/base/components/parser.cpp @@ -204,6 +204,7 @@ internal CodeBody parse_global_nspace ( CodeType which internal Code parse_global_nspace_constructor_destructor( CodeSpecifiers specifiers ); internal Token parse_identifier ( bool* possible_member_function ); internal CodeInclude parse_include (); +internal Code parse_macro_as_definiton ( CodeAttributes attributes, CodeSpecifiers specifiers ); internal CodeOperator parse_operator_after_ret_type ( ModuleFlag mflags, CodeAttributes attributes, CodeSpecifiers specifiers, CodeTypename ret_type ); internal Code parse_operator_function_or_variable( bool expects_function, CodeAttributes attributes, CodeSpecifiers specifiers ); internal CodePragma parse_pragma (); @@ -578,7 +579,7 @@ CodeAttributes parse_attributes() s32 len = 0; // There can be more than one attribute. If there is flatten them to a single string. 
- // TODO(Ed): Support keeping an linked list of attributes similar to parameters + // TODO(Ed): Support chaining attributes (Use parameter linkage pattern) while ( left && tok_is_attribute(currtok) ) { if ( check( Tok_Attribute_Open ) ) @@ -2434,6 +2435,12 @@ Code parse_operator_function_or_variable( bool expects_function, CodeAttributes Code result = InvalidCode; + Code macro_stmt = parse_macro_as_definiton(attributes, specifiers); + if (macro_stmt) { + parser_pop(& _ctx->parser); + return macro_stmt; + } + CodeTypename type = parser_parse_type( parser_not_from_template, nullptr ); // @@ -2506,6 +2513,36 @@ Code parse_operator_function_or_variable( bool expects_function, CodeAttributes return result; } +internal +Code parse_macro_as_definiton( CodeAttributes attributes, CodeSpecifiers specifiers ) +{ + push_scope(); + + if (currtok.Type != Tok_Preprocess_Macro_Stmt ) { + parser_pop(& _ctx->parser); + return NullCode; + } + Macro* macro = lookup_macro(currtok.Text); + b32 can_resolve_to_definition = macro && bitfield_is_set(MacroFlags, macro->Flags, MF_Allow_As_Definition); + if ( ! can_resolve_to_definition) { + parser_pop(& _ctx->parser); + return NullCode; + } + + // TODO(Ed): When AST_Macro is made, have it support attributes and specifiers for when it's behaving as a declaration/definition. + Code code = parse_simple_preprocess( Tok_Preprocess_Macro_Stmt ); + + // Attributes and specifiers will be collapsed into the macro's serialization. + StrBuilder resolved_definition = strbuilder_fmt_buf(_ctx->Allocator_Temp, "%S %S %S" + , attributes ? strbuilder_to_str( attributes_to_strbuilder(attributes)) : txt("") + , specifiers ? 
strbuilder_to_str( specifiers_to_strbuilder(specifiers)) : txt("") + , code->Content + ); + Code result = untyped_str( strbuilder_to_str(resolved_definition) ); + parser_pop(& _ctx->parser); + return result; +} + internal CodePragma parse_pragma() { @@ -3761,8 +3798,11 @@ CodeEnum parser_parse_enum( bool inplace_def ) // Unreal UMETA macro support if ( currtok.Type == Tok_Preprocess_Macro_Expr ) { - eat( Tok_Preprocess_Macro_Expr ); + Code macro = parse_simple_preprocess( Tok_Preprocess_Macro_Expr ); // = + + // We're intentionally ignoring this code as it's going to be serialized as an untyped string with the rest of the enum "entry". + // TODO(Ed): We need a CodeEnumEntry, AST_EnumEntry types } if ( currtok.Type == Tok_Comma ) @@ -4060,6 +4100,13 @@ CodeFn parser_parse_function() } // + // Note(Ed): We're enforcing that using this codepath requires non-macro jank. + // Code macro_stmt = parse_macro_as_definiton(attributes, specifiers); + // if (macro_stmt) { + // parser_pop(& _ctx->parser); + // return macro_stmt; + // } + CodeTypename ret_type = parser_parse_type(parser_not_from_template, nullptr); if ( cast(Code, ret_type) == Code_Invalid ) { parser_pop(& _ctx->parser); @@ -5448,6 +5495,13 @@ CodeVar parser_parse_variable() } // + // Note(Ed): We're enforcing that using this codepath requires non-macro jank. + // Code macro_stmt = parse_macro_as_definiton(attributes, specifiers); + // if (macro_stmt) { + // parser_pop(& _ctx->parser); + // return macro_stmt; + // } + CodeTypename type = parser_parse_type(parser_not_from_template, nullptr); // diff --git a/base/components/parser_types.hpp b/base/components/parser_types.hpp index fd03708..9275b53 100644 --- a/base/components/parser_types.hpp +++ b/base/components/parser_types.hpp @@ -177,8 +177,22 @@ enum EMacroFlags : u16 { MF_Functional = bit(0), // Macro has parameters (args expected to be passed) MF_Expects_Body = bit(1), // Expects to assign a braced scope to its body. 
- MF_Allow_As_Identifier = bit(2), // lex__eat wil treat this macro as an identifier if the parser attempts to consume it as one. - // ^^^ This is a sort of kludge because we don't support push/pop macro programs rn. ^^^ + + // lex__eat will treat this macro as an identifier if the parser attempts to consume it as one. + // ^^^ This is a kludge because we don't support push/pop macro pragmas rn. + MF_Allow_As_Identifier = bit(2), + + // lex__eat will treat this macro as an attribute if the parser attempts to consume it as one. + // ^^^ This is a kludge because Unreal has macros that behave as both a 'statement' and an attribute (UE_DEPRECATED, PRAGMA_ENABLE_DEPRECATION_WARNINGS, etc) + // TODO(Ed): We can keep the MF_Allow_As_Attribute flag for macros, however, we need to add the ability of AST_Attributes to chain themselves. + // That's already a thing in the standard language anyway + // & it would allow UE_DEPRECATED, (UE_PROPERTY / UE_FUNCTION) to chain themselves as attributes of a resolved member function/variable definition + MF_Allow_As_Attribute = bit(3), + + // When a macro is encountered after attributes and specifiers while parsing a function, or variable: + // It will consume the macro and treat it as resolving the definition. (Yes this is for Unreal Engine) + // (MUST BE OF MT_Statement TYPE) + MF_Allow_As_Definition = bit(4), MF_Null = 0, MF_UnderlyingType = GEN_U16_MAX,