#ifdef GEN_INTELLISENSE_DIRECTIVES #pragma once #include "gen/etoktype.cpp" #include "interface.upfront.cpp" #include "lexer.cpp" #endif GEN_NS_PARSER_BEGIN // TODO(Ed) : Rename ETok_Capture_Start, ETok_Capture_End to Open_Parenthesis adn Close_Parenthesis constexpr bool dont_skip_formatting = false; constexpr bool skip_formatting = true; struct StackNode { StackNode* Prev; Token Start; Token Name; // The name of the AST node (if parsed) StrC ProcName; // The name of the procedure }; struct ParseContext { TokArray Tokens; StackNode* Scope; }; void push( ParseContext* ctx, StackNode* node ) { node->Prev = ctx->Scope; ctx->Scope = node; #if 0 && Build_Debug log_fmt("\tEntering Context: %.*s\n", Scope->ProcName.Len, Scope->ProcName.Ptr ); #endif } void pop(ParseContext* ctx) { #if 0 && Build_Debug log_fmt("\tPopping Context: %.*s\n", Scope->ProcName.Len, Scope->ProcName.Ptr ); #endif ctx->Scope = ctx->Scope->Prev; } String to_string(ParseContext ctx) { String result = string_make_reserve( GlobalAllocator, kilobytes(4) ); Token scope_start = ctx.Scope->Start; Token last_valid = ctx.Tokens.Idx >= array_num(ctx.Tokens.Arr) ? 
ctx.Tokens.Arr[array_num(ctx.Tokens.Arr) -1] : (* current(& ctx.Tokens, true)); sptr length = scope_start.Length; char const* current = scope_start.Text + length; while ( current <= array_back( & ctx.Tokens.Arr)->Text && *current != '\n' && length < 74 ) { current++; length++; } String line = string_make_strc( GlobalAllocator, { length, scope_start.Text } ); string_append_fmt( & result, "\tScope : %s\n", line ); string_free(& line); sptr dist = (sptr)last_valid.Text - (sptr)scope_start.Text + 2; sptr length_from_err = dist; String line_from_err = string_make_strc( GlobalAllocator, { length_from_err, last_valid.Text } ); if ( length_from_err < 100 ) string_append_fmt(& result, "\t(%d, %d):%*c\n", last_valid.Line, last_valid.Column, length_from_err, '^' ); else string_append_fmt(& result, "\t(%d, %d)\n", last_valid.Line, last_valid.Column ); StackNode* curr_scope = ctx.Scope; s32 level = 0; do { if ( is_valid(curr_scope->Name) ) { string_append_fmt(& result, "\t%d: %s, AST Name: %.*s\n", level, curr_scope->ProcName.Ptr, curr_scope->Name.Length, curr_scope->Name.Text ); } else { string_append_fmt(& result, "\t%d: %s\n", level, curr_scope->ProcName.Ptr ); } curr_scope = curr_scope->Prev; level++; } while ( curr_scope ); return result; } global ParseContext Context; bool __eat(TokArray* self, TokType type ) { if ( array_num(self->Arr) - self->Idx <= 0 ) { log_failure( "No tokens left.\n%s", to_string(Context) ); return false; } Token at_idx = self->Arr[ self->Idx ]; if ( ( at_idx.Type == Tok_NewLine && type != Tok_NewLine ) || ( at_idx.Type == Tok_Comment && type != Tok_Comment ) ) { self->Idx ++; } if ( at_idx.Type != type ) { Token tok = * current( self, skip_formatting ); log_failure( "Parse Error, TokArray::eat, Expected: ' %s ' not ' %.*s ' (%d, %d)`\n%s" , to_str(type).Ptr , at_idx.Length, at_idx.Text , tok.Line , tok.Column , to_string(Context) ); return false; } #if 0 && Build_Debug log_fmt("Ate: %S\n", self->Arr[Idx].to_string() ); #endif self->Idx ++; return 
true; } internal void init() { Tokens = array_init_reserve(Token, arena_allocator_info( & LexArena) , ( LexAllocator_Size - sizeof( ArrayHeader ) ) / sizeof(Token) ); fixed_arena_init(& defines_map_arena); defines = hashtable_init_reserve(StrC, allocator_info( & defines_map_arena), 256 ); } internal void deinit() { parser::Tokens = { nullptr }; } #pragma region Helper Macros #define check_parse_args( def ) _check_parse_args(def, stringize(_func_) ) bool _check_parse_args( StrC def, char const* func_name ) { if ( def.Len <= 0 ) { log_failure( str_fmt_buf("gen::%s: length must greater than 0", func_name) ); pop(& Context); return false; } if ( def.Ptr == nullptr ) { log_failure( str_fmt_buf("gen::%s: def was null", func_name) ); pop(& Context); return false; } return true; } # define currtok_noskip (* current( & Context.Tokens, dont_skip_formatting )) # define currtok (* current( & Context.Tokens, skip_formatting )) # define prevtok (* previous( Context.Tokens, dont_skip_formatting)) # define nexttok (* next( Context.Tokens, skip_formatting )) # define eat( Type_ ) __eat( & Context.Tokens, Type_ ) # define left ( array_num(Context.Tokens.Arr) - Context.Tokens.Idx ) #ifdef check #define CHECK_WAS_DEFINED #pragma push_macro("check") #undef check #endif # define check_noskip( Type_ ) ( left && currtok_noskip.Type == Type_ ) # define check( Type_ ) ( left && currtok.Type == Type_ ) # define push_scope() \ parser::StackNode scope { nullptr, currtok_noskip, parser::NullToken, txt( __func__ ) }; \ push( & parser::Context, & scope ) #pragma endregion Helper Macros // Procedure Forwards ( Entire parser internal parser interface ) internal Code parse_array_decl (); internal CodeAttributes parse_attributes (); internal CodeComment parse_comment (); internal Code parse_complicated_definition ( TokType which ); internal CodeBody parse_class_struct_body ( TokType which, Token name = NullToken ); internal Code parse_class_struct ( TokType which, bool inplace_def ); internal 
CodeDefine parse_define ();
internal Code               parse_expression                           ();
internal Code               parse_forward_or_definition                ( TokType which, bool is_inplace );
internal CodeFn             parse_function_after_name                  ( ModuleFlag mflags, CodeAttributes attributes, CodeSpecifiers specifiers, CodeTypename ret_type, Token name );
internal Code               parse_function_body                        ();
internal Code               parse_global_nspace                        ();
internal Code               parse_global_nspace_constructor_destructor( CodeSpecifiers specifiers );
internal Token              parse_identifier                           ( bool* possible_member_function = nullptr );
internal CodeInclude        parse_include                              ();
internal CodeOperator       parse_operator_after_ret_type              ( ModuleFlag mflags, CodeAttributes attributes, CodeSpecifiers specifiers, CodeTypename ret_type );
internal Code               parse_operator_function_or_variable( bool expects_function, CodeAttributes attributes, CodeSpecifiers specifiers );
internal CodePragma         parse_pragma                               ();
internal CodeParam          parse_params                               ( bool use_template_capture = false );
internal CodePreprocessCond parse_preprocess_cond                      ();
internal Code               parse_simple_preprocess                    ( TokType which );
internal Code               parse_static_assert                        ();
internal void               parse_template_args                        ( Token& token );
internal CodeVar            parse_variable_after_name                  ( ModuleFlag mflags, CodeAttributes attributes, CodeSpecifiers specifiers, CodeTypename type, StrC name );
internal CodeVar            parse_variable_declaration_list            ();

internal CodeClass       parse_class           ( bool inplace_def = false );
internal CodeConstructor parse_constructor     ( CodeSpecifiers specifiers );
internal CodeDestructor  parse_destructor      ( CodeSpecifiers specifiers = NullCode );
internal CodeEnum        parse_enum            ( bool inplace_def = false );
internal CodeBody        parse_export_body     ();
internal CodeBody        parse_extern_link_body();
internal CodeExtern      parse_extern_link     ();
internal CodeFriend      parse_friend          ();
internal CodeFn          parse_function        ();
internal CodeNS          parse_namespace       ();
internal CodeOpCast      parse_operator_cast   ( CodeSpecifiers specifiers = NullCode );
internal CodeStruct      parse_struct          ( bool inplace_def = false );
internal CodeVar         parse_variable        ();
internal CodeTemplate    parse_template        ();
internal CodeTypename    parse_type            ( bool from_template = false, bool* is_function = nullptr );
internal CodeTypedef     parse_typedef         ();
internal CodeUnion       parse_union           ( bool inplace_def = false );
internal CodeUsing       parse_using           ();

constexpr bool inplace_def = true;

// Internal parsing functions

constexpr bool strip_formatting_dont_preserve_newlines = false;
/*
	This function was an attempt at stripping formatting from any c++ code.
	It has edge case failures that prevent it from being used in function bodies.

	Returns a GlobalAllocator String holding raw_text with tabs, newlines and runs of whitespace
	collapsed to single spaces. String literals, character literals and comments are copied through untouched.
	Newlines are kept when preserve_newlines is set, and always directly after a line comment.
*/
internal
String strip_formatting( StrC raw_text, bool preserve_newlines = true )
{
	String content = string_make_reserve( GlobalAllocator, raw_text.Len );

	if ( raw_text.Len == 0 )
		return content;

#define cut_length ( scanner - raw_text.Ptr - last_cut )
#define cut_ptr    ( raw_text.Ptr   + last_cut )
#define pos        ( sptr( scanner ) - sptr( raw_text.Ptr ) )
#define move_fwd() do { scanner++; tokleft--; } while(0)
// Appends one space unless the output already ends with one (string_back may be null while content is empty).
#define append_single_space()                          \
	do                                                 \
	{                                                  \
		char* back_ = string_back( content );          \
		if ( back_ == nullptr || * back_ != ' ' )      \
			string_append_char( & content, ' ' );      \
	}                                                  \
	while(0)

	s32         tokleft  = (s32)raw_text.Len;
	sptr        last_cut = 0;   // Offset of the first character not yet copied to content
	char const* scanner  = raw_text.Ptr;

	if ( scanner[0] == ' ' )
	{
		move_fwd();
		last_cut = 1;
	}

	bool must_keep_newline = false;

	while ( tokleft > 0 )
	{
		// Skip over the content of string literals
		if ( scanner[0] == '"' )
		{
			move_fwd();

			// Escape sequences are consumed as pairs, so a quote reached here always terminates the literal
			// (this also handles a literal ending in an escaped backslash).
			while ( tokleft && scanner[0] != '"' )
			{
				if ( scanner[0] == '\\' && tokleft > 1 )
				{
					scanner += 2;
					tokleft -= 2;
				}
				else
				{
					move_fwd();
				}
			}

			// Skip the closing "
			if ( tokleft )
				move_fwd();

			string_append_c_str_len( & content, cut_ptr, cut_length );
			last_cut = sptr( scanner ) - sptr( raw_text.Ptr );
			continue;
		}

		// Skip over the content of character literals
		if ( scanner[0] == '\'' )
		{
			move_fwd();

			// Same pairwise escape handling as string literals, so '\\' terminates correctly.
			while ( tokleft && scanner[0] != '\'' )
			{
				if ( scanner[0] == '\\' && tokleft > 1 )
				{
					scanner += 2;
					tokleft -= 2;
				}
				else
				{
					move_fwd();
				}
			}

			// Skip the closing '
			if ( tokleft )
				move_fwd();

			string_append_c_str_len( & content, cut_ptr, cut_length );
			last_cut = sptr( scanner ) - sptr( raw_text.Ptr );
			continue;
		}

		// Block comments
		if ( tokleft > 1 && scanner[0] == '/' && scanner[1] == '*' )
		{
			// Step over the opener first so that "/*/" is not mistaken for a complete comment.
			scanner += 2;
			tokleft -= 2;

			while ( tokleft > 1 && !(scanner[0] == '*' && scanner[1] == '/') )
				move_fwd();

			// Step over the closing */ ; an unterminated comment simply consumes whatever is left.
			s32 closing = tokleft < 2 ? tokleft : 2;
			scanner += closing;
			tokleft -= closing;

			string_append_c_str_len( & content, cut_ptr, cut_length );
			last_cut = sptr( scanner ) - sptr( raw_text.Ptr );
			continue;
		}

		// Line comments
		if ( tokleft > 1 && scanner[0] == '/' && scanner[1] == '/' )
		{
			must_keep_newline = true;

			scanner += 2;
			tokleft -= 2;

			while ( tokleft && scanner[ 0 ] != '\n' )
				move_fwd();

			if (tokleft)
				move_fwd();

			string_append_c_str_len( & content, cut_ptr, cut_length );
			last_cut = sptr( scanner ) - sptr( raw_text.Ptr );
			continue;
		}

		// Tabs
		if (scanner[0] == '\t')
		{
			if (pos > last_cut)
				string_append_c_str_len( & content, cut_ptr, cut_length);

			append_single_space();

			move_fwd();
			last_cut = sptr(scanner) - sptr(raw_text.Ptr);
			continue;
		}

		// Windows newlines
		if ( tokleft > 1 && scanner[0] == '\r' && scanner[1] == '\n' )
		{
			if ( must_keep_newline || preserve_newlines )
			{
				must_keep_newline = false;

				scanner += 2;
				tokleft -= 2;

				string_append_c_str_len( & content, cut_ptr, cut_length );
				last_cut = sptr( scanner ) - sptr( raw_text.Ptr );
				continue;
			}

			if ( pos > last_cut )
				string_append_c_str_len( & content, cut_ptr, cut_length );

			// Replace with a space
			append_single_space();

			scanner += 2;
			tokleft -= 2;

			last_cut = sptr( scanner ) - sptr( raw_text.Ptr );
			continue;
		}

		// Unix newlines
		if ( scanner[0] == '\n' )
		{
			if ( must_keep_newline || preserve_newlines )
			{
				must_keep_newline = false;

				move_fwd();

				string_append_c_str_len( & content, cut_ptr, cut_length );
				last_cut = sptr( scanner ) - sptr( raw_text.Ptr );
				continue;
			}

			if ( pos > last_cut )
				string_append_c_str_len( & content, cut_ptr, cut_length );

			// Replace with a space
			append_single_space();

			move_fwd();

			last_cut = sptr( scanner ) - sptr( raw_text.Ptr );
			continue;
		}

		// Escaped newlines
		if ( scanner[0] == '\\' )
		{
			string_append_c_str_len( & content, cut_ptr, cut_length );

			s32 amount_to_skip = 1;
			if ( tokleft > 1 && scanner[1] == '\n' )
			{
				amount_to_skip = 2;
			}
			else if ( tokleft > 2 && scanner[1] == '\r' && scanner[2] == '\n' )
			{
				amount_to_skip = 3;
			}

			if ( amount_to_skip > 1 && pos == last_cut )
			{
				scanner += amount_to_skip;
				tokleft -= amount_to_skip;
			}
			else
				move_fwd();

			last_cut = sptr( scanner ) - sptr( raw_text.Ptr );
			continue;
		}

		// Consecutive spaces
		if ( tokleft > 1 && char_is_space( scanner[0] ) && char_is_space( scanner[ 1 ] ) )
		{
			string_append_c_str_len( & content, cut_ptr, cut_length );
			do
			{
				move_fwd();
			}
			while ( tokleft && char_is_space( scanner[0] ) );

			last_cut = sptr( scanner ) - sptr( raw_text.Ptr );

			// Preserve only 1 space of formatting
			append_single_space();
			continue;
		}

		move_fwd();
	}

	// Flush whatever follows the last cut.
	if ( last_cut < raw_text.Len )
	{
		string_append_c_str_len( & content, cut_ptr, raw_text.Len - last_cut );
	}

#undef append_single_space
#undef cut_ptr
#undef cut_length
#undef pos
#undef move_fwd

	return content;
}

// Parses an array declarator: either a lone "[]" operator token or one or more "[ expression ]" groups.
// Each further dimension is chained through Next. Returns { nullptr } when the current token does not
// start an array declarator and InvalidCode on malformed input.
internal
Code parse_array_decl()
{
	push_scope();

	if ( check( Tok_Operator ) && currtok.Text[0] == '[' && currtok.Text[1] == ']' )
	{
		Code array_expr = untyped_str( to_str(currtok) );
		eat( Tok_Operator );
		// []

		pop(& Context);
		return array_expr;
	}

	if ( check( Tok_BraceSquare_Open ) )
	{
		eat( Tok_BraceSquare_Open );
		// [

		if ( left == 0 )
		{
			log_failure( "Error, unexpected end of array declaration ( '[]' scope started )\n%s", to_string(Context) );
			pop(& Context);
			return InvalidCode;
		}

		if ( currtok.Type == Tok_BraceSquare_Close )
		{
			log_failure( "Error, empty array expression in definition\n%s", to_string(Context) );
			pop(& Context);
			return InvalidCode;
		}

		// Everything up to the closing bracket is captured verbatim as an untyped expression.
		Token untyped_tok = currtok;

		while ( left && currtok.Type != Tok_BraceSquare_Close )
		{
			eat( currtok.Type );
		}

		untyped_tok.Length = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)untyped_tok.Text;

		Code array_expr = untyped_str( to_str(untyped_tok) );
		// [ expression

		if ( left == 0 )
		{
			log_failure( "Error, unexpected end of array declaration, expected ]\n%s", to_string(Context) );
			pop(& Context);
			return InvalidCode;
		}

		if ( currtok.Type != Tok_BraceSquare_Close )
		{
			log_failure( "Error, expected ] in array declaration, not %s\n%s", to_str( currtok.Type ).Ptr, to_string(Context) );
			pop(& Context);
			return InvalidCode;
		}

		eat( Tok_BraceSquare_Close );
		// [ expression ]

		// Its a multi-dimensional array
		if ( check( Tok_BraceSquare_Open ))
		{
			Code adjacent_arr_expr = parse_array_decl();
			// [ expression ][ expression ]...

			array_expr->Next.ast = adjacent_arr_expr.ast;
		}

		pop(& Context);
		return array_expr;
	}

	pop(& Context);
	return { nullptr };
}

// Parses a run of attributes ( [[...]], __attribute__((...)), __declspec(...) or attribute macros )
// and flattens them into one CT_PlatformAttributes node whose text has its formatting stripped.
// Returns { nullptr } when nothing was consumed.
internal inline
CodeAttributes parse_attributes()
{
	push_scope();

	Token start = currtok;
	s32   len   = 0;

	// There can be more than one attribute. If there is flatten them to a single string.
	// TODO(Ed): Support keeping an linked list of attributes similar to parameters
	while ( left && is_attribute(currtok) )
	{
		if ( check( Tok_Attribute_Open ) )
		{
			eat( Tok_Attribute_Open );
			// [[

			while ( left && currtok.Type != Tok_Attribute_Close )
			{
				eat( currtok.Type );
			}
			// [[ content

			eat( Tok_Attribute_Close );
			// [[ content ]]

			len = ( ( sptr )prevtok.Text + prevtok.Length ) - ( sptr )start.Text;
		}
		else if ( check( Tok_Decl_GNU_Attribute ) )
		{
			eat( Tok_Decl_GNU_Attribute );
			eat( Tok_Capture_Start );
			eat( Tok_Capture_Start );
			// __attribute__((

			while ( left && currtok.Type != Tok_Capture_End )
			{
				eat( currtok.Type );
			}
			// __attribute__(( content

			eat( Tok_Capture_End );
			eat( Tok_Capture_End );
			// __attribute__(( content ))

			len = ( ( sptr )prevtok.Text + prevtok.Length ) - ( sptr )start.Text;
		}
		else if ( check( Tok_Decl_MSVC_Attribute ) )
		{
			eat( Tok_Decl_MSVC_Attribute );
			eat( Tok_Capture_Start );
			// __declspec(

			while ( left && currtok.Type != Tok_Capture_End )
			{
				eat( currtok.Type );
			}
			// __declspec( content

			eat( Tok_Capture_End );
			// __declspec( content )

			len = ( ( sptr )prevtok.Text + prevtok.Length ) - ( sptr )start.Text;
		}
		else if ( is_attribute(currtok) )
		{
			eat( currtok.Type );
			// attribute macro

			// If its a macro based attribute, this could be a functional macro such as Unreal's UE_DEPRECATED(...)
			if ( check( Tok_Capture_Start))
			{
				eat( Tok_Capture_Start );

				// Consume up to the parenthesis matching the one just eaten; `level` counts nested ones.
				s32 level = 0;
				while ( left && ( currtok.Type != Tok_Capture_End || level > 0 ) )
				{
					if ( currtok.Type == Tok_Capture_Start )
						++ level;
					else if ( currtok.Type == Tok_Capture_End )
						-- level;

					eat( currtok.Type );
				}
				eat( Tok_Capture_End );
			}

			len = ( ( sptr )prevtok.Text + prevtok.Length ) - ( sptr )start.Text;
			// attribute macro ( ... )
		}
	}

	if ( len > 0 )
	{
		StrC attribute_txt = { len, start.Text };
		pop(& Context);

		String name_stripped = strip_formatting( attribute_txt, strip_formatting_dont_preserve_newlines );

		Code result     = make_code();
		result->Type    = CT_PlatformAttributes;
		result->Name    = get_cached_string( { string_length(name_stripped), name_stripped } );
		result->Content = result->Name;
		// result->Token   =

		return ( CodeAttributes )result;
	}

	pop(& Context);
	return { nullptr };
}

// Parses a class or struct (selected by `which`): optional export flag, keyword, attributes, name,
// parent / interface list and body. Unless `inplace_def` is set the terminating ';' and a trailing
// same-line comment are consumed as well.
internal
Code parse_class_struct( TokType which, bool inplace_def = false )
{
	if ( which != Tok_Decl_Class && which != Tok_Decl_Struct )
	{
		log_failure( "Error, expected class or struct, not %s\n%s", to_str( which ).Ptr, to_string(Context) );
		return InvalidCode;
	}

	Token name { nullptr, 0, Tok_Invalid };

	AccessSpec     access     = AccessSpec_Default;
	CodeTypename   parent     = { nullptr };
	CodeBody       body       = { nullptr };
	CodeAttributes attributes = { nullptr };
	ModuleFlag     mflags     = ModuleFlag_None;

	CodeClass result = InvalidCode;

	if ( check(Tok_Module_Export) )
	{
		mflags = ModuleFlag_Export;
		eat( Tok_Module_Export );
	}
	// export

	eat( which );
	// export class/struct

	attributes = parse_attributes();
	// export class/struct attributes

	if ( check( Tok_Identifier ) )
	{
		name = parse_identifier();
		Context.Scope->Name = name;
	}
	// export class/struct attributes name

	// The arena is kept at function scope: the array's allocator refers to it by address,
	// so it has to stay alive until array_free at the end of this procedure.
	local_persist
	char  interface_arr_mem[ kilobytes(4) ] {0};
	Arena interface_arena = arena_init_from_memory( interface_arr_mem, kilobytes(4) );
	auto  interfaces      = array_init_reserve(CodeTypename, arena_allocator_info(& interface_arena), 4 );

	// TODO(Ed) : Make an AST_DerivedType, we'll store any arbitrary derived type into there as a linear linked list of them.
	if ( check( Tok_Assign_Classifer ) )
	{
		eat( Tok_Assign_Classifer );
		// ... :

		if ( is_access_specifier(currtok) )
		{
			access = to_access_specifier(currtok);
			// ... : access
			eat( currtok.Type );
		}

		Token parent_tok = parse_identifier();
		parent = def_type( to_str(parent_tok) );
		// ... : access parent

		while ( check(Tok_Comma) )
		{
			eat( Tok_Comma );
			// ... : access parent,

			if ( is_access_specifier(currtok) )
			{
				eat(currtok.Type);
			}
			Token interface_tok = parse_identifier();

			array_append( & interfaces, def_type( to_str(interface_tok) ) );
			// ... : access parent, interface, ...
		}
	}

	if ( check( Tok_BraceCurly_Open ) )
	{
		body = parse_class_struct_body( which, name );
	}
	// ... { body }

	CodeComment inline_cmt = NullCode;
	if ( ! inplace_def )
	{
		Token stmt_end = currtok;
		eat( Tok_Statement_End );
		// ... { body };

		// check_noskip also verifies tokens remain before the current one is inspected.
		if ( check_noskip( Tok_Comment ) && currtok_noskip.Line == stmt_end.Line )
			inline_cmt = parse_comment();
		// ... { body }; inline comment
	}

	if ( which == Tok_Decl_Class )
		result = def_class( to_str(name), { body, parent, access, attributes, mflags } );

	else
		result = def_struct( to_str(name), { body, (CodeTypename)parent, access, attributes, mflags } );

	if ( inline_cmt )
		result->InlineCmt = inline_cmt;

	array_free(& interfaces);
	return result;
}

// Parses the "{ ... }" member list of a class or struct into a CT_Class_Body / CT_Struct_Body node.
// `name` is the enclosing type's name and is used to recognise constructors.
// Returns InvalidCode as soon as a member fails to parse.
internal neverinline
CodeBody parse_class_struct_body( TokType which, Token name )
{
	push_scope();

	eat( Tok_BraceCurly_Open );
	// {

	CodeBody result = (CodeBody) make_code();

	if ( which == Tok_Decl_Class )
		result->Type = CT_Class_Body;

	else
		result->Type = CT_Struct_Body;

	while ( left && currtok_noskip.Type != Tok_BraceCurly_Close )
	{
		Code           member     = Code_Invalid;
		CodeAttributes attributes = { nullptr };
		CodeSpecifiers specifiers = { nullptr };

		bool expects_function = false;

		// Context.Scope->Start = currtok_noskip;

		if ( currtok_noskip.Type == Tok_Preprocess_Hash )
			eat( Tok_Preprocess_Hash );

		switch ( currtok_noskip.Type )
		{
			case Tok_Statement_End:
			{
				// TODO(Ed): Convert this to a general warning procedure
				log_fmt("Dangling end statement found %S\n", to_string(currtok_noskip));
				eat( Tok_Statement_End );
				continue;
			}
			case Tok_NewLine:
				member = fmt_newline;
				eat( Tok_NewLine );
			break;

			case Tok_Comment:
				member = parse_comment();
			break;

			case Tok_Access_Public:
				member = access_public;
				eat( Tok_Access_Public );
				eat( Tok_Assign_Classifer );
				// public:
			break;

			case Tok_Access_Protected:
				member = access_protected;
				eat( Tok_Access_Protected );
				eat( Tok_Assign_Classifer );
				// protected:
			break;

			case Tok_Access_Private:
				member = access_private;
				eat( Tok_Access_Private );
				eat( Tok_Assign_Classifer );
				// private:
			break;

			case Tok_Decl_Class:
				member = parse_complicated_definition( Tok_Decl_Class );
				// class
			break;

			case Tok_Decl_Enum:
				member = parse_complicated_definition( Tok_Decl_Enum );
				// enum
			break;

			case Tok_Decl_Friend:
				member = parse_friend();
				// friend
			break;

			case Tok_Decl_Operator:
				member = parse_operator_cast();
				// operator type()
			break;

			case Tok_Decl_Struct:
				member = parse_complicated_definition( Tok_Decl_Struct );
				// struct
			break;

			case Tok_Decl_Template:
				member = parse_template();
				// template< ... >
			break;

			case Tok_Decl_Typedef:
				member = parse_typedef();
				// typedef
			break;

			case Tok_Decl_Union:
				member = parse_complicated_definition( Tok_Decl_Union );
				// union
			break;

			case Tok_Decl_Using:
				member = parse_using();
				// using
			break;

			case Tok_Operator:
				//if ( currtok.Text[0] != '~' )
				//{
				//	log_failure( "Operator token found in global body but not destructor unary negation\n%s", to_string(Context) );
				//	return InvalidCode;
				//}

				member = parse_destructor();
				// ~name()
			break;

			case Tok_Preprocess_Define:
				member = parse_define();
				// #define
			break;

			case Tok_Preprocess_Include:
				member = parse_include();
				// #include
			break;

			case Tok_Preprocess_If:
			case Tok_Preprocess_IfDef:
			case Tok_Preprocess_IfNotDef:
			case Tok_Preprocess_ElIf:
				member = parse_preprocess_cond();
				// #if / #ifdef / #ifndef / #elif
			break;

			case Tok_Preprocess_Else:
				member = preprocess_else;
				eat( Tok_Preprocess_Else );
				// #else
			break;

			case Tok_Preprocess_EndIf:
				member = preprocess_endif;
				eat( Tok_Preprocess_EndIf );
				// #endif
			break;

			case Tok_Preprocess_Macro:
				member = parse_simple_preprocess( Tok_Preprocess_Macro );
				// macro
			break;

			case Tok_Preprocess_Pragma:
				member = parse_pragma();
				// #pragma
			break;

			case Tok_Preprocess_Unsupported:
				member = parse_simple_preprocess( Tok_Preprocess_Unsupported );
				// # unsupported directive
			break;

			case Tok_StaticAssert:
				member = parse_static_assert();
				// static_assert
			break;

			case Tok_Attribute_Open:
			case Tok_Decl_GNU_Attribute:
			case Tok_Decl_MSVC_Attribute:
		#define Entry( attribute, str ) case attribute:
			GEN_DEFINE_ATTRIBUTE_TOKENS
		#undef Entry
			{
				attributes = parse_attributes();
				// attributes
			}
			//! Fallthrough intended
			case Tok_Spec_Consteval:
			case Tok_Spec_Constexpr:
			case Tok_Spec_Constinit:
			case Tok_Spec_Explicit:
			case Tok_Spec_ForceInline:
			case Tok_Spec_Inline:
			case Tok_Spec_Mutable:
			case Tok_Spec_NeverInline:
			case Tok_Spec_Static:
			case Tok_Spec_Volatile:
			case Tok_Spec_Virtual:
			{
				Specifier specs_found[16] { Spec_NumSpecifiers };
				s32       NumSpecifiers = 0;

				while ( left && is_specifier(currtok) )
				{
					Specifier spec = to_specifier( to_str(currtok) );

					b32 ignore_spec = false;

					switch ( spec )
					{
						case Spec_Constexpr:
						case Spec_Constinit:
						case Spec_Explicit:
						case Spec_Inline:
						case Spec_ForceInline:
						case Spec_Mutable:
						case Spec_NeverInline:
						case Spec_Static:
						case Spec_Volatile:
						case Spec_Virtual:
						break;

						case Spec_Consteval:
							expects_function = true;
						break;

						case Spec_Const :
							ignore_spec = true;
						break;

						default:
							// NOTE(review): to_str(spec) is passed straight to %s - confirm its return type is a C string.
							log_failure( "Invalid specifier %s for variable\n%s", to_str(spec), to_string(Context) );
							pop(& Context);
							return InvalidCode;
					}

					// Every specifier after would be considered part of the type signature
					if (ignore_spec)
						break;

					specs_found[NumSpecifiers] = spec;
					NumSpecifiers++;
					eat( currtok.Type );
				}

				if ( NumSpecifiers )
				{
					specifiers = def_specifiers( NumSpecifiers, specs_found );
				}
				// attributes specifiers

				if ( is_attribute(currtok) )
				{
					// Unfortunately Unreal has code where there are attributes before specifiers
					CodeAttributes more_attributes = parse_attributes();

					if ( attributes && more_attributes )
					{
						// Fuse both runs into the node that is already held; it must not be replaced afterwards.
						String fused = string_make_reserve( GlobalAllocator, attributes->Content.Len + more_attributes->Content.Len );
						string_append_fmt( & fused, "%S %S", attributes->Content, more_attributes->Content );

						attributes->Name    = get_cached_string( { string_length(fused), fused });
						attributes->Content = attributes->Name;
						// attributes specifiers attributes
					}
					else if ( more_attributes )
					{
						attributes = more_attributes;
					}
				}

				if ( currtok.Type == Tok_Operator && currtok.Text[0] == '~' )
				{
					member = parse_destructor( specifiers );
					// attributes specifiers ~name()
					break;
				}

				if ( currtok.Type == Tok_Decl_Operator )
				{
					member = parse_operator_cast( specifiers );
					// attributes specifiers operator type()
					break;
				}
			}
			//! Fallthrough intentional
			case Tok_Identifier:
			case Tok_Spec_Const:
			case Tok_Type_Unsigned:
			case Tok_Type_Signed:
			case Tok_Type_Short:
			case Tok_Type_Long:
			case Tok_Type_bool:
			case Tok_Type_char:
			case Tok_Type_int:
			case Tok_Type_double:
			{
				if ( nexttok.Type == Tok_Capture_Start && name.Length && currtok.Type == Tok_Identifier )
				{
					// A constructor is an identifier identical to the type's name (same length, not just the same prefix).
					if ( currtok.Length == name.Length && str_compare_len( name.Text, currtok.Text, name.Length ) == 0 )
					{
						member = parse_constructor( specifiers );
						// attributes specifiers name()
						break;
					}
				}

				member = parse_operator_function_or_variable( expects_function, attributes, specifiers );
				// operator, function, or variable
			}
			break;

			default:
			{
				// Anything unrecognised is captured verbatim up to the end of the body.
				Token untyped_tok = currtok;

				while ( left && currtok.Type != Tok_BraceCurly_Close )
				{
					untyped_tok.Length = ( (sptr)currtok.Text + currtok.Length ) - (sptr)untyped_tok.Text;
					eat( currtok.Type );
				}

				member = untyped_str( to_str(untyped_tok) );
				// Something unknown
			}
			break;
		}

		if ( member == Code_Invalid )
		{
			log_failure( "Failed to parse member\n%s", to_string(Context) );
			pop(& Context);
			return InvalidCode;
		}

		append(result, member );
	}

	eat( Tok_BraceCurly_Close );
	// { members }
	pop(& Context);
	return result;
}

// Wraps the current comment token in a CT_Comment node and consumes it.
internal
CodeComment parse_comment()
{
	push_scope();

	CodeComment result = (CodeComment) make_code();
	result->Type       = CT_Comment;
	result->Content    = get_cached_string( to_str(currtok_noskip) );
	result->Name       = result->Content;
	// result->Token   = currtok_noskip;
	eat( Tok_Comment );

	pop(& Context);
	return result;
}

// Decides what a statement starting with class / enum / struct / union (`which`) actually is by looking
// at the tokens just before its terminating ';', then dispatches to the matching parser:
// forward declaration, definition, enum forward with underlying type, or a function / variable whose
// type is spelled with the keyword.
internal
Code parse_complicated_definition( TokType which )
{
	push_scope();

	bool is_inplace = false;

	TokArray tokens = Context.Tokens;

	// Find the ';' that ends this statement, ignoring any nested inside braces.
	s32 idx   = tokens.Idx;
	s32 level = 0;
	for ( ; idx < array_num(tokens.Arr); idx++ )
	{
		if ( tokens.Arr[ idx ].Type == Tok_BraceCurly_Open )
			level++;

		if ( tokens.Arr[ idx ].Type == Tok_BraceCurly_Close )
			level--;

		if ( level == 0 && tokens.Arr[ idx ].Type == Tok_Statement_End )
			break;
	}

	if ( ( idx - 2 ) == tokens.Idx )
	{
		// Its a forward declaration only
		Code result = parse_forward_or_definition( which, is_inplace );
		// keyword name;
		pop(& Context);
		return result;
	}

	// Token types a few positions before the ';'. Guarded so that a short declaration at the very start
	// of the token array never indexes in front of it.
	bool const which_at_4 = idx >= 4 && tokens.Arr[ idx - 4 ].Type == which;
	bool const class_at_4 = idx >= 4 && tokens.Arr[ idx - 4 ].Type == Tok_Decl_Class;
	bool const which_at_5 = idx >= 5 && tokens.Arr[ idx - 5 ].Type == which;

	Token tok = tokens.Arr[ idx - 1 ];
	if ( is_specifier(tok) && is_trailing( to_specifier( to_str(tok))) )
	{
		// (...) trailing-specifiers;

		s32   spec_idx = idx - 1;
		Token spec     = tokens.Arr[spec_idx];
		while ( is_specifier(spec) && is_trailing( to_specifier( to_str(spec))) )
		{
			-- spec_idx;
			spec = tokens.Arr[spec_idx];
		}

		if ( tokens.Arr[spec_idx].Type == Tok_Capture_End )
		{
			// Forward declaration with trailing specifiers for a procedure
			Code result = parse_operator_function_or_variable( false, { nullptr }, { nullptr } );
			// function or operator declaration
			pop(& Context);
			return result;
		}

		log_failure( "Unsupported or bad member definition after %s declaration\n%s", to_str(which).Ptr, to_string(Context) );
		pop(& Context);
		return InvalidCode;
	}
	if ( tok.Type == Tok_Identifier )
	{
		tok = tokens.Arr[ idx - 2 ];
		bool is_indirection = tok.Type == Tok_Ampersand || tok.Type == Tok_Star;
		bool ok_to_parse    = false;

		if ( tok.Type == Tok_BraceCurly_Close )
		{
			// Its an inplace definition
			// keyword name { ... } variable;
			ok_to_parse = true;
			is_inplace  = true;
		}
		else if ( tok.Type == Tok_Identifier && tokens.Arr[ idx - 3 ].Type == which )
		{
			// Its a variable with type ID using the keyword's namespace.
			// keyword type variable;
			ok_to_parse = true;
		}
		else if ( tok.Type == Tok_Assign_Classifer && ( ( which_at_5 && class_at_4 ) || which_at_4 ) )
		{
			// Its a forward declaration of an enum
			// enum name : type;
			// enum class name : type;
			ok_to_parse = true;
			Code result = parse_enum();
			pop(& Context);
			return result;
		}
		else if ( is_indirection )
		{
			// Its a indirection type with type ID using struct namespace.
			// keyword type* variable;
			ok_to_parse = true;
		}

		if ( ! ok_to_parse )
		{
			log_failure( "Unsupported or bad member definition after %s declaration\n%s", to_str(which).Ptr, to_string(Context) );
			pop(& Context);
			return InvalidCode;
		}

		Code result = parse_operator_function_or_variable( false, { nullptr }, { nullptr } );
		// function, operator, or variable
		pop(& Context);
		return result;
	}
	else if ( tok.Type >= Tok_Type_Unsigned && tok.Type <= Tok_Type_MS_W64 )
	{
		tok = tokens.Arr[ idx - 2 ];

		if ( tok.Type != Tok_Assign_Classifer || ( ( ! which_at_5 && ! class_at_4 ) && ! which_at_4 ) )
		{
			log_failure( "Unsupported or bad member definition after %s declaration\n%s", to_str(which).Ptr, to_string(Context) );
			pop(& Context);
			return InvalidCode;
		}

		// Its a forward declaration of an enum class
		// enum name : builtin-type;
		// enum class name : builtin-type;
		Code result = parse_enum();
		pop(& Context);
		return result;
	}
	else if ( tok.Type == Tok_BraceCurly_Close )
	{
		// Its a definition
		Code result = parse_forward_or_definition( which, is_inplace );
		// keyword name { ... };
		pop(& Context);
		return result;
	}
	else if ( tok.Type == Tok_BraceSquare_Close )
	{
		// Its an array definition
		Code result = parse_operator_function_or_variable( false, { nullptr }, { nullptr } );
		// keyword type variable[ ... ];
		pop(& Context);
		return result;
	}
	else
	{
		log_failure( "Unsupported or bad member definition after %s declaration\n%S", to_str(which).Ptr, to_string(Context) );
		pop(& Context);
		return InvalidCode;
	}
}

// Parses "#define name content" into a CT_Preprocess_Define node.
// Non-empty content has its formatting stripped (newlines are not preserved).
internal inline
CodeDefine parse_define()
{
	push_scope();
	eat( Tok_Preprocess_Define );
	// #define

	CodeDefine define = (CodeDefine) make_code();
	define->Type = CT_Preprocess_Define;

	if ( ! check( Tok_Identifier ) )
	{
		log_failure( "Error, expected identifier after #define\n%s", to_string(Context) );
		pop(& Context);
		return InvalidCode;
	}

	Context.Scope->Name = currtok;
	define->Name = get_cached_string( to_str(currtok) );
	eat( Tok_Identifier );
	// #define name

	if ( ! check( Tok_Preprocess_Content ))
	{
		// The cached name is a length + pointer pair, so it is printed with an explicit length.
		log_failure( "Error, expected content after #define %.*s\n%s", (s32)define->Name.Len, define->Name.Ptr, to_string(Context) );
		pop(& Context);
		return InvalidCode;
	}

	if ( currtok.Length == 0 )
	{
		define->Content = get_cached_string( to_str(currtok) );
		eat( Tok_Preprocess_Content );
		// #define name (empty content)

		pop(& Context);
		return define;
	}

	define->Content = get_cached_string( string_to_strc( strip_formatting( to_str(currtok), strip_formatting_dont_preserve_newlines )) );
	eat( Tok_Preprocess_Content );
	// #define name content

	pop(& Context);
	return define;
}

// Parses "= expression" up to (not including) the ';' or top-level ',' that ends it and returns the
// expression text as an untyped node. This procedure pushes no scope of its own, so it never pops one.
internal inline
Code parse_assignment_expression()
{
	Code expr = { nullptr };

	eat( Tok_Operator );
	// =

	Token expr_tok = currtok;

	// A terminator directly after the '=' means there is no expression at all.
	if ( currtok.Type == Tok_Statement_End || currtok.Type == Tok_Comma )
	{
		log_failure( "Expected expression after assignment operator\n%s", to_string(Context) );
		return InvalidCode;
	}

	// Commas nested in braces or parentheses belong to the expression.
	s32 level = 0;
	while ( left && currtok.Type != Tok_Statement_End && (currtok.Type != Tok_Comma || level > 0) )
	{
		if (currtok.Type == Tok_BraceCurly_Open )
			level++;
		if (currtok.Type == Tok_BraceCurly_Close )
			level--;
		if (currtok.Type == Tok_Capture_Start)
			level++;
		else if (currtok.Type == Tok_Capture_End)
			level--;

		eat( currtok.Type );
	}

	expr_tok.Length = ( ( sptr )currtok.Text + currtok.Length ) - ( sptr )expr_tok.Text - 1;
	expr            = untyped_str( to_str(expr_tok) );
	// = expression
	return expr;
}

// Dispatches to the class / enum / struct / union parser selected by `which`.
internal inline
Code parse_forward_or_definition( TokType which, bool is_inplace )
{
	switch ( which )
	{
		case Tok_Decl_Class:
			return parse_class( is_inplace );

		case Tok_Decl_Enum:
			return parse_enum( is_inplace );

		case Tok_Decl_Struct:
			return parse_struct( is_inplace );

		case Tok_Decl_Union:
			return parse_union( is_inplace );

		default:
			log_failure( "Error, wrong token type given to parse_complicated_definition "
				"(only supports class, enum, struct, union) \n%s"
				, to_string(Context) );

			return InvalidCode;
	}
}

// Function parsing is handled in
// multiple places because its initial signature is shared with variable parsing

// Parses the remainder of a function once its return type and name have been consumed:
// parameters, trailing specifiers, then either a body, "= <number>;" (recorded as Spec_Pure),
// or a plain ';'. Produces CT_Function when a body was parsed and CT_Function_Fwd otherwise.
internal inline
CodeFn parse_function_after_name(
	  ModuleFlag     mflags
	, CodeAttributes attributes
	, CodeSpecifiers specifiers
	, CodeTypename   ret_type
	, Token          name
)
{
	push_scope();
	CodeParam params = parse_params();
	// name( parameters )

	// TODO(Ed), Review old comment : These have to be kept separate from the return type's specifiers.
	while ( left && is_specifier(currtok) )
	{
		if ( specifiers.ast == nullptr )
		{
			specifiers = def_specifier( to_specifier( to_str(currtok)) );
			eat( currtok.Type );
			continue;
		}

		append(specifiers, to_specifier( to_str(currtok)) );
		eat( currtok.Type );
	}
	// name( parameters ) specifiers

	CodeBody    body       = NullCode;
	CodeComment inline_cmt = NullCode;
	if ( check( Tok_BraceCurly_Open ) )
	{
		body = parse_function_body();
		if ( body == Code_Invalid )
		{
			pop(& Context);
			return InvalidCode;
		}
		// name( parameters ) specifiers { body }
	}
	else
	{
		if ( check(Tok_Operator) && currtok.Text[0] == '=' )
		{
			eat(Tok_Operator);

			// The specifier list may not exist yet when the declaration had no specifiers at all.
			if ( specifiers.ast == nullptr )
				specifiers = def_specifier( Spec_Pure );
			else
				append(specifiers, Spec_Pure );

			eat( Tok_Number);
			// name( parameters ) specifiers = 0
		}

		Token stmt_end = currtok;
		eat( Tok_Statement_End );
		// name( parameters ) specifiers ;

		// check_noskip also verifies tokens remain before the current one is inspected.
		if ( check_noskip( Tok_Comment ) && currtok_noskip.Line == stmt_end.Line )
			inline_cmt = parse_comment();
		// name( parameters ) specifiers ; inline comment
	}

	String name_stripped = string_make_strc( GlobalAllocator, to_str(name) );
	strip_space(name_stripped);

	CodeFn result       = (CodeFn) make_code();
	result->Name        = get_cached_string( string_to_strc(name_stripped) );
	result->ModuleFlags = mflags;

	if ( body )
	{
		switch ( body->Type )
		{
			case CT_Function_Body:
			case CT_Untyped:
				break;

			default:
			{
				log_failure("Body must be either of Function_Body or Untyped type, %s\n%s", debug_str(body), to_string(Context));
				pop(& Context);
				return InvalidCode;
			}
		}

		result->Type = CT_Function;
		result->Body = body;
	}
	else
	{
		result->Type = CT_Function_Fwd;
	}

	if ( attributes )
		result->Attributes = attributes;

	if ( specifiers )
		result->Specs = specifiers;

	result->ReturnType = ret_type;

	if ( params )
		result->Params = params;

	if ( inline_cmt )
		result->InlineCmt = inline_cmt;

	pop(& Context);
	return result;
}

// Consumes a "{ ... }" function body without interpreting it. The text between the braces
// (nested braces included) is stored as one execution node inside a CT_Function_Body.
internal
Code parse_function_body()
{
	push_scope();

	eat( Tok_BraceCurly_Open );

	CodeBody result = (CodeBody) make_code();
	result->Type = CT_Function_Body;

	// TODO : Support actual parsing of function body
	Token start = currtok_noskip;

	s32 level = 0;
	while ( left && ( currtok_noskip.Type != Tok_BraceCurly_Close || level > 0 ) )
	{
		if ( currtok_noskip.Type == Tok_BraceCurly_Open )
			level++;

		else if ( currtok_noskip.Type == Tok_BraceCurly_Close && level > 0 )
			level--;

		eat( currtok_noskip.Type );
	}

	s32 len = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)start.Text;

	if ( len > 0 )
	{
		append( result, def_execution( { len, start.Text } ) );
	}

	eat( Tok_BraceCurly_Close );

	pop(& Context);
	return result;
}

internal neverinline
CodeBody parse_global_nspace( CodeType which )
{
	push_scope();

	if ( which != CT_Namespace_Body && which != CT_Global_Body && which != CT_Export_Body && which != CT_Extern_Linkage_Body )
	{
		// The scope frame pushed above lives on this procedure's stack; it has to be popped before returning.
		pop(& Context);
		return InvalidCode;
	}

	if ( which != CT_Global_Body )
		eat( Tok_BraceCurly_Open );
		// {

	CodeBody result = (CodeBody) make_code();
	result->Type = which;

	while ( left && currtok_noskip.Type != Tok_BraceCurly_Close )
	{
		Code           member     = Code_Invalid;
		CodeAttributes attributes = { nullptr };
		CodeSpecifiers specifiers = { nullptr };

		bool expects_function = false;

		// Context.Scope->Start = currtok_noskip;

		if ( currtok_noskip.Type == Tok_Preprocess_Hash )
			eat( Tok_Preprocess_Hash );

		switch ( currtok_noskip.Type )
		{
			case Tok_Statement_End:
			{
				// TODO(Ed): Convert this to a general warning procedure
				log_fmt("Dangling end statement found %S\n", to_string(currtok_noskip));
				eat( Tok_Statement_End );
				continue;
			}
			case Tok_NewLine:
				// Empty lines are auto skipped by Tokens.current()
				member = fmt_newline;
				eat( Tok_NewLine );
			break;

			case Tok_Comment:
				member = parse_comment();
			break;

			case Tok_Decl_Class:
				member = parse_complicated_definition( Tok_Decl_Class );
				// class
			break;

			case Tok_Decl_Enum:
				member = parse_complicated_definition( Tok_Decl_Enum );
				// enum
			break;

			case Tok_Decl_Extern_Linkage:
				if ( which == CT_Extern_Linkage_Body )
					log_failure( "Nested extern linkage\n%s", to_string(Context) );

				member = parse_extern_link();
				// extern "..." { ... }
			break;

			case Tok_Decl_Namespace:
				member = parse_namespace();
				// namespace { ... }
			break;

			case Tok_Decl_Struct:
				member = parse_complicated_definition( Tok_Decl_Struct );
				// struct ...
			break;

			case Tok_Decl_Template:
				member = parse_template();
				// template<...> ...
			break;

			case Tok_Decl_Typedef:
				member = parse_typedef();
				// typedef ...
			break;

			case Tok_Decl_Union:
				member = parse_complicated_definition( Tok_Decl_Union );
				// union ...
			break;

			case Tok_Decl_Using:
				member = parse_using();
				// using ...
			break;

			case Tok_Preprocess_Define:
				member = parse_define();
				// #define ...
			break;

			case Tok_Preprocess_Include:
				member = parse_include();
				// #include ...
			break;

			case Tok_Preprocess_If:
			case Tok_Preprocess_IfDef:
			case Tok_Preprocess_IfNotDef:
			case Tok_Preprocess_ElIf:
				member = parse_preprocess_cond();
				// # ...
			break;

			case Tok_Preprocess_Else:
				member = preprocess_else;
				eat( Tok_Preprocess_Else );
				// #else
			break;

			case Tok_Preprocess_EndIf:
				member = preprocess_endif;
				eat( Tok_Preprocess_EndIf );
				// #endif
			break;

			case Tok_Preprocess_Macro:
				member = parse_simple_preprocess( Tok_Preprocess_Macro );
				// macro
			break;

			case Tok_Preprocess_Pragma:
				member = parse_pragma();
				// #pragma ...
			break;

			case Tok_Preprocess_Unsupported:
				member = parse_simple_preprocess( Tok_Preprocess_Unsupported );
				// # ...
			break;

			case Tok_StaticAssert:
				member = parse_static_assert();
				// static_assert( , ... );
			break;

			case Tok_Module_Export:
				if ( which == CT_Export_Body )
					log_failure( "Nested export declaration\n%s", to_string(Context) );

				member = parse_export_body();
				// export { ...
} break; case Tok_Module_Import: { not_implemented( context ); // import ... } //! Fallthrough intentional case Tok_Attribute_Open: case Tok_Decl_GNU_Attribute: case Tok_Decl_MSVC_Attribute: #define Entry( attribute, str ) case attribute: GEN_DEFINE_ATTRIBUTE_TOKENS #undef Entry { attributes = parse_attributes(); // } //! Fallthrough intentional case Tok_Spec_Consteval: case Tok_Spec_Constexpr: case Tok_Spec_Constinit: case Tok_Spec_Extern: case Tok_Spec_ForceInline: case Tok_Spec_Global: case Tok_Spec_Inline: case Tok_Spec_Internal_Linkage: case Tok_Spec_NeverInline: case Tok_Spec_Static: { Specifier specs_found[16] { Spec_NumSpecifiers }; s32 NumSpecifiers = 0; while ( left && is_specifier(currtok) ) { Specifier spec = to_specifier( to_str(currtok) ); bool ignore_spec = false; switch ( spec ) { case Spec_Constexpr: case Spec_Constinit: case Spec_ForceInline: case Spec_Global: case Spec_External_Linkage: case Spec_Internal_Linkage: case Spec_Inline: case Spec_Mutable: case Spec_NeverInline: case Spec_Static: case Spec_Volatile: break; case Spec_Consteval: expects_function = true; break; case Spec_Const: ignore_spec = true; break; default: StrC spec_str = to_str(spec); log_failure( "Invalid specifier %.*s for variable\n%s", spec_str.Len, spec_str, to_string(Context) ); pop(& Context); return InvalidCode; } if (ignore_spec) break; specs_found[NumSpecifiers] = spec; NumSpecifiers++; eat( currtok.Type ); } if ( NumSpecifiers ) { specifiers = def_specifiers( NumSpecifiers, specs_found ); } // } //! Fallthrough intentional case Tok_Identifier: case Tok_Spec_Const: case Tok_Type_Long: case Tok_Type_Short: case Tok_Type_Signed: case Tok_Type_Unsigned: case Tok_Type_bool: case Tok_Type_char: case Tok_Type_double: case Tok_Type_int: { Code constructor_destructor = parse_global_nspace_constructor_destructor( specifiers ); // Possible constructor implemented at global file scope. 
if ( constructor_destructor ) { member = constructor_destructor; break; } bool found_operator_cast_outside_class_implmentation = false; s32 idx = Context.Tokens.Idx; for ( ; idx < array_num(Context.Tokens.Arr); idx++ ) { Token tok = Context.Tokens.Arr[ idx ]; if ( tok.Type == Tok_Identifier ) { idx++; tok = Context.Tokens.Arr[ idx ]; if ( tok.Type == Tok_Access_StaticSymbol ) continue; break; } if ( tok.Type == Tok_Decl_Operator ) found_operator_cast_outside_class_implmentation = true; break; } if ( found_operator_cast_outside_class_implmentation ) { member = parse_operator_cast( specifiers ); // ::operator () { ... } break; } member = parse_operator_function_or_variable( expects_function, attributes, specifiers ); // ... } } if ( member == Code_Invalid ) { log_failure( "Failed to parse member\n%s", to_string(Context) ); pop(& Context); return InvalidCode; } // log_fmt("Global Body Member: %s", member->debug_str()); append(result, member ); } if ( which != CT_Global_Body ) eat( Tok_BraceCurly_Close ); // { } pop(& Context); return result; } internal inline Code parse_global_nspace_constructor_destructor( CodeSpecifiers specifiers ) { Code result = { nullptr }; /* To check if a definition is for a constructor we can go straight to the opening parenthesis for its parameters From There we work backwards to see if we come across two identifiers with the same name between an member access :: operator, there can be template parameters on the left of the :: so we ignore those. Whats important is that its back to back. This has multiple possible faults. What we parse using this method may not filter out if something has a "return type" This is bad since technically you could have a namespace nested into another namespace with the same name. If this awful pattern is done the only way to distiguish with this coarse parse is to know there is no return type defined. 
TODO(Ed): We could fix this by attempting to parse a type, but we would have to have a way to have it soft fail and rollback. */ TokArray tokens = Context.Tokens; s32 idx = tokens.Idx; Token nav = tokens.Arr[ idx ]; for ( ; idx < array_num(tokens.Arr); idx++, nav = tokens.Arr[ idx ] ) { if ( nav.Text[0] == '<' ) { // Skip templated expressions as they mey have expressions with the () operators s32 capture_level = 0; s32 template_level = 0; for ( ; idx < array_num(tokens.Arr); idx++, nav = tokens.Arr[idx] ) { if (nav.Text[ 0 ] == '<') ++ template_level; if (nav.Text[ 0 ] == '>') -- template_level; if (nav.Type == Tok_Operator && nav.Text[1] == '>') -- template_level; if ( nav.Type == Tok_Capture_Start) { if (template_level != 0 ) ++ capture_level; else break; } if ( template_level != 0 && nav.Type == Tok_Capture_End) -- capture_level; } } if ( nav.Type == Tok_Capture_Start ) break; } -- idx; Token tok_right = tokens.Arr[idx]; Token tok_left = NullToken; if (tok_right.Type != Tok_Identifier) { // We're not dealing with a constructor if there is no identifier right before the opening of a parameter's scope. return result; } -- idx; tok_left = tokens.Arr[idx]; // ... bool possible_destructor = false; if ( tok_left.Type == Tok_Operator && tok_left.Text[0] == '~') { possible_destructor = true; -- idx; tok_left = tokens.Arr[idx]; } if ( tok_left.Type != Tok_Access_StaticSymbol ) return result; -- idx; tok_left = tokens.Arr[idx]; // ... 
:: // We search toward the left until we find the next valid identifier s32 capture_level = 0; s32 template_level = 0; while ( idx != tokens.Idx ) { if (tok_left.Text[ 0 ] == '<') ++ template_level; if (tok_left.Text[ 0 ] == '>') -- template_level; if (tok_left.Type == Tok_Operator && tok_left.Text[1] == '>') -- template_level; if ( template_level != 0 && tok_left.Type == Tok_Capture_Start) ++ capture_level; if ( template_level != 0 && tok_left.Type == Tok_Capture_End) -- capture_level; if ( capture_level == 0 && template_level == 0 && tok_left.Type == Tok_Identifier ) break; -- idx; tok_left = tokens.Arr[idx]; } bool is_same = str_compare_len( tok_right.Text, tok_left.Text, tok_right.Length ) == 0; if (tok_left.Type == Tok_Identifier && is_same) { // We have found the pattern we desired if (possible_destructor) { // :: ~ ( result = parse_destructor( specifiers ); } else { // :: ( result = parse_constructor( specifiers ); } } return result; } // TODO(Ed): I want to eventually change the identifier to its own AST type. // This would allow distinction of the qualifier for a symbol :: // This would also allow internal Token parse_identifier( bool* possible_member_function ) { push_scope(); Token name = currtok; Context.Scope->Name = name; eat( Tok_Identifier ); // parse_template_args( name ); //