#ifdef GEN_INTELLISENSE_DIRECTIVES #pragma once #include "gen/etoktype.cpp" #include "interface.upfront.cpp" #include "lexer.cpp" #endif namespace parser { // TODO(Ed) : Rename ETokType::Capture_Start, ETokType::Capture_End to Open_Parenthesis adn Close_Parenthesis constexpr bool dont_skip_formatting = false; struct StackNode { StackNode* Prev; Token Start; Token Name; // The name of the AST node (if parsed) StrC ProcName; // The name of the procedure }; struct ParseContext { TokArray Tokens; StackNode* Scope; void push( StackNode* node ) { node->Prev = Scope; Scope = node; #if 0 && Build_Debug log_fmt("\tEntering Context: %.*s\n", Scope->ProcName.Len, Scope->ProcName.Ptr ); #endif } void pop() { #if 0 && Build_Debug log_fmt("\tPopping Context: %.*s\n", Scope->ProcName.Len, Scope->ProcName.Ptr ); #endif Scope = Scope->Prev; } String to_string() { String result = String::make_reserve( GlobalAllocator, kilobytes(4) ); Token scope_start = Scope->Start; Token last_valid = Tokens.Idx >= Tokens.Arr.num() ? Tokens.Arr[Tokens.Arr.num() -1] : Tokens.current(); sptr length = scope_start.Length; char const* current = scope_start.Text + length; while ( current <= Tokens.Arr.back().Text && *current != '\n' && length < 74 ) { current++; length++; } String line = String::make( GlobalAllocator, { length, scope_start.Text } ); result.append_fmt("\tScope : %s\n", line ); line.free(); sptr dist = (sptr)last_valid.Text - (sptr)scope_start.Text + 2; sptr length_from_err = dist; String line_from_err = String::make( GlobalAllocator, { length_from_err, last_valid.Text } ); if ( length_from_err < 100 ) result.append_fmt("\t(%d, %d):%*c\n", last_valid.Line, last_valid.Column, length_from_err, '^' ); else result.append_fmt("\t(%d, %d)\n", last_valid.Line, last_valid.Column ); StackNode* curr_scope = Scope; s32 level = 0; do { if ( curr_scope->Name ) { result.append_fmt("\t%d: %s, AST Name: %.*s\n", level, curr_scope->ProcName.Ptr, curr_scope->Name.Length, curr_scope->Name.Text ); } else { result.append_fmt("\t%d: %s\n", level, curr_scope->ProcName.Ptr ); } curr_scope = curr_scope->Prev; level++; } while ( curr_scope ); return result; } }; global ParseContext Context; bool TokArray::__eat( TokType type ) { if ( Arr.num() - Idx <= 0 ) { log_failure( "No tokens left.\n%s", Context.to_string() ); return false; } if ( ( Arr[ Idx ].Type == TokType::NewLine && type != TokType::NewLine ) || ( Arr[ Idx ].Type == TokType::Comment && type != TokType::Comment ) ) { Idx++; } if ( Arr[Idx].Type != type ) { log_failure( "Parse Error, TokArray::eat, Expected: ' %s ' not ' %.*s ' (%d, %d)`\n%s" , ETokType::to_str(type).Ptr , Arr[Idx].Length, Arr[Idx].Text , current().Line , current().Column , Context.to_string() ); return false; } #if 0 && Build_Debug log_fmt("Ate: %S\n", Arr[Idx].to_string() ); #endif Idx++; return true; } internal void init() { Tokens = Array::init_reserve( LexArena , ( LexAllocator_Size - sizeof( Array::Header ) ) / sizeof(Token) ); defines_map_arena = Arena_256KB::init(); defines = HashTable::init_reserve( defines_map_arena, 256 ); } internal void deinit() { parser::Tokens = { nullptr }; } #pragma region Helper Macros # define check_parse_args( def ) \ if ( def.Len <= 0 ) \ { \ log_failure( "gen::" stringize(__func__) ": length must greater than 0" ); \ parser::Context.pop(); \ return CodeInvalid; \ } \ if ( def.Ptr == nullptr ) \ { \ log_failure( "gen::" stringize(__func__) ": def was null" ); \ parser::Context.pop(); \ return CodeInvalid; \ } # define currtok_noskip Context.Tokens.current( dont_skip_formatting ) # define currtok Context.Tokens.current() # define prevtok Context.Tokens.previous() # define nexttok Context.Tokens.next() # define eat( Type_ ) Context.Tokens.__eat( Type_ ) # define left ( Context.Tokens.Arr.num() - Context.Tokens.Idx ) # define check_noskip( Type_ ) ( left && currtok_noskip.Type == Type_ ) # define check( Type_ ) ( left && currtok.Type == Type_ ) # define push_scope() \ StackNode scope { nullptr, currtok_noskip, NullToken, txt( __func__ ) }; \ Context.push( & scope ) #pragma endregion Helper Macros // Procedure Forwards ( Entire parser internal parser interface ) internal Code parse_array_decl (); internal CodeAttributes parse_attributes (); internal CodeComment parse_comment (); internal Code parse_complicated_definition ( TokType which ); internal CodeBody parse_class_struct_body ( TokType which, Token name = NullToken ); internal Code parse_class_struct ( TokType which, bool inplace_def ); internal CodeDefine parse_define (); internal Code parse_expression (); internal Code parse_forward_or_definition ( TokType which, bool is_inplace ); internal CodeFn parse_function_after_name ( ModuleFlag mflags, CodeAttributes attributes, CodeSpecifiers specifiers, CodeType ret_type, Token name ); internal Code parse_function_body (); internal Code parse_global_nspace (); internal Code parse_global_nspace_constructor_destructor( CodeSpecifiers specifiers ); internal Token parse_identifier ( bool* possible_member_function = nullptr ); internal CodeInclude parse_include (); internal CodeOperator parse_operator_after_ret_type ( ModuleFlag mflags, CodeAttributes attributes, CodeSpecifiers specifiers, CodeType ret_type ); internal Code parse_operator_function_or_variable( bool expects_function, CodeAttributes attributes, CodeSpecifiers specifiers ); internal CodePragma parse_pragma (); internal CodeParam parse_params ( bool use_template_capture = false ); internal CodePreprocessCond parse_preprocess_cond (); internal Code parse_simple_preprocess ( TokType which ); internal Code parse_static_assert (); internal void parse_template_args ( Token& token ); internal CodeVar parse_variable_after_name ( ModuleFlag mflags, CodeAttributes attributes, CodeSpecifiers specifiers, CodeType type, StrC name ); internal CodeVar parse_variable_declaration_list (); internal CodeClass parse_class ( bool inplace_def = false ); internal CodeConstructor parse_constructor ( CodeSpecifiers specifiers ); internal CodeDestructor parse_destructor ( CodeSpecifiers specifiers = NoCode ); internal CodeEnum parse_enum ( bool inplace_def = false ); internal CodeBody parse_export_body (); internal CodeBody parse_extern_link_body(); internal CodeExtern parse_extern_link (); internal CodeFriend parse_friend (); internal CodeFn parse_function (); internal CodeNS parse_namespace (); internal CodeOpCast parse_operator_cast ( CodeSpecifiers specifiers = NoCode ); internal CodeStruct parse_struct ( bool inplace_def = false ); internal CodeVar parse_variable (); internal CodeTemplate parse_template (); internal CodeType parse_type ( bool from_template = false, bool* is_function = nullptr ); internal CodeTypedef parse_typedef (); internal CodeUnion parse_union ( bool inplace_def = false ); internal CodeUsing parse_using (); constexpr bool inplace_def = true; // Internal parsing functions constexpr bool strip_formatting_dont_preserve_newlines = false; /* This function was an attempt at stripping formatting from any c++ code. It has edge case failures that prevent it from being used in function bodies. */ internal String strip_formatting( StrC raw_text, bool preserve_newlines = true ) { String content = String::make_reserve( GlobalAllocator, raw_text.Len ); if ( raw_text.Len == 0 ) return content; #define cut_length ( scanner - raw_text.Ptr - last_cut ) #define cut_ptr ( raw_text.Ptr + last_cut ) #define pos ( sptr( scanner ) - sptr( raw_text.Ptr ) ) #define move_fwd() do { scanner++; tokleft--; } while(0) s32 tokleft = raw_text.Len; sptr last_cut = 0; char const* scanner = raw_text.Ptr; if ( scanner[0] == ' ' ) { move_fwd(); last_cut = 1; } bool within_string = false; bool within_char = false; bool must_keep_newline = false; while ( tokleft ) { // Skip over the content of string literals if ( scanner[0] == '"' ) { move_fwd(); while ( tokleft && ( scanner[0] != '"' || *( scanner - 1 ) == '\\' ) ) { if ( scanner[0] == '\\' && tokleft > 1 ) { scanner += 2; tokleft -= 2; } else { move_fwd(); } } // Skip the closing " if ( tokleft ) move_fwd(); content.append( cut_ptr, cut_length ); last_cut = sptr( scanner ) - sptr( raw_text.Ptr ); continue; } // Skip over the content of character literals if ( scanner[0] == '\'' ) { move_fwd(); while ( tokleft && ( scanner[0] != '\'' || ( *(scanner -1 ) == '\\' ) ) ) { move_fwd(); } // Skip the closing ' if ( tokleft ) move_fwd(); content.append( cut_ptr, cut_length ); last_cut = sptr( scanner ) - sptr( raw_text.Ptr ); continue; } // Block comments if ( tokleft > 1 && scanner[0] == '/' && scanner[1] == '*' ) { while ( tokleft > 1 && !(scanner[0] == '*' && scanner[1] == '/') ) move_fwd(); scanner += 2; tokleft -= 2; content.append( cut_ptr, cut_length ); last_cut = sptr( scanner ) - sptr( raw_text.Ptr ); continue; } // Line comments if ( tokleft > 1 && scanner[0] == '/' && scanner[1] == '/' ) { must_keep_newline = true; scanner += 2; tokleft -= 2; while ( tokleft && scanner[ 0 ] != '\n' ) move_fwd(); if (tokleft) move_fwd(); content.append( cut_ptr, cut_length ); last_cut = sptr( scanner ) - sptr( raw_text.Ptr ); continue; } // Tabs if (scanner[0] == '\t') { if (pos > last_cut) content.append(cut_ptr, cut_length); if ( content.back() != ' ' ) content.append(' '); move_fwd(); last_cut = sptr(scanner) - sptr(raw_text.Ptr); continue; } if ( tokleft > 1 && scanner[0] == '\r' && scanner[1] == '\n' ) { if ( must_keep_newline || preserve_newlines ) { must_keep_newline = false; scanner += 2; tokleft -= 2; content.append( cut_ptr, cut_length ); last_cut = sptr( scanner ) - sptr( raw_text.Ptr ); continue; } if ( pos > last_cut ) content.append( cut_ptr, cut_length ); // Replace with a space if ( content.back() != ' ' ) content.append( ' ' ); scanner += 2; tokleft -= 2; last_cut = sptr( scanner ) - sptr( raw_text.Ptr ); continue; } if ( scanner[0] == '\n' ) { if ( must_keep_newline || preserve_newlines ) { must_keep_newline = false; move_fwd(); content.append( cut_ptr, cut_length ); last_cut = sptr( scanner ) - sptr( raw_text.Ptr ); continue; } if ( pos > last_cut ) content.append( cut_ptr, cut_length ); // Replace with a space if ( content.back() != ' ' ) content.append( ' ' ); move_fwd(); last_cut = sptr( scanner ) - sptr( raw_text.Ptr ); continue; } // Escaped newlines if ( scanner[0] == '\\' ) { content.append( cut_ptr, cut_length ); s32 amount_to_skip = 1; if ( tokleft > 1 && scanner[1] == '\n' ) { amount_to_skip = 2; } else if ( tokleft > 2 && scanner[1] == '\r' && scanner[2] == '\n' ) { amount_to_skip = 3; } if ( amount_to_skip > 1 && pos == last_cut ) { scanner += amount_to_skip; tokleft -= amount_to_skip; } else move_fwd(); last_cut = sptr( scanner ) - sptr( raw_text.Ptr ); continue; } // Consectuive spaces if ( tokleft > 1 && char_is_space( scanner[0] ) && char_is_space( scanner[ 1 ] ) ) { content.append( cut_ptr, cut_length ); do { move_fwd(); } while ( tokleft && char_is_space( scanner[0] ) ); last_cut = sptr( scanner ) - sptr( raw_text.Ptr ); // Preserve only 1 space of formattting if ( content.back() != ' ' ) content.append( ' ' ); continue; } move_fwd(); } if ( last_cut < raw_text.Len ) { content.append( cut_ptr, raw_text.Len - last_cut ); } #undef cut_ptr #undef cut_length #undef pos #undef move_fwd return content; } internal Code parse_array_decl() { push_scope(); if ( check( TokType::Operator ) && currtok.Text[0] == '[' && currtok.Text[1] == ']' ) { Code array_expr = untyped_str( get_cached_string(txt(" ")) ); eat( TokType::Operator ); // [] Context.pop(); return array_expr; } if ( check( TokType::BraceSquare_Open ) ) { eat( TokType::BraceSquare_Open ); // [ if ( left == 0 ) { log_failure( "Error, unexpected end of array declaration ( '[]' scope started )\n%s", Context.to_string() ); Context.pop(); return CodeInvalid; } if ( currtok.Type == TokType::BraceSquare_Close ) { log_failure( "Error, empty array expression in definition\n%s", Context.to_string() ); Context.pop(); return CodeInvalid; } Token untyped_tok = currtok; while ( left && currtok.Type != TokType::BraceSquare_Close ) { eat( currtok.Type ); } untyped_tok.Length = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)untyped_tok.Text; Code array_expr = untyped_str( untyped_tok ); // [ if ( left == 0 ) { log_failure( "Error, unexpected end of array declaration, expected ]\n%s", Context.to_string() ); Context.pop(); return CodeInvalid; } if ( currtok.Type != TokType::BraceSquare_Close ) { log_failure( "%s: Error, expected ] in array declaration, not %s\n%s", ETokType::to_str( currtok.Type ), Context.to_string() ); Context.pop(); return CodeInvalid; } eat( TokType::BraceSquare_Close ); // [ ] // Its a multi-dimensional array if ( check( TokType::BraceSquare_Open )) { Code adjacent_arr_expr = parse_array_decl(); // [ ][ ]... array_expr->Next = adjacent_arr_expr.ast; } Context.pop(); return array_expr; } Context.pop(); return { nullptr }; } internal inline Code parse_assignment_expression() { Code expr = { nullptr }; eat( TokType::Operator ); // = Token expr_tok = currtok; if ( currtok.Type == TokType::Statement_End && currtok.Type != TokType::Comma ) { log_failure( "Expected expression after assignment operator\n%s", Context.to_string() ); Context.pop(); return CodeInvalid; } s32 level = 0; while ( left && currtok.Type != TokType::Statement_End && (currtok.Type != TokType::Comma || level > 0) ) { if (currtok.Type == TokType::BraceCurly_Open ) level++; if (currtok.Type == TokType::BraceCurly_Close ) level--; if (currtok.Type == TokType::Capture_Start) level++; else if (currtok.Type == TokType::Capture_End) level--; eat( currtok.Type ); } expr_tok.Length = ( ( sptr )currtok.Text + currtok.Length ) - ( sptr )expr_tok.Text - 1; expr = untyped_str( expr_tok ); // = return expr; } internal inline CodeAttributes parse_attributes() { push_scope(); Token start = currtok; s32 len = 0; // There can be more than one attribute. If there is flatten them to a single string. // TODO(Ed): Support keeping an linked list of attributes similar to parameters while ( left && currtok.is_attribute() ) { if ( check( TokType::Attribute_Open ) ) { eat( TokType::Attribute_Open ); // [[ while ( left && currtok.Type != TokType::Attribute_Close ) { eat( currtok.Type ); } // [[ eat( TokType::Attribute_Close ); // [[ ]] len = ( ( sptr )prevtok.Text + prevtok.Length ) - ( sptr )start.Text; } else if ( check( TokType::Decl_GNU_Attribute ) ) { eat( TokType::Decl_GNU_Attribute ); eat( TokType::Capture_Start ); eat( TokType::Capture_Start ); // __attribute__(( while ( left && currtok.Type != TokType::Capture_End ) { eat( currtok.Type ); } // __attribute__(( eat( TokType::Capture_End ); eat( TokType::Capture_End ); // __attribute__(( )) len = ( ( sptr )prevtok.Text + prevtok.Length ) - ( sptr )start.Text; } else if ( check( TokType::Decl_MSVC_Attribute ) ) { eat( TokType::Decl_MSVC_Attribute ); eat( TokType::Capture_Start ); // __declspec( while ( left && currtok.Type != TokType::Capture_End ) { eat( currtok.Type ); } // __declspec( eat( TokType::Capture_End ); // __declspec( ) len = ( ( sptr )prevtok.Text + prevtok.Length ) - ( sptr )start.Text; } else if ( currtok.is_attribute() ) { eat( currtok.Type ); // // If its a macro based attribute, this could be a functional macro such as Unreal's UE_DEPRECATED(...) if ( check( TokType::Capture_Start)) { eat( TokType::Capture_Start ); s32 level = 0; while (left && currtok.Type != TokType::Capture_End && level == 0) { if (currtok.Type == TokType::Capture_Start) ++ level; if (currtok.Type == TokType::Capture_End) --level; eat(currtok.Type); } eat(TokType::Capture_End); } len = ( ( sptr )prevtok.Text + prevtok.Length ) - ( sptr )start.Text; // ( ... ) } } if ( len > 0 ) { StrC attribute_txt = { len, start.Text }; Context.pop(); String name_stripped = strip_formatting( attribute_txt, strip_formatting_dont_preserve_newlines ); Code result = make_code(); result->Type = ECode::PlatformAttributes; result->Name = get_cached_string( name_stripped ); result->Content = result->Name; // result->Token = return ( CodeAttributes )result; } Context.pop(); return { nullptr }; } internal Code parse_class_struct( TokType which, bool inplace_def = false ) { if ( which != TokType::Decl_Class && which != TokType::Decl_Struct ) { log_failure( "Error, expected class or struct, not %s\n%s", ETokType::to_str( which ), Context.to_string() ); return CodeInvalid; } Token name { nullptr, 0, TokType::Invalid }; AccessSpec access = AccessSpec::Default; CodeType parent = { nullptr }; CodeBody body = { nullptr }; CodeAttributes attributes = { nullptr }; ModuleFlag mflags = ModuleFlag::None; CodeClass result = CodeInvalid; if ( check(TokType::Module_Export) ) { mflags = ModuleFlag::Export; eat( TokType::Module_Export ); } // eat( which ); // attributes = parse_attributes(); // if ( check( TokType::Identifier ) ) { name = parse_identifier(); Context.Scope->Name = name; } // local_persist char interface_arr_mem[ kilobytes(4) ] {0}; Array interfaces = Array::init_reserve( Arena::init_from_memory(interface_arr_mem, kilobytes(4) ), 4 ); // TODO(Ed) : Make an AST_DerivedType, we'll store any arbitary derived type into there as a linear linked list of them. if ( check( TokType::Assign_Classifer ) ) { eat( TokType::Assign_Classifer ); // : if ( currtok.is_access_specifier() ) { access = currtok.to_access_specifier(); // : eat( currtok.Type ); } Token parent_tok = parse_identifier(); parent = def_type( parent_tok ); // : while ( check(TokType::Comma) ) { eat( TokType::Comma ); // : , if ( currtok.is_access_specifier() ) { eat(currtok.Type); } Token interface_tok = parse_identifier(); interfaces.append( def_type( interface_tok ) ); // : , ... } } if ( check( TokType::BraceCurly_Open ) ) { body = parse_class_struct_body( which, name ); } // : , ... { } CodeComment inline_cmt = NoCode; if ( ! inplace_def ) { Token stmt_end = currtok; eat( TokType::Statement_End ); // : , ... { }; if ( currtok_noskip.Type == TokType::Comment && currtok_noskip.Line == stmt_end.Line ) inline_cmt = parse_comment(); // : , ... { }; } if ( which == TokType::Decl_Class ) result = def_class( name, body, parent, access, attributes, mflags ); else result = def_struct( name, body, (CodeType)parent, access, attributes, mflags ); if ( inline_cmt ) result->InlineCmt = inline_cmt; interfaces.free(); return result; } internal neverinline CodeBody parse_class_struct_body( TokType which, Token name ) { using namespace ECode; push_scope(); eat( TokType::BraceCurly_Open ); // { CodeBody result = (CodeBody) make_code(); if ( which == TokType::Decl_Class ) result->Type = Class_Body; else result->Type = Struct_Body; while ( left && currtok_noskip.Type != TokType::BraceCurly_Close ) { Code member = Code::Invalid; CodeAttributes attributes = { nullptr }; CodeSpecifiers specifiers = { nullptr }; bool expects_function = false; // Context.Scope->Start = currtok_noskip; if ( currtok_noskip.Type == TokType::Preprocess_Hash ) eat( TokType::Preprocess_Hash ); switch ( currtok_noskip.Type ) { case TokType::Statement_End: { // TODO(Ed): Convert this to a general warning procedure log_fmt("Dangling end statement found %S\n", currtok_noskip.to_string()); eat( TokType::Statement_End ); continue; } case TokType::NewLine: member = fmt_newline; eat( TokType::NewLine ); break; case TokType::Comment: member = parse_comment(); break; case TokType::Access_Public: member = access_public; eat( TokType::Access_Public ); eat( TokType::Assign_Classifer ); // public: break; case TokType::Access_Protected: member = access_protected; eat( TokType::Access_Protected ); eat( TokType::Assign_Classifer ); // protected: break; case TokType::Access_Private: member = access_private; eat( TokType::Access_Private ); eat( TokType::Assign_Classifer ); // private: break; case TokType::Decl_Class: member = parse_complicated_definition( TokType::Decl_Class ); // class break; case TokType::Decl_Enum: member = parse_complicated_definition( TokType::Decl_Enum ); // enum break; case TokType::Decl_Friend: member = parse_friend(); // friend break; case TokType::Decl_Operator: member = parse_operator_cast(); // operator () break; case TokType::Decl_Struct: member = parse_complicated_definition( TokType::Decl_Struct ); // struct break; case TokType::Decl_Template: member = parse_template(); // template< ... > break; case TokType::Decl_Typedef: member = parse_typedef(); // typedef break; case TokType::Decl_Union: member = parse_complicated_definition( TokType::Decl_Union ); // union break; case TokType::Decl_Using: member = parse_using(); // using break; case TokType::Operator: if ( currtok.Text[0] != '~' ) { log_failure( "Operator token found in global body but not destructor unary negation\n%s", Context.to_string() ); return CodeInvalid; } member = parse_destructor(); // ~() break; case TokType::Preprocess_Define: member = parse_define(); // #define break; case TokType::Preprocess_Include: member = parse_include(); // #include break; case TokType::Preprocess_If: case TokType::Preprocess_IfDef: case TokType::Preprocess_IfNotDef: case TokType::Preprocess_ElIf: member = parse_preprocess_cond(); // # break; case TokType::Preprocess_Else: member = preprocess_else; eat( TokType::Preprocess_Else ); // #else break; case TokType::Preprocess_EndIf: member = preprocess_endif; eat( TokType::Preprocess_EndIf ); // #endif break; case TokType::Preprocess_Macro: member = parse_simple_preprocess( TokType::Preprocess_Macro ); // break; case TokType::Preprocess_Pragma: member = parse_pragma(); // #pragma break; case TokType::Preprocess_Unsupported: member = parse_simple_preprocess( TokType::Preprocess_Unsupported ); // # break; case TokType::StaticAssert: member = parse_static_assert(); // static_assert break; case TokType::Attribute_Open: case TokType::Decl_GNU_Attribute: case TokType::Decl_MSVC_Attribute: #define Entry( attribute, str ) case TokType::attribute: GEN_DEFINE_ATTRIBUTE_TOKENS #undef Entry { attributes = parse_attributes(); // } //! Fallthrough intended case TokType::Spec_Consteval: case TokType::Spec_Constexpr: case TokType::Spec_Constinit: case TokType::Spec_Explicit: case TokType::Spec_ForceInline: case TokType::Spec_Inline: case TokType::Spec_Mutable: case TokType::Spec_NeverInline: case TokType::Spec_Static: case TokType::Spec_Volatile: case TokType::Spec_Virtual: { SpecifierT specs_found[16] { ESpecifier::NumSpecifiers }; s32 NumSpecifiers = 0; while ( left && currtok.is_specifier() ) { SpecifierT spec = ESpecifier::to_type( currtok ); b32 ignore_spec = false; switch ( spec ) { case ESpecifier::Constexpr: case ESpecifier::Constinit: case ESpecifier::Explicit: case ESpecifier::Inline: case ESpecifier::ForceInline: case ESpecifier::Mutable: case ESpecifier::NeverInline: case ESpecifier::Static: case ESpecifier::Volatile: case ESpecifier::Virtual: break; case ESpecifier::Consteval: expects_function = true; break; case ESpecifier::Const : ignore_spec = true; break; default: log_failure( "Invalid specifier %s for variable\n%s", ESpecifier::to_str(spec), Context.to_string() ); Context.pop(); return CodeInvalid; } // Every specifier after would be considered part of the type type signature if (ignore_spec) break; specs_found[NumSpecifiers] = spec; NumSpecifiers++; eat( currtok.Type ); } if ( NumSpecifiers ) { specifiers = def_specifiers( NumSpecifiers, specs_found ); } // if ( currtok.is_attribute() ) { // Unfortuantely Unreal has code where there is attirbutes before specifiers CodeAttributes more_attributes = parse_attributes(); if ( attributes ) { String fused = String::make_reserve( GlobalAllocator, attributes->Content.length() + more_attributes->Content.length() ); fused.append_fmt( "%S %S", attributes->Content, more_attributes->Content ); attributes->Name = get_cached_string(fused); attributes->Content = attributes->Name; // } attributes = more_attributes; } if ( currtok.Type == TokType::Operator && currtok.Text[0] == '~' ) { member = parse_destructor( specifiers ); // ~() break; } if ( currtok.Type == TokType::Decl_Operator ) { member = parse_operator_cast( specifiers ); // operator () break; } } //! Fallthrough intentional case TokType::Identifier: case TokType::Spec_Const: case TokType::Type_Unsigned: case TokType::Type_Signed: case TokType::Type_Short: case TokType::Type_Long: case TokType::Type_bool: case TokType::Type_char: case TokType::Type_int: case TokType::Type_double: { if ( nexttok.Type == TokType::Capture_Start && name.Length && currtok.Type == TokType::Identifier ) { if ( str_compare( name.Text, currtok.Text, name.Length ) == 0 ) { member = parse_constructor( specifiers ); // () break; } } member = parse_operator_function_or_variable( expects_function, attributes, specifiers ); // operator ... // or // ... } break; default: Token untyped_tok = currtok; while ( left && currtok.Type != TokType::BraceCurly_Close ) { untyped_tok.Length = ( (sptr)currtok.Text + currtok.Length ) - (sptr)untyped_tok.Text; eat( currtok.Type ); } member = untyped_str( untyped_tok ); // Something unknown break; } if ( member == Code::Invalid ) { log_failure( "Failed to parse member\n%s", Context.to_string() ); Context.pop(); return CodeInvalid; } result.append( member ); } eat( TokType::BraceCurly_Close ); // { } Context.pop(); return result; } internal CodeComment parse_comment() { StackNode scope { nullptr, currtok_noskip, NullToken, txt( __func__ ) }; Context.push( & scope ); CodeComment result = (CodeComment) make_code(); result->Type = ECode::Comment; result->Content = get_cached_string( currtok_noskip ); result->Name = result->Content; // result->Token = currtok_noskip; eat( TokType::Comment ); Context.pop(); return result; } internal Code parse_complicated_definition( TokType which ) { push_scope(); bool is_inplace = false; TokArray tokens = Context.Tokens; s32 idx = tokens.Idx; s32 level = 0; for ( ; idx < tokens.Arr.num(); idx++ ) { if ( tokens[ idx ].Type == TokType::BraceCurly_Open ) level++; if ( tokens[ idx ].Type == TokType::BraceCurly_Close ) level--; if ( level == 0 && tokens[ idx ].Type == TokType::Statement_End ) break; } if ( ( idx - 2 ) == tokens.Idx ) { // Its a forward declaration only Code result = parse_forward_or_definition( which, is_inplace ); // ; Context.pop(); return result; } Token tok = tokens[ idx - 1 ]; if ( tok.is_specifier() && is_trailing( ESpecifier::to_type(tok)) ) { // (...) ...; s32 spec_idx = idx - 1; Token spec = tokens[spec_idx]; while ( spec.is_specifier() && is_trailing( ESpecifier::to_type(spec)) ) { -- spec_idx; spec = tokens[spec_idx]; } if ( tokens[spec_idx].Type == TokType::Capture_End ) { // Forward declaration with trailing specifiers for a procedure tok = tokens[spec_idx]; Code result = parse_operator_function_or_variable( false, { nullptr }, { nullptr } ); // , or Name> ... Context.pop(); return result; } log_failure( "Unsupported or bad member definition after %s declaration\n%s", to_str(which), Context.to_string() ); Context.pop(); return CodeInvalid; } if ( tok.Type == TokType::Identifier ) { tok = tokens[ idx - 2 ]; bool is_indirection = tok.Type == TokType::Ampersand || tok.Type == TokType::Star; bool ok_to_parse = false; if ( tok.Type == TokType::BraceCurly_Close ) { // Its an inplace definition // { ... } ; ok_to_parse = true; is_inplace = true; } else if ( tok.Type == TokType::Identifier && tokens[ idx - 3 ].Type == which ) { // Its a variable with type ID using namespace. // ; ok_to_parse = true; } else if ( tok.Type == TokType::Assign_Classifer && ( ( tokens[idx - 5].Type == which && tokens[idx - 4].Type == TokType::Decl_Class ) || ( tokens[idx - 4].Type == which)) ) { // Its a forward declaration of an enum // : ; // : ; ok_to_parse = true; Code result = parse_enum(); Context.pop(); return result; } else if ( is_indirection ) { // Its a indirection type with type ID using struct namespace. // * ; ok_to_parse = true; } if ( ! ok_to_parse ) { log_failure( "Unsupported or bad member definition after %s declaration\n%s", to_str(which), Context.to_string() ); Context.pop(); return CodeInvalid; } Code result = parse_operator_function_or_variable( false, { nullptr }, { nullptr } ); // , or Name> ... Context.pop(); return result; } else if ( tok.Type >= TokType::Type_Unsigned && tok.Type <= TokType::Type_MS_W64 ) { tok = tokens[ idx - 2 ]; if ( tok.Type != TokType::Assign_Classifer || ( ( tokens[idx - 5].Type != which && tokens[idx - 4].Type != TokType::Decl_Class ) && ( tokens[idx - 4].Type != which)) ) { log_failure( "Unsupported or bad member definition after %s declaration\n%s", to_str(which), Context.to_string() ); Context.pop(); return CodeInvalid; } // Its a forward declaration of an enum class // : ; // : ; Code result = parse_enum(); Context.pop(); return result; } else if ( tok.Type == TokType::BraceCurly_Close ) { // Its a definition Code result = parse_forward_or_definition( which, is_inplace ); // { ... }; Context.pop(); return result; } else if ( tok.Type == TokType::BraceSquare_Close ) { // Its an array definition Code result = parse_operator_function_or_variable( false, { nullptr }, { nullptr } ); // [ ... ]; Context.pop(); return result; } else { log_failure( "Unsupported or bad member definition after %s declaration\n%S", to_str(which).Ptr, Context.to_string() ); Context.pop(); return CodeInvalid; } } internal inline CodeDefine parse_define() { push_scope(); eat( TokType::Preprocess_Define ); // #define CodeDefine define = (CodeDefine) make_code(); define->Type = ECode::Preprocess_Define; if ( ! check( TokType::Identifier ) ) { log_failure( "Error, expected identifier after #define\n%s", Context.to_string() ); Context.pop(); return CodeInvalid; } Context.Scope->Name = currtok; define->Name = get_cached_string( currtok ); eat( TokType::Identifier ); // #define // Defines don't necessarily need content. #if 0 if ( ! check( TokType::Preprocess_Content )) { log_failure( "Error, expected content after #define %s\n%s", define->Name, Context.to_string() ); Context.pop(); return CodeInvalid; } #endif if ( check(TokType::Preprocess_Content) && currtok.Length != 0 ) { define->Content = get_cached_string( currtok ); eat( TokType::Preprocess_Content ); // #define Context.pop(); return define; } define->Content = get_cached_string( strip_formatting( currtok, strip_formatting_dont_preserve_newlines ) ); eat( TokType::Preprocess_Content ); // #define Context.pop(); return define; } internal inline Code parse_forward_or_definition( TokType which, bool is_inplace ) { Code result = CodeInvalid; switch ( which ) { case TokType::Decl_Class: result = parse_class( is_inplace ); return result; case TokType::Decl_Enum: result = parse_enum( is_inplace ); return result; case TokType::Decl_Struct: result = parse_struct( is_inplace ); return result; case TokType::Decl_Union: result = parse_union( is_inplace ); return result; default: log_failure( "Error, wrong token type given to parse_complicated_definition " "(only supports class, enum, struct, union) \n%s" , Context.to_string() ); return CodeInvalid; } return CodeInvalid; } // Function parsing is handled in multiple places because its initial signature is shared with variable parsing internal inline CodeFn parse_function_after_name( ModuleFlag mflags , CodeAttributes attributes , CodeSpecifiers specifiers , CodeType ret_type , Token name ) { push_scope(); CodeParam params = parse_params(); // ( ) // TODO(Ed), Review old comment : These have to be kept separate from the return type's specifiers. while ( left && currtok.is_specifier() ) { if ( specifiers.ast == nullptr ) { specifiers = def_specifier( ESpecifier::to_type(currtok) ); eat( currtok.Type ); continue; } specifiers.append( ESpecifier::to_type(currtok) ); eat( currtok.Type ); } // ( ) CodeBody body = NoCode; CodeComment inline_cmt = NoCode; if ( check( TokType::BraceCurly_Open ) ) { body = parse_function_body(); if ( body == Code::Invalid ) { Context.pop(); return CodeInvalid; } // ( ) { } } else if ( check(TokType::Operator) && currtok.Text[0] == '=' ) { eat(TokType::Operator); specifiers.append( ESpecifier::Pure ); eat( TokType::Number); Token stmt_end = currtok; eat( TokType::Statement_End ); // ( ) = 0; if ( currtok_noskip.Type == TokType::Comment && currtok_noskip.Line == stmt_end.Line ) inline_cmt = parse_comment(); // ( ) ; } else { Token stmt_end = currtok; eat( TokType::Statement_End ); // ( ) ; if ( currtok_noskip.Type == TokType::Comment && currtok_noskip.Line == stmt_end.Line ) inline_cmt = parse_comment(); // ( ) ; } using namespace ECode; String name_stripped = String::make( GlobalAllocator, name ); name_stripped.strip_space(); CodeFn result = (CodeFn) make_code(); result->Name = get_cached_string( name_stripped ); result->ModuleFlags = mflags; if ( body ) { switch ( body->Type ) { case Function_Body: case Untyped: break; default: { log_failure("Body must be either of Function_Body or Untyped type, %s\n%s", body.debug_str(), Context.to_string()); Context.pop(); return CodeInvalid; } } result->Type = Function; result->Body = body; } else { result->Type = Function_Fwd; } if ( attributes ) result->Attributes = attributes; if ( specifiers ) result->Specs = specifiers; result->ReturnType = ret_type; if ( params ) result->Params = params; if ( inline_cmt ) result->InlineCmt = inline_cmt; Context.pop(); return result; } internal Code parse_function_body() { using namespace ECode; push_scope(); eat( TokType::BraceCurly_Open ); CodeBody result = (CodeBody) make_code(); result->Type = Function_Body; // TODO : Support actual parsing of function body Token start = currtok_noskip; s32 level = 0; while ( left && ( currtok_noskip.Type != TokType::BraceCurly_Close || level > 0 ) ) { if ( currtok_noskip.Type == TokType::BraceCurly_Open ) level++; else if ( currtok_noskip.Type == TokType::BraceCurly_Close && level > 0 ) level--; eat( currtok_noskip.Type ); } Token previous = prevtok; s32 len = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)start.Text; if ( len > 0 ) { result.append( def_execution( { len, start.Text } ) ); } eat( TokType::BraceCurly_Close ); Context.pop(); return result; } internal neverinline CodeBody parse_global_nspace( CodeT which ) { using namespace ECode; push_scope(); if ( which != Namespace_Body && which != Global_Body && which != Export_Body && which != Extern_Linkage_Body ) return CodeInvalid; if ( which != Global_Body ) eat( TokType::BraceCurly_Open ); // { CodeBody result = (CodeBody) make_code(); result->Type = which; while ( left && currtok_noskip.Type != TokType::BraceCurly_Close ) { Code member = Code::Invalid; CodeAttributes attributes = { nullptr }; CodeSpecifiers specifiers = { nullptr }; bool expects_function = false; // Context.Scope->Start = currtok_noskip; if ( currtok_noskip.Type == TokType::Preprocess_Hash ) eat( TokType::Preprocess_Hash ); switch ( currtok_noskip.Type ) { case TokType::Statement_End: { // TODO(Ed): Convert this to a general warning procedure log_fmt("Dangling end statement found %S\n", currtok_noskip.to_string()); eat( TokType::Statement_End ); continue; } case TokType::NewLine: // Empty lines are auto skipped by Tokens.current() member = fmt_newline; eat( TokType::NewLine ); break; case TokType::Comment: member = parse_comment(); break; case TokType::Decl_Class: member = parse_complicated_definition( TokType::Decl_Class ); // class break; case TokType::Decl_Enum: member = parse_complicated_definition( TokType::Decl_Enum ); // enum break; case TokType::Decl_Extern_Linkage: if ( which == Extern_Linkage_Body ) log_failure( "Nested extern linkage\n%s", Context.to_string() ); member = parse_extern_link(); // extern "..." { ... } break; case TokType::Decl_Namespace: member = parse_namespace(); // namespace { ... } break; case TokType::Decl_Struct: member = parse_complicated_definition( TokType::Decl_Struct ); // struct ... break; case TokType::Decl_Template: member = parse_template(); // template<...> ... break; case TokType::Decl_Typedef: member = parse_typedef(); // typedef ... break; case TokType::Decl_Union: member = parse_complicated_definition( TokType::Decl_Union ); // union ... break; case TokType::Decl_Using: member = parse_using(); // using ... break; case TokType::Preprocess_Define: member = parse_define(); // #define ... break; case TokType::Preprocess_Include: member = parse_include(); // #include ... break; case TokType::Preprocess_If: case TokType::Preprocess_IfDef: case TokType::Preprocess_IfNotDef: case TokType::Preprocess_ElIf: member = parse_preprocess_cond(); // # ... break; case TokType::Preprocess_Else: member = preprocess_else; eat( TokType::Preprocess_Else ); // #else break; case TokType::Preprocess_EndIf: member = preprocess_endif; eat( TokType::Preprocess_EndIf ); // #endif break; case TokType::Preprocess_Macro: member = parse_simple_preprocess( TokType::Preprocess_Macro ); // break; case TokType::Preprocess_Pragma: member = parse_pragma(); // #pragma ... break; case TokType::Preprocess_Unsupported: member = parse_simple_preprocess( TokType::Preprocess_Unsupported ); // # ... break; case TokType::StaticAssert: member = parse_static_assert(); // static_assert( , ... ); break; case TokType::Module_Export: if ( which == Export_Body ) log_failure( "Nested export declaration\n%s", Context.to_string() ); member = parse_export_body(); // export { ... } break; case TokType::Module_Import: { not_implemented( context ); // import ... } //! Fallthrough intentional case TokType::Attribute_Open: case TokType::Decl_GNU_Attribute: case TokType::Decl_MSVC_Attribute: #define Entry( attribute, str ) case TokType::attribute: GEN_DEFINE_ATTRIBUTE_TOKENS #undef Entry { attributes = parse_attributes(); // } //! Fallthrough intentional case TokType::Spec_Consteval: case TokType::Spec_Constexpr: case TokType::Spec_Constinit: case TokType::Spec_Extern: case TokType::Spec_ForceInline: case TokType::Spec_Global: case TokType::Spec_Inline: case TokType::Spec_Internal_Linkage: case TokType::Spec_NeverInline: case TokType::Spec_Static: case TokType::Spec_ThreadLocal: { SpecifierT specs_found[16] { ESpecifier::NumSpecifiers }; s32 NumSpecifiers = 0; while ( left && currtok.is_specifier() ) { SpecifierT spec = ESpecifier::to_type( currtok ); bool ignore_spec = false; switch ( spec ) { case ESpecifier::Constexpr: case ESpecifier::Constinit: case ESpecifier::ForceInline: case ESpecifier::Global: case ESpecifier::External_Linkage: case ESpecifier::Internal_Linkage: case ESpecifier::Inline: case ESpecifier::Mutable: case ESpecifier::NeverInline: case ESpecifier::Static: case ESpecifier::Volatile: case ESpecifier::Thread_Local: break; case ESpecifier::Consteval: expects_function = true; break; case ESpecifier::Const: ignore_spec = true; break; default: StrC spec_str = ESpecifier::to_str(spec); log_failure( "Invalid specifier %.*s for variable\n%s", spec_str.Len, spec_str, Context.to_string() ); Context.pop(); return CodeInvalid; } if (ignore_spec) break; specs_found[NumSpecifiers] = spec; NumSpecifiers++; eat( currtok.Type ); } if ( NumSpecifiers ) { specifiers = def_specifiers( NumSpecifiers, specs_found ); } // } //! Fallthrough intentional case TokType::Identifier: case TokType::Spec_Const: case TokType::Type_Long: case TokType::Type_Short: case TokType::Type_Signed: case TokType::Type_Unsigned: case TokType::Type_bool: case TokType::Type_char: case TokType::Type_double: case TokType::Type_int: { Code constructor_destructor = parse_global_nspace_constructor_destructor( specifiers ); // Possible constructor implemented at global file scope. if ( constructor_destructor ) { member = constructor_destructor; break; } bool found_operator_cast_outside_class_implmentation = false; s32 idx = Context.Tokens.Idx; for ( ; idx < Context.Tokens.Arr.num(); idx++ ) { Token tok = Context.Tokens[ idx ]; if ( tok.Type == TokType::Identifier ) { idx++; tok = Context.Tokens[ idx ]; if ( tok.Type == TokType::Access_StaticSymbol ) continue; break; } if ( tok.Type == TokType::Decl_Operator ) found_operator_cast_outside_class_implmentation = true; break; } if ( found_operator_cast_outside_class_implmentation ) { member = parse_operator_cast(); // ::operator () { ... } break; } member = parse_operator_function_or_variable( expects_function, attributes, specifiers ); // ... } } if ( member == Code::Invalid ) { log_failure( "Failed to parse member\n%s", Context.to_string() ); Context.pop(); return CodeInvalid; } // log_fmt("Global Body Member: %s", member->debug_str()); result.append( member ); } if ( which != Global_Body ) eat( TokType::BraceCurly_Close ); // { } Context.pop(); return result; } internal inline Code parse_global_nspace_constructor_destructor( CodeSpecifiers specifiers ) { Code result = { nullptr }; /* To check if a definition is for a constructor we can go straight to the opening parenthesis for its parameters From There we work backwards to see if we come across two identifiers with the same name between an member access :: operator, there can be template parameters on the left of the :: so we ignore those. Whats important is that its back to back. This has multiple possible faults. What we parse using this method may not filter out if something has a "return type" This is bad since technically you could have a namespace nested into another namespace with the same name. If this awful pattern is done the only way to distiguish with this coarse parse is to know there is no return type defined. TODO(Ed): We could fix this by attempting to parse a type, but we would have to have a way to have it soft fail and rollback. */ TokArray tokens = Context.Tokens; s32 idx = tokens.Idx; Token nav = tokens[ idx ]; for ( ; idx < tokens.Arr.num(); idx++, nav = tokens[ idx ] ) { if ( nav.Text[0] == '<' ) { // Skip templated expressions as they mey have expressions with the () operators s32 capture_level = 0; s32 template_level = 0; for ( ; idx < tokens.Arr.num(); idx++, nav = tokens[idx] ) { if (nav.Text[ 0 ] == '<') ++ template_level; if (nav.Text[ 0 ] == '>') -- template_level; if (nav.Type == TokType::Operator && nav.Text[1] == '>') -- template_level; if ( nav.Type == ETokType::Capture_Start) { if (template_level != 0 ) ++ capture_level; else break; } if ( template_level != 0 && nav.Type == ETokType::Capture_End) -- capture_level; } } if ( nav.Type == TokType::Capture_Start ) break; } -- idx; Token tok_right = tokens[idx]; Token tok_left = NullToken; if (tok_right.Type != TokType::Identifier) { // We're not dealing with a constructor if there is no identifier right before the opening of a parameter's scope. return result; } -- idx; tok_left = tokens[idx]; // ... bool possible_destructor = false; if ( tok_left.Type == TokType::Operator && tok_left.Text[0] == '~') { possible_destructor = true; -- idx; tok_left = tokens[idx]; } if ( tok_left.Type != TokType::Access_StaticSymbol ) return result; -- idx; tok_left = tokens[idx]; // ... :: // We search toward the left until we find the next valid identifier s32 capture_level = 0; s32 template_level = 0; while ( idx != tokens.Idx ) { if (tok_left.Text[ 0 ] == '<') ++ template_level; if (tok_left.Text[ 0 ] == '>') -- template_level; if (tok_left.Type == TokType::Operator && tok_left.Text[1] == '>') -- template_level; if ( template_level != 0 && tok_left.Type == ETokType::Capture_Start) ++ capture_level; if ( template_level != 0 && tok_left.Type == ETokType::Capture_End) -- capture_level; if ( capture_level == 0 && template_level == 0 && tok_left.Type == TokType::Identifier ) break; -- idx; tok_left = tokens[idx]; } bool is_same = str_compare( tok_right.Text, tok_left.Text, tok_right.Length ) == 0; if (tok_left.Type == TokType::Identifier && is_same) { // We have found the pattern we desired if (possible_destructor) { // :: ~ ( result = parse_destructor( specifiers ); } else { // :: ( result = parse_constructor( specifiers ); } } return result; } // TODO(Ed): I want to eventually change the identifier to its own AST type. // This would allow distinction of the qualifier for a symbol :: // This would also allow internal Token parse_identifier( bool* possible_member_function ) { push_scope(); Token name = currtok; Context.Scope->Name = name; eat( TokType::Identifier ); // parse_template_args( name ); //