From bfbfae466f2e75012796e2c287a3f3e80ebd8eea Mon Sep 17 00:00:00 2001 From: Ed_ Date: Sun, 30 Jul 2023 01:21:04 -0400 Subject: [PATCH 1/9] Naive preprocessor support initial implementation (compiles and runs, not heavily tested) --- Readme.md | 6 - docs/Parsing.md | 6 +- docs/Readme.md | 1 + docs/Upfront.md | 0 gencpp.10x | 8 +- project/components/ast.cpp | 36 +- project/components/data_structures.hpp | 60 ++++ project/components/ecode.hpp | 10 +- project/components/etoktype.cpp | 12 +- project/components/header_end.hpp | 88 ++--- project/components/interface.cpp | 98 +++--- project/components/interface.hpp | 8 +- project/components/interface.parsing.cpp | 314 +++++++++++++++--- .../components/interface.upfront.bodies.cpp | 0 project/components/interface.upfront.cpp | 69 +++- project/components/static_data.cpp | 73 ++-- project/components/types.hpp | 34 +- project/enums/ECode.csv | 10 +- project/enums/ETokType.csv | 7 +- 19 files changed, 641 insertions(+), 199 deletions(-) create mode 100644 docs/Upfront.md create mode 100644 project/components/interface.upfront.bodies.cpp diff --git a/Readme.md b/Readme.md index f1aab97..c6db963 100644 --- a/Readme.md +++ b/Readme.md @@ -8,12 +8,6 @@ These build up a code AST to then serialize with a file builder. This code base attempts follow the [handmade philosophy](https://handmade.network/manifesto), its not meant to be a black box metaprogramming utility, its meant for the user to extend for their project domain. -### TOC - -* [Notes](#notes) -* [Usage](#usage) -* [Building](#building) - ## Notes The project has reached an *alpha* state, all the current functionality works for the test cases but it will most likely break in many other cases. diff --git a/docs/Parsing.md b/docs/Parsing.md index e3e23e0..7f81d66 100644 --- a/docs/Parsing.md +++ b/docs/Parsing.md @@ -5,8 +5,7 @@ This parser does not, and should not do the compiler's job. 
By only supporting t Everything is done in one pass for both the preprocessor directives and the rest of the language. -The parser performs no macro expansion as the scope of gencpp feature-set is to only support the preprocessor for the goal of having rudimentary awareness of preprocessor ***conditionals***, ***defines***, and ***includes***. -*(Conditionals and defines are a TODO)* +The parser performs no macro expansion as the scope of gencpp feature-set is to only support the preprocessor for the goal of having rudimentary awareness of preprocessor ***conditionals***, ***defines***, and ***includes***, and ***`pragmas`**. The keywords supported for the preprocessor are: @@ -17,8 +16,9 @@ The keywords supported for the preprocessor are: * elif * endif * undef +* pragma -Just like with actual preprocessor, each directive # line is considered one preproecessor unit, and will be treated as one Preprocessor AST. *These ASTs will be considered members or entries of braced scope they reside within*. +Each directive `#` line is considered one preproecessor unit, and will be treated as one Preprocessor AST. *These ASTs will be considered members or entries of braced scope they reside within*. All keywords except *include* are suppported as members of a scope for a class/struct, global, or namespace body. Any preprocessor definition abuse that changes the syntax of the core language is unsupported and will fail to parse if not kept within an execution scope (function body, or expression assignment). diff --git a/docs/Readme.md b/docs/Readme.md index 1787d75..145a2e8 100644 --- a/docs/Readme.md +++ b/docs/Readme.md @@ -42,6 +42,7 @@ Otherwise the library is free of any templates. * Execution statement validation : Execution expressions are defined using the untyped AST. * Lambdas (This naturally means its unsupported) +* Non-trivial template validation support. 
* RAII : This needs support for constructors/destructor parsing * Haven't gotten around to yet (its in the github issues) diff --git a/docs/Upfront.md b/docs/Upfront.md new file mode 100644 index 0000000..e69de29 diff --git a/gencpp.10x b/gencpp.10x index 8fba690..8cb3c94 100644 --- a/gencpp.10x +++ b/gencpp.10x @@ -8,8 +8,8 @@ true false false - powershell ./scripts/build.ps1 - powershell ./scripts/rebuild.ps1 + powershell ./scripts/test.gen_run.ps1 + powershell ./scripts/clean.ps1 @@ -38,7 +38,9 @@ C:\Program Files (x86)\Windows Kits\10\\include\10.0.19041.0\\cppwinrt C:\Program Files (x86)\Windows Kits\NETFXSDK\4.8\include\um - + + GEN_TIME + Debug:x64 diff --git a/project/components/ast.cpp b/project/components/ast.cpp index 846af4d..c90d58f 100644 --- a/project/components/ast.cpp +++ b/project/components/ast.cpp @@ -489,8 +489,40 @@ String AST::to_string() } break; - case Preprocessor_Include: - result.append_fmt( "#include \"%s\"", Name ); + case Preprocess_Define: + result.append_fmt( "#define %s %s", Name, Content ); + break; + + case Preprocess_If: + result.append_fmt( "#if %s", Content ); + break; + + case Preprocess_IfDef: + result.append_fmt( "#ifdef %s", Content ); + break; + + case Preprocess_IfNotDef: + result.append_fmt( "#ifndef %s", Content ); + break; + + case Preprocess_Include: + result.append_fmt( "#include \"%s\"", Content ); + break; + + case Preprocess_ElIf: + result.append_fmt( "#elif %s", Content ); + break; + + case Preprocess_Else: + result.append_fmt( "#else" ); + break; + + case Preprocess_EndIf: + result.append_fmt( "#endif" ); + break; + + case Preprocess_Pragma: + result.append_fmt( "#pragma %s", Content ); break; case Specifiers: diff --git a/project/components/data_structures.hpp b/project/components/data_structures.hpp index a70033b..4270583 100644 --- a/project/components/data_structures.hpp +++ b/project/components/data_structures.hpp @@ -12,6 +12,7 @@ struct AST_Body; struct AST_Attributes; struct AST_Comment; struct 
AST_Class; +struct AST_Define; struct AST_Enum; struct AST_Exec; struct AST_Extern; @@ -23,6 +24,8 @@ struct AST_Namespace; struct AST_Operator; struct AST_OpCast; struct AST_Param; +struct AST_Pragma; +struct AST_PreprocessCond; struct AST_Specifiers; struct AST_Struct; struct AST_Template; @@ -38,6 +41,7 @@ struct CodeBody; struct CodeAttributes; struct CodeComment; struct CodeClass; +struct CodeDefine; struct CodeEnum; struct CodeExec; struct CodeExtern; @@ -49,6 +53,8 @@ struct CodeNamespace; struct CodeOperator; struct CodeOpCast; struct CodeParam; +struct CodePreprocessCond; +struct CodePragma; struct CodeSpecifiers; struct CodeStruct; struct CodeTemplate; @@ -115,6 +121,7 @@ struct Code operator CodeAttributes() const; operator CodeComment() const; operator CodeClass() const; + operator CodeDefine() const; operator CodeExec() const; operator CodeEnum() const; operator CodeExtern() const; @@ -126,6 +133,8 @@ struct Code operator CodeOperator() const; operator CodeOpCast() const; operator CodeParam() const; + operator CodePragma() const; + operator CodePreprocessCond() const; operator CodeSpecifiers() const; operator CodeStruct() const; operator CodeTemplate() const; @@ -175,6 +184,7 @@ struct AST operator CodeAttributes(); operator CodeComment(); operator CodeClass(); + operator CodeDefine(); operator CodeEnum(); operator CodeExec(); operator CodeExtern(); @@ -186,6 +196,8 @@ struct AST operator CodeOperator(); operator CodeOpCast(); operator CodeParam(); + operator CodePragma(); + operator CodePreprocessCond(); operator CodeSpecifiers(); operator CodeStruct(); operator CodeTemplate(); @@ -378,6 +390,7 @@ struct CodeBody Define_CodeType( Attributes ); Define_CodeType( Comment ); +Define_CodeType( Define ); Define_CodeType( Enum ); Define_CodeType( Exec ); Define_CodeType( Extern ); @@ -388,6 +401,8 @@ Define_CodeType( Module ); Define_CodeType( Namespace ); Define_CodeType( Operator ); Define_CodeType( OpCast ); +Define_CodeType( Pragma ); +Define_CodeType( 
PreprocessCond ); Define_CodeType( Template ); Define_CodeType( Type ); Define_CodeType( Typedef ); @@ -631,6 +646,21 @@ struct AST_Class }; static_assert( sizeof(AST_Class) == sizeof(AST), "ERROR: AST_Class is not the same size as AST"); +struct AST_Define +{ + union { + char _PAD_[ sizeof(SpecifierT) * AST::ArrSpecs_Cap ]; + StringCached Content; + }; + Code Prev; + Code Next; + Code Parent; + StringCached Name; + CodeT Type; + char _PAD_UNUSED_[ sizeof(ModuleFlag) + sizeof(u32) ]; +}; +static_assert( sizeof(AST_Define) == sizeof(AST), "ERROR: AST_Define is not the same size as AST"); + struct AST_Enum { union { @@ -847,6 +877,36 @@ struct AST_Param }; static_assert( sizeof(AST_Param) == sizeof(AST), "ERROR: AST_Param is not the same size as AST"); +struct AST_Pragma +{ + union { + char _PAD_[ sizeof(SpecifierT) * AST::ArrSpecs_Cap ]; + StringCached Content; + }; + Code Prev; + Code Next; + Code Parent; + StringCached Name; + CodeT Type; + char _PAD_UNUSED_[ sizeof(ModuleFlag) + sizeof(u32) ]; +}; +static_assert( sizeof(AST_Pragma) == sizeof(AST), "ERROR: AST_Pragma is not the same size as AST"); + +struct AST_PreprocessCond +{ + union { + char _PAD_[ sizeof(SpecifierT) * AST::ArrSpecs_Cap ]; + StringCached Content; + }; + Code Prev; + Code Next; + Code Parent; + StringCached Name; + CodeT Type; + char _PAD_UNUSED_[ sizeof(ModuleFlag) + sizeof(u32) ]; +}; +static_assert( sizeof(AST_PreprocessCond) == sizeof(AST), "ERROR: AST_PreprocessCond is not the same size as AST"); + struct AST_Specifiers { SpecifierT ArrSpecs[ AST::ArrSpecs_Cap ]; diff --git a/project/components/ecode.hpp b/project/components/ecode.hpp index 80a2bd3..8422e77 100644 --- a/project/components/ecode.hpp +++ b/project/components/ecode.hpp @@ -37,7 +37,15 @@ namespace ECode Entry( Operator_Cast ) \ Entry( Operator_Cast_Fwd ) \ Entry( Parameters ) \ - Entry( Preprocessor_Include ) \ + Entry( Preprocess_Define ) \ + Entry( Preprocess_If ) \ + Entry( Preprocess_IfDef ) \ + Entry( Preprocess_IfNotDef 
) \ + Entry( Preprocess_ElIf ) \ + Entry( Preprocess_Else ) \ + Entry( Preprocess_EndIf ) \ + Entry( Preprocess_Include ) \ + Entry( Preprocess_Pragma ) \ Entry( Specifiers ) \ Entry( Struct ) \ Entry( Struct_Fwd ) \ diff --git a/project/components/etoktype.cpp b/project/components/etoktype.cpp index adaea66..ac85155 100644 --- a/project/components/etoktype.cpp +++ b/project/components/etoktype.cpp @@ -15,7 +15,7 @@ namespace Parser Entry( API_Import, "GEN_API_Import_Code" ) #endif -# define Define_TokType \ +# define Define_TokType \ Entry( Invalid, "INVALID" ) \ Entry( Access_Private, "private" ) \ Entry( Access_Protected, "protected" ) \ @@ -37,7 +37,7 @@ namespace Parser Entry( Char, "character" ) \ Entry( Comma, "," ) \ Entry( Decl_Class, "class" ) \ - Entry( Decl_GNU_Attribute, "__attribute__" ) \ + Entry( Decl_GNU_Attribute, "__attribute__" ) \ Entry( Decl_MSVC_Attribute, "__declspec" ) \ Entry( Decl_Enum, "enum" ) \ Entry( Decl_Extern_Linkage, "extern" ) \ @@ -56,11 +56,15 @@ namespace Parser Entry( Number, "number" ) \ Entry( Operator, "operator" ) \ Entry( Preprocess_Define, "#define") \ - Entry( Preproces_Include, "include" ) \ + Entry( Preprocess_Include, "include" ) \ Entry( Preprocess_If, "#if") \ - Entry( Preprocess_Elif, "#elif") \ + Entry( Preprocess_IfDef, "#if") \ + Entry( Preprocess_IfNotDef, "#ifndef") \ + Entry( Preprocess_ElIf, "#elif") \ Entry( Preprocess_Else, "#else") \ Entry( Preprocess_EndIf, "#endif") \ + Entry( Preprocess_Pragma, "#pragma") \ + Entry( Preprocess_Content, "macro content") \ Entry( Spec_Alignas, "alignas" ) \ Entry( Spec_Const, "const" ) \ Entry( Spec_Consteval, "consteval" ) \ diff --git a/project/components/header_end.hpp b/project/components/header_end.hpp index d40961d..5921178 100644 --- a/project/components/header_end.hpp +++ b/project/components/header_end.hpp @@ -162,6 +162,7 @@ Define_CodeImpl( CodeBody ); Define_CodeImpl( CodeAttributes ); Define_CodeImpl( CodeComment ); Define_CodeImpl( CodeClass ); 
+Define_CodeImpl( CodeDefine ); Define_CodeImpl( CodeEnum ); Define_CodeImpl( CodeExec ); Define_CodeImpl( CodeExtern ); @@ -173,6 +174,8 @@ Define_CodeImpl( CodeNamespace ); Define_CodeImpl( CodeOperator ); Define_CodeImpl( CodeOpCast ); Define_CodeImpl( CodeParam ); +Define_CodeImpl( CodePragma ); +Define_CodeImpl( CodePreprocessCond ); Define_CodeImpl( CodeSpecifiers ); Define_CodeImpl( CodeStruct ); Define_CodeImpl( CodeTemplate ); @@ -193,6 +196,7 @@ Define_AST_Cast( Body ); Define_AST_Cast( Attributes ); Define_AST_Cast( Comment ); Define_AST_Cast( Class ); +Define_AST_Cast( Define ); Define_AST_Cast( Enum ); Define_AST_Cast( Exec ); Define_AST_Cast( Extern ); @@ -204,6 +208,8 @@ Define_AST_Cast( Namespace ); Define_AST_Cast( Operator ); Define_AST_Cast( OpCast ); Define_AST_Cast( Param ); +Define_AST_Cast( Pragma ); +Define_AST_Cast( PreprocessCond ); Define_AST_Cast( Struct ); Define_AST_Cast( Specifiers ); Define_AST_Cast( Template ); @@ -223,6 +229,7 @@ Code::operator Code ## type() const \ Define_CodeCast( Attributes ); Define_CodeCast( Comment ); Define_CodeCast( Class ); +Define_CodeCast( Define ); Define_CodeCast( Exec ); Define_CodeCast( Enum ); Define_CodeCast( Extern ); @@ -234,6 +241,8 @@ Define_CodeCast( Namespace ); Define_CodeCast( Operator ); Define_CodeCast( OpCast ); Define_CodeCast( Param ); +Define_CodeCast( Pragma ); +Define_CodeCast( PreprocessCond ); Define_CodeCast( Specifiers ); Define_CodeCast( Struct ); Define_CodeCast( Template ); @@ -368,28 +377,6 @@ StrC token_fmt_impl( sw num, ... ) #pragma region Constants -#ifdef GEN_DEFINE_LIBRARY_CODE_CONSTANTS - // Predefined typename codes. 
Are set to readonly and are setup during gen::init() - - extern CodeType t_b32; - - extern CodeType t_s8; - extern CodeType t_s16; - extern CodeType t_s32; - extern CodeType t_s64; - - extern CodeType t_u8; - extern CodeType t_u16; - extern CodeType t_u32; - extern CodeType t_u64; - - extern CodeType t_sw; - extern CodeType t_uw; - - extern CodeType t_f32; - extern CodeType t_f64; -#endif - #ifndef GEN_GLOBAL_BUCKET_SIZE # define GEN_GLOBAL_BUCKET_SIZE megabytes(10) #endif @@ -437,29 +424,22 @@ constexpr s32 TokenFmt_TokenMap_MemSize = GEN_TOKEN_FMT_TOKEN_MAP_MEM_SIZE; constexpr s32 LexAllocator_Size = GEN_LEX_ALLOCATOR_SIZE; constexpr s32 Builder_StrBufferReserve = GEN_BUILDER_STR_BUFFER_RESERVE; -extern CodeType t_empty; // Used with varaidc parameters. (Exposing just in case its useful for another circumstance) -extern CodeType t_auto; -extern CodeType t_void; -extern CodeType t_int; -extern CodeType t_bool; -extern CodeType t_char; -extern CodeType t_wchar_t; -extern CodeType t_class; -extern CodeType t_typename; - -extern CodeParam param_varadic; - -extern CodeAttributes attrib_api_export; -extern CodeAttributes attrib_api_import; - extern Code access_public; extern Code access_protected; extern Code access_private; +extern CodeAttributes attrib_api_export; +extern CodeAttributes attrib_api_import; + extern Code module_global_fragment; extern Code module_private_fragment; -extern Code pragma_once; +extern CodePragma pragma_once; + +extern CodeParam param_varadic; + +extern CodePreprocessCond preprocess_else; +extern CodePreprocessCond preprocess_endif; extern CodeSpecifiers spec_const; extern CodeSpecifiers spec_consteval; @@ -482,6 +462,38 @@ extern CodeSpecifiers spec_thread_local; extern CodeSpecifiers spec_virtual; extern CodeSpecifiers spec_volatile; +extern CodeType t_empty; // Used with varaidc parameters. 
(Exposing just in case its useful for another circumstance) +extern CodeType t_auto; +extern CodeType t_void; +extern CodeType t_int; +extern CodeType t_bool; +extern CodeType t_char; +extern CodeType t_wchar_t; +extern CodeType t_class; +extern CodeType t_typename; + +#ifdef GEN_DEFINE_LIBRARY_CODE_CONSTANTS + // Predefined typename codes. Are set to readonly and are setup during gen::init() + + extern CodeType t_b32; + + extern CodeType t_s8; + extern CodeType t_s16; + extern CodeType t_s32; + extern CodeType t_s64; + + extern CodeType t_u8; + extern CodeType t_u16; + extern CodeType t_u32; + extern CodeType t_u64; + + extern CodeType t_sw; + extern CodeType t_uw; + + extern CodeType t_f32; + extern CodeType t_f64; +#endif + #pragma endregion Constants #pragma region Macros diff --git a/project/components/interface.cpp b/project/components/interface.cpp index d1e7d56..8e948bd 100644 --- a/project/components/interface.cpp +++ b/project/components/interface.cpp @@ -71,6 +71,59 @@ void define_constants() Code::Invalid = make_code(); Code::Invalid.set_global(); + access_private = make_code(); + access_private->Type = ECode::Access_Private; + access_private->Name = get_cached_string( txt_StrC("private:") ); + access_private.set_global(); + + access_protected = make_code(); + access_protected->Type = ECode::Access_Protected; + access_protected->Name = get_cached_string( txt_StrC("protected:") ); + access_protected.set_global(); + + access_public = make_code(); + access_public->Type = ECode::Access_Public; + access_public->Name = get_cached_string( txt_StrC("public:") ); + access_public.set_global(); + + attrib_api_export = def_attributes( code(GEN_API_Export_Code)); + attrib_api_export.set_global(); + + attrib_api_import = def_attributes( code(GEN_API_Import_Code)); + attrib_api_import.set_global(); + + module_global_fragment = make_code(); + module_global_fragment->Type = ECode::Untyped; + module_global_fragment->Name = get_cached_string( txt_StrC("module;") ); + 
module_global_fragment->Content = module_global_fragment->Name; + module_global_fragment.set_global(); + + module_private_fragment = make_code(); + module_private_fragment->Type = ECode::Untyped; + module_private_fragment->Name = get_cached_string( txt_StrC("module : private;") ); + module_private_fragment->Content = module_private_fragment->Name; + module_private_fragment.set_global(); + + pragma_once = (CodePragma) make_code(); + pragma_once->Type = ECode::Untyped; + pragma_once->Name = get_cached_string( txt_StrC("once") ); + pragma_once->Content = pragma_once->Name; + pragma_once.set_global(); + + param_varadic = (CodeType) make_code(); + param_varadic->Type = ECode::Parameters; + param_varadic->Name = get_cached_string( txt_StrC("...") ); + param_varadic->ValueType = t_empty; + param_varadic.set_global(); + + preprocess_else = (CodePreprocessCond) make_code(); + preprocess_else->Type = ECode::Preprocess_Else; + preprocess_else.set_global(); + + preprocess_endif = (CodePreprocessCond) make_code(); + preprocess_endif->Type = ECode::Preprocess_EndIf; + preprocess_endif.set_global(); + # define def_constant_code_type( Type_ ) \ t_##Type_ = def_type( name(Type_) ); \ t_##Type_.set_global(); @@ -110,51 +163,6 @@ void define_constants() t_empty->Name = get_cached_string( txt_StrC("") ); t_empty.set_global(); - param_varadic = (CodeType) make_code(); - param_varadic->Type = ECode::Parameters; - param_varadic->Name = get_cached_string( txt_StrC("...") ); - param_varadic->ValueType = t_empty; - param_varadic.set_global(); - - attrib_api_export = def_attributes( code(GEN_API_Export_Code)); - attrib_api_export.set_global(); - - attrib_api_import = def_attributes( code(GEN_API_Import_Code)); - attrib_api_import.set_global(); - - access_private = make_code(); - access_private->Type = ECode::Access_Private; - access_private->Name = get_cached_string( txt_StrC("private:") ); - access_private.set_global(); - - access_protected = make_code(); - access_protected->Type = 
ECode::Access_Protected; - access_protected->Name = get_cached_string( txt_StrC("protected:") ); - access_protected.set_global(); - - access_public = make_code(); - access_public->Type = ECode::Access_Public; - access_public->Name = get_cached_string( txt_StrC("public:") ); - access_public.set_global(); - - module_global_fragment = make_code(); - module_global_fragment->Type = ECode::Untyped; - module_global_fragment->Name = get_cached_string( txt_StrC("module;") ); - module_global_fragment->Content = module_global_fragment->Name; - module_global_fragment.set_global(); - - module_private_fragment = make_code(); - module_private_fragment->Type = ECode::Untyped; - module_private_fragment->Name = get_cached_string( txt_StrC("module : private;") ); - module_private_fragment->Content = module_private_fragment->Name; - module_private_fragment.set_global(); - - pragma_once = make_code(); - pragma_once->Type = ECode::Untyped; - pragma_once->Name = get_cached_string( txt_StrC("#pragma once") ); - pragma_once->Content = pragma_once->Name; - pragma_once.set_global(); - # pragma push_macro( "global" ) # pragma push_macro( "internal" ) # pragma push_macro( "local_persist" ) diff --git a/project/components/interface.hpp b/project/components/interface.hpp index b1fb94a..0256b02 100644 --- a/project/components/interface.hpp +++ b/project/components/interface.hpp @@ -45,6 +45,8 @@ CodeClass def_class( StrC name , ModuleFlag mflags = ModuleFlag::None , CodeType* interfaces = nullptr, s32 num_interfaces = 0 ); +CodeDefine def_define( StrC name, StrC content ); + CodeEnum def_enum( StrC name , Code body = NoCode, CodeType type = NoCode , EnumT specifier = EnumRegular, CodeAttributes attributes = NoCode @@ -70,7 +72,11 @@ CodeOperator def_operator( OperatorT op CodeOpCast def_operator_cast( CodeType type, Code body = NoCode, CodeSpecifiers specs = NoCode ); -CodeParam def_param ( CodeType type, StrC name, Code value = NoCode ); +CodeParam def_param ( CodeType type, StrC name, Code 
value = NoCode ); +CodePragma def_pragma( StrC directive ); + +CodePreprocessCond def_preprocess_cond( EPreprocessCond type, StrC content ); + CodeSpecifiers def_specifier( SpecifierT specifier ); CodeStruct def_struct( StrC name diff --git a/project/components/interface.parsing.cpp b/project/components/interface.parsing.cpp index 22b46cd..99ddfc9 100644 --- a/project/components/interface.parsing.cpp +++ b/project/components/interface.parsing.cpp @@ -36,7 +36,12 @@ namespace Parser bool is_preprocessor() { - return Type >= TokType::Preprocess_Define && Type <= TokType::Preprocess_EndIf; + return Type >= TokType::Preprocess_Define && Type <= TokType::Preprocess_Pragma; + } + + bool is_preprocess_cond() + { + return Type >= TokType::Preprocess_If && Type <= TokType::Preprocess_EndIf; } bool is_specifier() @@ -163,7 +168,13 @@ namespace Parser { String token_str = String::make( GlobalAllocator, { Arr[Idx].Length, Arr[Idx].Text } ); - log_failure( "Parse Error, TokArray::eat, Expected: %s, not '%s' (%d, %d)`\n%s", ETokType::to_str(type), token_str, current().Line, current().Column, Context.to_string() ); + log_failure( "Parse Error, TokArray::eat, Expected: %s, not '%s' (%d, %d)`\n%s" + , ETokType::to_str(type) + , token_str + , current().Line + , current().Column + , Context.to_string() + ); return false; } @@ -177,7 +188,7 @@ namespace Parser IsAssign = bit(0), }; - TokArray lex( StrC content, bool keep_preprocess_directives = true ) + TokArray lex( StrC content ) { # define current ( * scanner ) @@ -202,17 +213,6 @@ namespace Parser move_forward(); \ } - # define SkipWhitespace_Checked( Context_, Msg_, ... 
) \ - while ( left && char_is_space( current ) ) \ - { \ - move_forward(); \ - } \ - if ( left <= 0 ) \ - { \ - log_failure( "gen::" txt(Context_) ": " Msg_, __VA_ARGS__ ); \ - return { 0, nullptr }; \ - } - local_persist thread_local Array Tokens = { nullptr }; @@ -249,34 +249,93 @@ namespace Parser switch ( current ) { - // TODO : Need to handle the preprocessor as a separate pass. case '#': + { token.Text = scanner; token.Length = 1; move_forward(); - while (left && current != '\n' ) + while (left && current != ' ' ) { - if ( token.Type == ETokType::Invalid && current == ' ' ) - { - token.Type = ETokType::to_type( token ); - } - - if ( current == '\\' ) - { - move_forward(); - - if ( current != '\n' && keep_preprocess_directives ) - { - log_failure( "gen::lex: invalid preprocessor directive, will still grab but will not compile %s", token.Text ); - } - } - move_forward(); token.Length++; } - goto FoundToken; + token.Type = ETokType::to_type( token ); + Tokens.append( token ); + + Token content = { scanner, 0, TokType::Preprocess_Content, false, line, column }; + + if ( token.Type == TokType::Preprocess_Include ) + { + content.Type = TokType::String; + + SkipWhitespace(); + if ( current != '"' ) + { + log_failure( "gen::Parser::lex: Expected '\"' after #include, not '%c' (%d, %d)\n%s" + , current + , token.Line + , token.Column + , Context.to_string() + ); + return { { nullptr }, 0 }; + } + + while ( left && current != '"' ) + { + move_forward(); + content.Length++; + } + + move_forward(); + content.Length++; + + Tokens.append( content ); + continue; // Skip found token, its all handled here. 
+ } + + while ( left ) + { + if ( current == '\\' ) + { + move_forward(); + content.Length++; + + if ( current == '\n' ) + { + move_forward(); + content.Length++; + continue; + } + else + { + String directive_str = String::fmt_buf( GlobalAllocator, "%s", token.Text, token.Length ); + String content_str = String::fmt_buf( GlobalAllocator, "%s", content, min( 40, left + content.Length ) ); + + log_failure( "gen::Parser::lex: Invalid escape sequence '\\%c' (%d, %d)" + " in preprocessor directive '%.*s' (%d, %d)\n" + "will continue parsing, but compiliation will fail (if using non-fatal failures).\n" + , content_str, line, column + , directive_str, token.Line, token.Column ); + break; + } + } + + if ( current == '\n' ) + { + move_forward(); + content.Length++; + break; + } + + move_forward(); + content.Length++; + } + + Tokens.append( content ); + continue; // Skip found token, its all handled here. + } case '.': token.Text = scanner; token.Length = 1; @@ -297,7 +356,9 @@ namespace Parser } else { - log_failure( "gen::lex: invalid varadic argument, expected '...' got '..%c'", current ); + String context_str = String::fmt_buf( GlobalAllocator, "%s", scanner, min( 100, left ) ); + + log_failure( "gen::lex: invalid varadic argument, expected '...' got '..%c' (%d, %d)", context_str, line, column ); } } @@ -669,7 +730,7 @@ namespace Parser { String context_str = String::fmt_buf( GlobalAllocator, "%s", scanner, min( 100, left ) ); - log_failure( "Failed to lex token %s", context_str ); + log_failure( "Failed to lex token %s (%d, %d)", context_str, line, column ); // Skip to next whitespace since we can't know if anything else is valid until then. while ( left && ! 
char_is_space( current ) ) @@ -682,9 +743,6 @@ namespace Parser if ( token.Type != TokType::Invalid ) { - if ( token.is_preprocessor() && keep_preprocess_directives == false ) - continue; - Tokens.append( token ); continue; } @@ -708,22 +766,21 @@ namespace Parser # undef current # undef move_forward # undef SkipWhitespace - # undef SkipWhitespace_Checked } } #pragma region Helper Macros -# define check_parse_args( def ) \ -if ( def.Len <= 0 ) \ -{ \ +# define check_parse_args( def ) \ +if ( def.Len <= 0 ) \ +{ \ log_failure( "gen::" stringize(__func__) ": length must greater than 0" ); \ - return CodeInvalid; \ -} \ -if ( def.Ptr == nullptr ) \ -{ \ + return CodeInvalid; \ +} \ +if ( def.Ptr == nullptr ) \ +{ \ log_failure( "gen::" stringize(__func__) ": def was null" ); \ - return CodeInvalid; \ + return CodeInvalid; \ } # define nexttok Context.Tokens.next() @@ -734,7 +791,7 @@ if ( def.Ptr == nullptr ) # define check( Type_ ) ( left && currtok.Type == Type_ ) -# define push_scope() \ +# define push_scope() \ StackNode scope { nullptr, currtok, NullToken, txt_StrC( __func__ ) }; \ Context.push( & scope ) @@ -760,6 +817,115 @@ internal CodeTypedef parse_typedef (); internal CodeUnion parse_union (); internal CodeUsing parse_using (); +internal inline +CodeDefine parse_define() +{ + using namespace Parser; + push_scope(); + + eat( TokType::Preprocess_Define ); + + CodeDefine + define = (CodeDefine) make_code(); + define->Type = ECode::Preprocess_Define; + + if ( ! check( TokType::Identifier ) ) + { + log_failure( "Error, expected identifier after #define\n%s", Context.to_string() ); + return CodeInvalid; + } + + define->Name = get_cached_string( currtok ); + eat( TokType::Identifier ); + + if ( ! 
check( TokType::Preprocess_Content )) + { + log_failure( "Error, expected content after #define %s\n%s", define->Name, Context.to_string() ); + return CodeInvalid; + } + + define->Content = get_cached_string( currtok ); + eat( TokType::Preprocess_Content ); + + Context.pop(); + return define; +} + +internal inline +CodePreprocessCond parse_preprocess_cond() +{ + using namespace Parser; + push_scope(); + + if ( ! currtok.is_preprocess_cond() ) + { + log_failure( "Error, expected preprocess conditional\n%s", Context.to_string() ); + return CodeInvalid; + } + + CodePreprocessCond + cond = (CodePreprocessCond) make_code(); + cond->Type = scast(CodeT, currtok.Type - (ETokType::Preprocess_If - ECode::Preprocess_If) ); + eat( currtok.Type ); + + if ( ! check( TokType::Preprocess_Content )) + { + log_failure( "Error, expected content after #define\n%s", Context.to_string() ); + return CodeInvalid; + } + + cond->Content = get_cached_string( currtok ); + eat( TokType::Preprocess_Content ); + + Context.pop(); + return cond; +} + +internal inline +CodeInclude parse_include() +{ + using namespace Parser; + push_scope(); + + CodeInclude + include = (CodeInclude) make_code(); + include->Type = ECode::Preprocess_Include; + + if ( ! check( TokType::String )) + { + log_failure( "Error, expected include string after #include\n%s", Context.to_string() ); + return CodeInvalid; + } + include->Content = get_cached_string( currtok ); + eat( TokType::String ); + + Context.pop(); + return include; +} + +internal inline +CodePragma parse_pragma() +{ + using namespace Parser; + push_scope(); + + CodePragma + pragma = (CodePragma) make_code(); + pragma->Type = ECode::Preprocess_Pragma; + + if ( ! 
check( TokType::Preprocess_Content )) + { + log_failure( "Error, expected content after #define\n%s", Context.to_string() ); + return CodeInvalid; + } + + pragma->Content = get_cached_string( currtok ); + eat( TokType::Preprocess_Content ); + + Context.pop(); + return pragma; +} + internal inline Code parse_array_decl() { @@ -1677,6 +1843,33 @@ CodeBody parse_class_struct_body( Parser::TokType which ) member = parse_using(); break; + case TokType::Preprocess_Define: + member = parse_define(); + break; + + case TokType::Preprocess_Include: + member = parse_include(); + break; + + case TokType::Preprocess_If: + case TokType::Preprocess_IfDef: + case TokType::Preprocess_IfNotDef: + case TokType::Preprocess_ElIf: + member = parse_preprocess_cond(); + break; + + case TokType::Preprocess_Pragma: + member = parse_pragma(); + break; + + case TokType::Preprocess_Else: + member = preprocess_else; + break; + + case TokType::Preprocess_EndIf: + member = preprocess_endif; + break; + case TokType::Attribute_Open: case TokType::Decl_GNU_Attribute: case TokType::Decl_MSVC_Attribute: @@ -1966,6 +2159,33 @@ CodeBody parse_global_nspace( CodeT which ) member = parse_using(); break; + case TokType::Preprocess_Define: + member = parse_define(); + break; + + case TokType::Preprocess_Include: + member = parse_include(); + break; + + case TokType::Preprocess_If: + case TokType::Preprocess_IfDef: + case TokType::Preprocess_IfNotDef: + case TokType::Preprocess_ElIf: + member = parse_preprocess_cond(); + break; + + case TokType::Preprocess_Pragma: + member = parse_pragma(); + break; + + case TokType::Preprocess_Else: + member = preprocess_else; + break; + + case TokType::Preprocess_EndIf: + member = preprocess_endif; + break; + case TokType::Module_Export: if ( which == Export_Body ) log_failure( "Nested export declaration\n%s", Context.to_string() ); diff --git a/project/components/interface.upfront.bodies.cpp b/project/components/interface.upfront.bodies.cpp new file mode 100644 index 
0000000..e69de29 diff --git a/project/components/interface.upfront.cpp b/project/components/interface.upfront.cpp index 8245157..145ef8f 100644 --- a/project/components/interface.upfront.cpp +++ b/project/components/interface.upfront.cpp @@ -489,6 +489,26 @@ CodeClass def_class( StrC name return result; } +CodeDefine def_define( StrC name, StrC content ) +{ + using namespace ECode; + + name_check( def_define, name ); + + if ( content.Len <= 0 || content.Ptr == nullptr ) + { + log_failure( "gen::def_define: Invalid value provided" ); + return CodeInvalid; + } + + CodeDefine + result = (CodeDefine) make_code(); + result->Name = get_cached_string( name ); + result->Content = get_cached_string( content ); + + return result; +} + CodeEnum def_enum( StrC name , Code body, CodeType type , EnumT specifier, CodeAttributes attributes @@ -719,7 +739,7 @@ CodeInclude def_include ( StrC path ) Code result = make_code(); - result->Type = ECode::Preprocessor_Include; + result->Type = ECode::Preprocess_Include; result->Name = get_cached_string( path ); result->Content = result->Name; @@ -910,6 +930,53 @@ CodeParam def_param( CodeType type, StrC name, Code value ) return result; } +CodePragma def_pragma( StrC directive ) +{ + using namespace ECode; + + if ( directive.Len <= 0 || directive.Ptr == nullptr ) + { + log_failure( "gen::def_comment: Invalid comment provided:" ); + return CodeInvalid; + } + + CodePragma + result = (CodePragma) make_code(); + result->Type = Preprocess_Pragma; + result->Content = get_cached_string( directive ); + + return result; +} + +CodePreprocessCond def_preprocess_cond( EPreprocessCond type, StrC expr ) +{ + using namespace ECode; + + if ( expr.Len <= 0 || expr.Ptr == nullptr ) + { + log_failure( "gen::def_comment: Invalid comment provided:" ); + return CodeInvalid; + } + + CodePreprocessCond + result = (CodePreprocessCond) make_code(); + result->Content = get_cached_string( expr ); + + switch (type) + { + case EPreprocessCond::If: + result->Type = 
ECode::Preprocess_If; + case EPreprocessCond::IfDef: + result->Type = Preprocess_IfDef; + case EPreprocessCond::IfNotDef: + result->Type = Preprocess_IfNotDef; + case EPreprocessCond::ElIf: + result->Type = Preprocess_ElIf; + } + + return result; +} + CodeSpecifiers def_specifier( SpecifierT spec ) { CodeSpecifiers diff --git a/project/components/static_data.cpp b/project/components/static_data.cpp index 99cbdcd..77f8bd9 100644 --- a/project/components/static_data.cpp +++ b/project/components/static_data.cpp @@ -22,6 +22,44 @@ global AllocatorInfo Allocator_TypeTable = heap(); #pragma region Constants +global Code access_public; +global Code access_protected; +global Code access_private; + +global CodeAttributes attrib_api_export; +global CodeAttributes attrib_api_import; + +global Code module_global_fragment; +global Code module_private_fragment; + +global CodeParam param_varadic; + +global CodePragma pragma_once; + +global CodePreprocessCond preprocess_else; +global CodePreprocessCond preprocess_endif; + +global CodeSpecifiers spec_const; +global CodeSpecifiers spec_consteval; +global CodeSpecifiers spec_constexpr; +global CodeSpecifiers spec_constinit; +global CodeSpecifiers spec_extern_linkage; +global CodeSpecifiers spec_final; +global CodeSpecifiers spec_global; +global CodeSpecifiers spec_inline; +global CodeSpecifiers spec_internal_linkage; +global CodeSpecifiers spec_local_persist; +global CodeSpecifiers spec_mutable; +global CodeSpecifiers spec_override; +global CodeSpecifiers spec_ptr; +global CodeSpecifiers spec_ref; +global CodeSpecifiers spec_register; +global CodeSpecifiers spec_rvalue; +global CodeSpecifiers spec_static_member; +global CodeSpecifiers spec_thread_local; +global CodeSpecifiers spec_virtual; +global CodeSpecifiers spec_volatile; + global CodeType t_empty; global CodeType t_auto; global CodeType t_void; @@ -52,39 +90,4 @@ global CodeType t_f32; global CodeType t_f64; #endif -global CodeParam param_varadic; - -global CodeAttributes 
attrib_api_export; -global CodeAttributes attrib_api_import; - -global Code access_public; -global Code access_protected; -global Code access_private; - -global Code module_global_fragment; -global Code module_private_fragment; - -global Code pragma_once; - -global CodeSpecifiers spec_const; -global CodeSpecifiers spec_consteval; -global CodeSpecifiers spec_constexpr; -global CodeSpecifiers spec_constinit; -global CodeSpecifiers spec_extern_linkage; -global CodeSpecifiers spec_final; -global CodeSpecifiers spec_global; -global CodeSpecifiers spec_inline; -global CodeSpecifiers spec_internal_linkage; -global CodeSpecifiers spec_local_persist; -global CodeSpecifiers spec_mutable; -global CodeSpecifiers spec_override; -global CodeSpecifiers spec_ptr; -global CodeSpecifiers spec_ref; -global CodeSpecifiers spec_register; -global CodeSpecifiers spec_rvalue; -global CodeSpecifiers spec_static_member; -global CodeSpecifiers spec_thread_local; -global CodeSpecifiers spec_virtual; -global CodeSpecifiers spec_volatile; - #pragma endregion Constants diff --git a/project/components/types.hpp b/project/components/types.hpp index 0e9d6a3..99c4865 100644 --- a/project/components/types.hpp +++ b/project/components/types.hpp @@ -8,16 +8,6 @@ using LogFailType = sw(*)(char const*, ...); constexpr LogFailType log_failure = fatal; #endif -// Used to indicate if enum definitoin is an enum class or regular enum. -enum class EnumT : u8 -{ - Regular, - Class -}; - -constexpr EnumT EnumClass = EnumT::Class; -constexpr EnumT EnumRegular = EnumT::Regular; - enum class AccessSpec : u32 { Default, @@ -46,6 +36,17 @@ char const* to_str( AccessSpec type ) return lookup[ (u32)type ]; } +// Used to indicate if enum definitoin is an enum class or regular enum. 
+enum class EnumT : u8 +{ + Regular, + Class +}; + +constexpr EnumT EnumClass = EnumT::Class; +constexpr EnumT EnumRegular = EnumT::Regular; + + enum class ModuleFlag : u32 { None = 0, @@ -62,6 +63,19 @@ ModuleFlag operator|( ModuleFlag A, ModuleFlag B) return (ModuleFlag)( (u32)A | (u32)B ); } +enum class EPreprocessCond : u32 +{ + If, + IfDef, + IfNotDef, + ElIf +}; + +constexpr EPreprocessCond PreprocessCond_If = EPreprocessCond::If; +constexpr EPreprocessCond PreprocessCond_IfDef = EPreprocessCond::IfDef; +constexpr EPreprocessCond PreprocessCond_IfNotDef = EPreprocessCond::IfNotDef; +constexpr EPreprocessCond PreprocessCond_ElIf = EPreprocessCond::ElIf; + /* Predefined attributes Used for the parser constructors to identify non-standard attributes diff --git a/project/enums/ECode.csv b/project/enums/ECode.csv index 9bfabfd..bf6b82b 100644 --- a/project/enums/ECode.csv +++ b/project/enums/ECode.csv @@ -32,7 +32,15 @@ Operator_Member_Fwd Operator_Cast Operator_Cast_Fwd Parameters -Preprocessor_Include +Preprocess_Define +Preprocess_Include +Preprocess_If +Preprocess_IfDef +Preprocess_IfNotDef +Preprocess_ElIf +Preprocess_Else +Preprocess_EndIf +Preprocess_Pragma Specifiers Struct Struct_Fwd diff --git a/project/enums/ETokType.csv b/project/enums/ETokType.csv index e0ff1f5..9a172c2 100644 --- a/project/enums/ETokType.csv +++ b/project/enums/ETokType.csv @@ -38,11 +38,14 @@ Module_Export, "export" Number, "number" Operator, "operator" Preprocess_Define, "#define" -Preprocess_Include, "#include" Preprocess_If, "#if" -Preprocess_ElIF, "#elif" +Preprocess_IfDef, "#ifdef" +Preprocess_IfNotDef, "#ifndef" +Preprocess_ElIf, "#elif" Preprocess_Else, "#else" Preprocess_EndIf, "#endif" +Preprocess_Include, "#include" +Preprocess_Pragma, "#pragma" Spec_Alignas, "alignas" Spec_Const, "const" Spec_Consteval, "consteval" From 2b63fc27cd545ce8e98c159a917e77f31ae9740c Mon Sep 17 00:00:00 2001 From: Ed_ Date: Sun, 30 Jul 2023 18:55:57 -0400 Subject: [PATCH 2/9] Progress toward 
preprocessor parsing, lexing works, parsing does not. --- project/components/etoktype.cpp | 17 +- project/components/interface.cpp | 5 + project/components/interface.parsing.cpp | 216 +++++++++++++++++++---- project/enums/ETokType.csv | 35 ++-- scripts/.clang-format | 8 +- scripts/msvc/devshell.ps1 | 37 ++-- test/test.cpp | 5 +- test/test.singleheader_ast.cpp | 31 ++++ 8 files changed, 274 insertions(+), 80 deletions(-) create mode 100644 test/test.singleheader_ast.cpp diff --git a/project/components/etoktype.cpp b/project/components/etoktype.cpp index ac85155..dbb17b9 100644 --- a/project/components/etoktype.cpp +++ b/project/components/etoktype.cpp @@ -55,16 +55,17 @@ namespace Parser Entry( Module_Export, "export" ) \ Entry( Number, "number" ) \ Entry( Operator, "operator" ) \ - Entry( Preprocess_Define, "#define") \ + Entry( Preprocess_Define, "define") \ + Entry( Preprocess_If, "if") \ + Entry( Preprocess_IfDef, "ifdef") \ + Entry( Preprocess_IfNotDef, "ifndef") \ + Entry( Preprocess_ElIf, "elif") \ + Entry( Preprocess_Else, "else") \ + Entry( Preprocess_EndIf, "endif") \ Entry( Preprocess_Include, "include" ) \ - Entry( Preprocess_If, "#if") \ - Entry( Preprocess_IfDef, "#if") \ - Entry( Preprocess_IfNotDef, "#ifndef") \ - Entry( Preprocess_ElIf, "#elif") \ - Entry( Preprocess_Else, "#else") \ - Entry( Preprocess_EndIf, "#endif") \ - Entry( Preprocess_Pragma, "#pragma") \ + Entry( Preprocess_Pragma, "pragma") \ Entry( Preprocess_Content, "macro content") \ + Entry( Preprocess_Macro, "macro") \ Entry( Spec_Alignas, "alignas" ) \ Entry( Spec_Const, "const" ) \ Entry( Spec_Consteval, "consteval" ) \ diff --git a/project/components/interface.cpp b/project/components/interface.cpp index 8e948bd..cf2b2ec 100644 --- a/project/components/interface.cpp +++ b/project/components/interface.cpp @@ -1,3 +1,6 @@ +internal void init_parser(); +internal void deinit_parser(); + internal void* Global_Allocator_Proc( void* allocator_data, AllocType type, sw size, sw alignment, 
void* old_memory, sw old_size, u64 flags ) { @@ -266,6 +269,7 @@ void init() } define_constants(); + init_parser(); } void deinit() @@ -308,6 +312,7 @@ void deinit() while ( left--, left ); Global_AllocatorBuckets.free(); + deinit_parser(); } void reset() diff --git a/project/components/interface.parsing.cpp b/project/components/interface.parsing.cpp index 99ddfc9..b71acaf 100644 --- a/project/components/interface.parsing.cpp +++ b/project/components/interface.parsing.cpp @@ -188,6 +188,8 @@ namespace Parser IsAssign = bit(0), }; + global Array Tokens; + TokArray lex( StrC content ) { # define current ( * scanner ) @@ -213,9 +215,6 @@ namespace Parser move_forward(); \ } - local_persist thread_local - Array Tokens = { nullptr }; - s32 left = content.Len; char const* scanner = content.Ptr; @@ -232,17 +231,22 @@ namespace Parser return { { nullptr }, 0 }; } - if ( Tokens ) + local_persist char defines_map_mem[ kilobytes(64) ]; + local_persist Arena defines_map_arena; + HashTable defines; { - Tokens.free(); + defines_map_arena = Arena::init_from_memory( defines_map_mem, sizeof(defines_map_mem) ); + defines = HashTable::init( defines_map_arena ); } - Tokens = Array::init_reserve( LexArena, content.Len / 6 ); + Tokens.clear(); while (left ) { Token token = { nullptr, 0, TokType::Invalid, false, line, column }; + bool is_define = false; + SkipWhitespace(); if ( left <= 0 ) break; @@ -251,11 +255,11 @@ namespace Parser { case '#': { - token.Text = scanner; - token.Length = 1; move_forward(); + SkipWhitespace(); - while (left && current != ' ' ) + token.Text = scanner; + while (left && ! 
char_is_space(current) ) { move_forward(); token.Length++; @@ -264,6 +268,34 @@ namespace Parser token.Type = ETokType::to_type( token ); Tokens.append( token ); + if ( token.Type == TokType::Preprocess_Define ) + { + SkipWhitespace(); + Token name = { scanner, 0, TokType::Identifier, false, line, column }; + + name.Text = scanner; + name.Length = 1; + move_forward(); + + while ( left && ( char_is_alphanumeric(current) || current == '_' ) ) + { + move_forward(); + name.Length++; + } + + Tokens.append( name ); + + s32 key = crc32( name.Text, name.Length ); + defines.set( key, name ); + } + + if ( token.Type == TokType::Preprocess_Else || token.Type == TokType::Preprocess_EndIf ) + { + SkipWhitespace(); + Tokens.append( token ); + continue; + } + Token content = { scanner, 0, TokType::Preprocess_Content, false, line, column }; if ( token.Type == TokType::Preprocess_Include ) @@ -271,18 +303,20 @@ namespace Parser content.Type = TokType::String; SkipWhitespace(); - if ( current != '"' ) + if ( current != '"' && current != '<' ) { - log_failure( "gen::Parser::lex: Expected '\"' after #include, not '%c' (%d, %d)\n%s" + String directive_str = String::fmt_buf( GlobalAllocator, "%.*s", min( 80, left + content.Length ), token.Text ); + + log_failure( "gen::Parser::lex: Expected '\"' or '<' after #include, not '%c' (%d, %d)\n%s" , current - , token.Line - , token.Column - , Context.to_string() + , content.Line + , content.Column + , directive_str.Data ); return { { nullptr }, 0 }; } - while ( left && current != '"' ) + while ( left && current != '"' && current != '>' ) { move_forward(); content.Length++; @@ -295,13 +329,27 @@ namespace Parser continue; // Skip found token, its all handled here. } + s32 within_string = false; + s32 within_char = false; while ( left ) { - if ( current == '\\' ) + if ( current == '"' ) + within_string ^= true; + + if ( current == '\'' ) + within_char ^= true; + + if ( current == '\\' && ! within_string && ! 
within_char ) { move_forward(); content.Length++; + if ( current == '\r' ) + { + move_forward(); + content.Length++; + } + if ( current == '\n' ) { move_forward(); @@ -310,14 +358,14 @@ namespace Parser } else { - String directive_str = String::fmt_buf( GlobalAllocator, "%s", token.Text, token.Length ); - String content_str = String::fmt_buf( GlobalAllocator, "%s", content, min( 40, left + content.Length ) ); + String directive_str = String::make_length( GlobalAllocator, token.Text, token.Length ); + String content_str = String::fmt_buf( GlobalAllocator, "%.*s", min( 400, left + content.Length ), content.Text ); log_failure( "gen::Parser::lex: Invalid escape sequence '\\%c' (%d, %d)" - " in preprocessor directive '%.*s' (%d, %d)\n" - "will continue parsing, but compiliation will fail (if using non-fatal failures).\n" - , content_str, line, column - , directive_str, token.Line, token.Column ); + " in preprocessor directive '%s' (%d, %d)\n%s" + , current, line, column + , directive_str, content.Line, content.Column + , content_str ); break; } } @@ -358,7 +406,7 @@ namespace Parser { String context_str = String::fmt_buf( GlobalAllocator, "%s", scanner, min( 100, left ) ); - log_failure( "gen::lex: invalid varadic argument, expected '...' got '..%c' (%d, %d)", context_str, line, column ); + log_failure( "gen::lex: invalid varadic argument, expected '...' got '..%c' (%d, %d)\n%s", current, line, column, context_str ); } } @@ -468,6 +516,18 @@ namespace Parser move_forward(); + if ( left && current == '\\' ) + { + move_forward(); + token.Length++; + + if ( current == '\'' ) + { + move_forward(); + token.Length++; + } + } + while ( left && current != '\'' ) { move_forward(); @@ -540,6 +600,17 @@ namespace Parser } goto FoundToken; + case '?': + token.Text = scanner; + token.Length = 1; + token.Type = TokType::Operator; + token.IsAssign = false; + + if (left) + move_forward(); + + goto FoundToken; + // All other operators we just label as an operator and move forward. 
case '=': token.Text = scanner; @@ -550,6 +621,15 @@ namespace Parser if (left) move_forward(); + if ( current == '=' ) + { + token.Length++; + token.IsAssign = false; + + if (left) + move_forward(); + } + goto FoundToken; case '+': @@ -653,10 +733,17 @@ namespace Parser token.Text = scanner; token.Length = 0; - while ( left && ( current != '*' && *(scanner + 1) != '/' ) ) + bool star = current == '*'; + bool slash = scanner[1] == '/'; + bool at_end = star && slash; + while ( left && ! at_end ) { move_forward(); token.Length++; + + star = current == '*'; + slash = scanner[1] == '/'; + at_end = star && slash; } move_forward(); move_forward(); @@ -728,9 +815,9 @@ namespace Parser } else { - String context_str = String::fmt_buf( GlobalAllocator, "%s", scanner, min( 100, left ) ); + String context_str = String::fmt_buf( GlobalAllocator, "%.*s", min( 100, left ), scanner ); - log_failure( "Failed to lex token %s (%d, %d)", context_str, line, column ); + log_failure( "Failed to lex token '%c' (%d, %d)\n%s", current, line, column, context_str ); // Skip to next whitespace since we can't know if anything else is valid until then. while ( left && ! char_is_space( current ) ) @@ -749,10 +836,47 @@ namespace Parser TokType type = ETokType::to_type( token ); - if ( type == TokType::Invalid) - type = TokType::Identifier; + if ( type != TokType::Invalid ) + { + token.Type = type; + Tokens.append( token ); + continue; + } + + u32 key = crc32( token.Text, token.Length ); + StrC* define = defines.get( key ); + if ( define ) + { + token.Type = TokType::Preprocess_Macro; + + // Want to ignore any arguments the define may have as they can be execution expressions. 
+ if ( left && current == '(' ) + { + move_forward(); + token.Length++; + + s32 level = 0; + while ( left && (current != ')' || level > 0) ) + { + if ( current == '(' ) + level++; + + else if ( current == ')' && level > 0 ) + level--; + + move_forward(); + token.Length++; + } + + move_forward(); + token.Length++; + } + } + else + { + token.Type = TokType::Identifier; + } - token.Type = type; Tokens.append( token ); } @@ -762,6 +886,8 @@ namespace Parser return { { nullptr }, 0 }; } + defines.clear(); + defines_map_arena.free(); return { Tokens, 0 }; # undef current # undef move_forward @@ -769,6 +895,22 @@ namespace Parser } } +internal +void init_parser() +{ + using namespace Parser; + + Tokens = Array::init_reserve( LexArena + , ( LexAllocator_Size - sizeof( Array::Header ) ) / sizeof(Token) + ); +} + +internal +void deinit_parser() +{ + Parser::Tokens = { nullptr }; +} + #pragma region Helper Macros # define check_parse_args( def ) \ @@ -912,10 +1054,11 @@ CodePragma parse_pragma() CodePragma pragma = (CodePragma) make_code(); pragma->Type = ECode::Preprocess_Pragma; + eat( TokType::Preprocess_Pragma ); if ( ! 
check( TokType::Preprocess_Content )) { - log_failure( "Error, expected content after #define\n%s", Context.to_string() ); + log_failure( "Error, expected content after #pragma\n%s", Context.to_string() ); return CodeInvalid; } @@ -1858,6 +2001,11 @@ CodeBody parse_class_struct_body( Parser::TokType which ) member = parse_preprocess_cond(); break; + case TokType::Preprocess_Macro: + member = untyped_str( currtok ); + eat( TokType::Preprocess_Macro ); + break; + case TokType::Preprocess_Pragma: member = parse_pragma(); break; @@ -2174,16 +2322,23 @@ CodeBody parse_global_nspace( CodeT which ) member = parse_preprocess_cond(); break; + case TokType::Preprocess_Macro: + member = untyped_str( currtok ); + eat( TokType::Preprocess_Macro ); + break; + case TokType::Preprocess_Pragma: member = parse_pragma(); break; case TokType::Preprocess_Else: member = preprocess_else; + eat( TokType::Preprocess_Else ); break; case TokType::Preprocess_EndIf: member = preprocess_endif; + eat( TokType::Preprocess_EndIf ); break; case TokType::Module_Export: @@ -2273,6 +2428,7 @@ CodeBody parse_global_nspace( CodeT which ) return CodeInvalid; } + // log_fmt("Global Body Member: %s", member->debug_str()); result.append( member ); } diff --git a/project/enums/ETokType.csv b/project/enums/ETokType.csv index 9a172c2..f4e624f 100644 --- a/project/enums/ETokType.csv +++ b/project/enums/ETokType.csv @@ -7,8 +7,8 @@ Access_StaticSymbol, "::" Ampersand, "&" Ampersand_DBL, "&&" Assign_Classifer, ":" -Attribute_Open, "[[" -Attribute_Close, "]]" +Attribute_Open, "[[" +Attribute_Close, "]]" BraceCurly_Open, "{" BraceCurly_Close, "}" BraceSquare_Open, "[" @@ -38,23 +38,24 @@ Module_Export, "export" Number, "number" Operator, "operator" Preprocess_Define, "#define" -Preprocess_If, "#if" -Preprocess_IfDef, "#ifdef" -Preprocess_IfNotDef, "#ifndef" -Preprocess_ElIf, "#elif" -Preprocess_Else, "#else" -Preprocess_EndIf, "#endif" -Preprocess_Include, "#include" -Preprocess_Pragma, "#pragma" +Preprocess_If, 
"if" +Preprocess_IfDef, "ifdef" +Preprocess_IfNotDef, "ifndef" +Preprocess_ElIf, "elif" +Preprocess_Else, "else" +Preprocess_EndIf, "endif" +Preprocess_Include, "include" +Preprocess_Pragma, "pragma" +Preprocess_Macro, "macro" Spec_Alignas, "alignas" Spec_Const, "const" Spec_Consteval, "consteval" Spec_Constexpr, "constexpr" Spec_Constinit, "constinit" -Spec_Explicit, "explicit" +Spec_Explicit, "explicit" Spec_Extern, "extern" -Spec_Final, "final" -Spec_Global, "global" +Spec_Final, "final" +Spec_Global, "global" Spec_Inline, "inline" Spec_Internal_Linkage, "internal" Spec_LocalPersist, "local_persist" @@ -66,12 +67,12 @@ Spec_Volatile, "volatile" Star, "*" Statement_End, ";" String, "string" -Type_Unsigned, "unsigned" +Type_Unsigned, "unsigned" Type_Signed, "signed" Type_Short, "short" Type_Long, "long" -Type_char, "char" -Type_int, "int" -Type_double, "double" +Type_char, "char" +Type_int, "int" +Type_double, "double" Varadic_Argument, "..." Attributes_Start, "__attrib_start__" diff --git a/scripts/.clang-format b/scripts/.clang-format index 26f9cc8..9f67e41 100644 --- a/scripts/.clang-format +++ b/scripts/.clang-format @@ -36,8 +36,8 @@ AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: true AlwaysBreakTemplateDeclarations: Yes -BinPackArguments: false -BinPackParameters: false +BinPackArguments: true +BinPackParameters: true BitFieldColonSpacing: Both @@ -73,7 +73,7 @@ BreakBeforeTernaryOperators: true BreakConstructorInitializers: BeforeComma BreakStringLiterals: true -ColumnLimit: 180 +ColumnLimit: 120 CompactNamespaces: true @@ -103,7 +103,7 @@ IndentWidth: 4 IndentWrappedFunctionNames: false # InsertNewlineAtEOF: true -InsertTrailingCommas: Wrapped +# InsertTrailingCommas: Wrapped LambdaBodyIndentation: OuterScope diff --git a/scripts/msvc/devshell.ps1 b/scripts/msvc/devshell.ps1 index 459a164..531bdfa 100644 --- a/scripts/msvc/devshell.ps1 +++ b/scripts/msvc/devshell.ps1 @@ -1,26 +1,23 @@ -# This script is used to iniitate the MSVC 
DevShell -$vs_devshell = @() -@("enterprise", "professional", "community") | ForEach-Object { - $vs_devshell_2022 = "C:\Program Files\Microsoft Visual Studio\2022\" + $_ + "\Common7\Tools\Launch-VsDevShell.ps1" - $vs_devshell_2019 = "C:\Program Files (x86)\Microsoft Visual Studio\2019\" + $_ + "\Common7\Tools\Launch-VsDevShell.ps1" +$ErrorActionPreference = "Stop" - $vs_devshell += @( $vs_devshell_2022, $vs_devshell_2019 ) +# Use vswhere to find the latest Visual Studio installation +$vswhere_out = & "C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -latest -property installationPath +if ($null -eq $vswhere_out) { + Write-Host "ERROR: Visual Studio installation not found" + exit 1 } -$found = $false -foreach($path in $vs_devshell) { - if (Test-Path $path) { - write-host "Found $path" +# Find Launch-VsDevShell.ps1 in the Visual Studio installation +$vs_path = $vswhere_out +$vs_devshell = Join-Path $vs_path "\Common7\Tools\Launch-VsDevShell.ps1" - Push-Location # Save the current path, loading the script will change it. 
- & $path - Pop-Location - - $found = $true - break; - } +if ( -not (Test-Path $vs_devshell) ) { + Write-Host "ERROR: Launch-VsDevShell.ps1 not found in Visual Studio installation" + Write-Host Tested path: $vs_devshell + exit 1 } -if (-not $found) { - write-host "MSVC DevShell: No valid path found" -} +# Launch the Visual Studio Developer Shell +Push-Location +& $vs_devshell @args +Pop-Location diff --git a/test/test.cpp b/test/test.cpp index a91dbfa..50aceba 100644 --- a/test/test.cpp +++ b/test/test.cpp @@ -5,6 +5,7 @@ #include "gen.cpp" #include "sanity.cpp" #include "SOA.cpp" +#include "test.singleheader_ast.cpp" #if GEN_TIME int gen_main() @@ -12,10 +13,12 @@ int gen_main() using namespace gen; log_fmt("\ngen_time:"); - check_sanity(); + // check_sanity(); check_SOA(); + check_singleheader_ast(); + return 0; } #endif diff --git a/test/test.singleheader_ast.cpp b/test/test.singleheader_ast.cpp new file mode 100644 index 0000000..cf26097 --- /dev/null +++ b/test/test.singleheader_ast.cpp @@ -0,0 +1,31 @@ +#pragma once + +#define GEN_DEFINE_LIBRARY_CODE_CONSTANTS +#define GEN_ENFORCE_STRONG_CODE_TYPES +#define GEN_EXPOSE_BACKEND +#define GEN_BENCHMARK +#include "gen.hpp" +#include "file_processors/scanner.hpp" +using namespace gen; + +void check_singleheader_ast() +{ + #define project_dir "../../" + gen::init(); + log_fmt("\ncheck_singleheader_ast:\n"); + + FileContents file = file_read_contents( GlobalAllocator, true, project_dir "singleheader/gen/gen.hpp" ); + + CodeBody ast = parse_global_body( { file.size, (char const*)file.data } ); + + log_fmt("generated AST!!!"); + + Builder builder; + builder.open( "singleheader_copy.hpp" ); + log_fmt("serializng ast"); + builder.print( ast ); + builder.write(); + + log_fmt("passed!!\n"); + gen::deinit(); +} From 21a8f3bb39a98ce2ab88f71910710f1eaa2348c6 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Tue, 1 Aug 2023 00:42:08 -0400 Subject: [PATCH 3/9] WIP: It can parse to around ~2k lines. 
Need to improve its ability to detect when a forward declare of a class/enum/struct/union.. This language truly is a mess. --- project/components/ast.cpp | 63 ++- project/components/data_structures.hpp | 6 + project/components/etoktype.cpp | 27 +- project/components/interface.parsing.cpp | 499 +++++++++++++++++++---- project/enums/ETokType.csv | 27 +- 5 files changed, 500 insertions(+), 122 deletions(-) diff --git a/project/components/ast.cpp b/project/components/ast.cpp index c90d58f..c4ea141 100644 --- a/project/components/ast.cpp +++ b/project/components/ast.cpp @@ -107,13 +107,16 @@ String AST::to_string() } else { - result.append_fmt( "%s \n{\n%s\n};", Name, Body->to_string() ); + result.append_fmt( "%s \n{\n%s\n}", Name, Body->to_string() ); } } else { - result.append_fmt( "class %s\n{\n%s\n};", Name, Body->to_string() ); + result.append_fmt( "class %s\n{\n%s\n}", Name, Body->to_string() ); } + + if ( Parent && Parent->Type != ECode::Typedef ) + result.append(";"); } break; @@ -123,9 +126,12 @@ String AST::to_string() result.append( "export " ); if ( Attributes ) - result.append_fmt( "class %s %s;", Attributes->to_string(), Name ); + result.append_fmt( "class %s %s", Attributes->to_string(), Name ); - else result.append_fmt( "class %s;", Name ); + else result.append_fmt( "class %s", Name ); + + if ( Parent && Parent->Type != ECode::Typedef ) + result.append(";"); } break; @@ -142,21 +148,24 @@ String AST::to_string() result.append_fmt( "%s ", Attributes->to_string() ); if ( UnderlyingType ) - result.append_fmt( "%s : %s\n{\n%s\n};" + result.append_fmt( "%s : %s\n{\n%s\n}" , Name , UnderlyingType->to_string() , Body->to_string() ); - else result.append_fmt( "%s\n{\n%s\n};" + else result.append_fmt( "%s\n{\n%s\n}" , Name , Body->to_string() ); } - else result.append_fmt( "enum %s\n{\n%s\n};" + else result.append_fmt( "enum %s\n{\n%s\n}" , Name , Body->to_string() ); + + if ( Parent && Parent->Type != ECode::Typedef ) + result.append(";"); } break; @@ -168,7 
+177,10 @@ String AST::to_string() if ( Attributes ) result.append_fmt( "%s ", Attributes->to_string() ); - result.append_fmt( "enum %s : %s;", Name, UnderlyingType->to_string() ); + result.append_fmt( "enum %s : %s", Name, UnderlyingType->to_string() ); + + if ( Parent && Parent->Type != ECode::Typedef ) + result.append(";"); } break; @@ -188,7 +200,7 @@ String AST::to_string() if ( UnderlyingType ) { - result.append_fmt( "%s : %s\n{\n%s\n};" + result.append_fmt( "%s : %s\n{\n%s\n}" , Name , UnderlyingType->to_string() , Body->to_string() @@ -196,7 +208,7 @@ String AST::to_string() } else { - result.append_fmt( "%s\n{\n%s\n};" + result.append_fmt( "%s\n{\n%s\n}" , Name , Body->to_string() ); @@ -204,10 +216,13 @@ String AST::to_string() } else { - result.append_fmt( "enum class %s\n{\n%s\n};" + result.append_fmt( "enum class %s\n{\n%s\n}" , Body->to_string() ); } + + if ( Parent && Parent->Type != ECode::Typedef ) + result.append(";"); } break; @@ -221,7 +236,10 @@ String AST::to_string() if ( Attributes ) result.append_fmt( "%s ", Attributes->to_string() ); - result.append_fmt( "%s : %s;", Name, UnderlyingType->to_string() ); + result.append_fmt( "%s : %s", Name, UnderlyingType->to_string() ); + + if ( Parent && Parent->Type != ECode::Typedef ) + result.append(";"); } break; @@ -587,13 +605,16 @@ String AST::to_string() { if ( Name ) - result.append_fmt( "%s \n{\n%s\n};", Name, Body->to_string() ); + result.append_fmt( "%s \n{\n%s\n}", Name, Body->to_string() ); } } else { - result.append_fmt( "struct %s\n{\n%s\n};", Name, Body->to_string() ); + result.append_fmt( "struct %s\n{\n%s\n}", Name, Body->to_string() ); } + + if ( Parent && Parent->Type != ECode::Typedef ) + result.append(";"); } break; @@ -603,9 +624,12 @@ String AST::to_string() result.append( "export " ); if ( Attributes ) - result.append_fmt( "struct %s %s;", Attributes->to_string(), Name ); + result.append_fmt( "struct %s %s", Attributes->to_string(), Name ); - else result.append_fmt( "struct %s;", 
Name ); + else result.append_fmt( "struct %s", Name ); + + if ( Parent && Parent->Type != ECode::Typedef ) + result.append(";"); } break; @@ -670,7 +694,7 @@ String AST::to_string() if ( Name ) { - result.append_fmt( "%s\n{\n%s\n};" + result.append_fmt( "%s\n{\n%s\n}" , Name , Body->to_string() ); @@ -678,10 +702,13 @@ String AST::to_string() else { // Anonymous union - result.append_fmt( "\n{\n%s\n};" + result.append_fmt( "\n{\n%s\n}" , Body->to_string() ); } + + if ( Parent && Parent->Type != ECode::Typedef ) + result.append(";"); } break; diff --git a/project/components/data_structures.hpp b/project/components/data_structures.hpp index 4270583..6fdfcc3 100644 --- a/project/components/data_structures.hpp +++ b/project/components/data_structures.hpp @@ -489,6 +489,12 @@ struct CodeSpecifiers bool append( SpecifierT spec ) { + if ( ast == nullptr ) + { + log_failure("CodeSpecifiers: Attempted to append to a null specifiers AST!"); + return false; + } + if ( raw()->NumEntries == AST::ArrSpecs_Cap ) { log_failure("CodeSpecifiers: Attempted to append over %d specifiers to a specifiers AST!", AST::ArrSpecs_Cap ); diff --git a/project/components/etoktype.cpp b/project/components/etoktype.cpp index dbb17b9..fd3952b 100644 --- a/project/components/etoktype.cpp +++ b/project/components/etoktype.cpp @@ -6,7 +6,7 @@ namespace Parser For the sake of scanning files, it can scan preprocessor directives - Attributes_Start is only used to indicate the start of the user_defined attribute list. + __Attributes_Start is only used to indicate the start of the user_defined attribute list. 
*/ #ifndef GEN_DEFINE_ATTRIBUTE_TOKENS @@ -33,8 +33,8 @@ namespace Parser Entry( BraceSquare_Close, "]" ) \ Entry( Capture_Start, "(" ) \ Entry( Capture_End, ")" ) \ - Entry( Comment, "comment" ) \ - Entry( Char, "character" ) \ + Entry( Comment, "__comment__" ) \ + Entry( Char, "__character__" ) \ Entry( Comma, "," ) \ Entry( Decl_Class, "class" ) \ Entry( Decl_GNU_Attribute, "__attribute__" ) \ @@ -50,11 +50,11 @@ namespace Parser Entry( Decl_Typedef, "typedef" ) \ Entry( Decl_Using, "using" ) \ Entry( Decl_Union, "union" ) \ - Entry( Identifier, "identifier" ) \ + Entry( Identifier, "__identifier__" ) \ Entry( Module_Import, "import" ) \ Entry( Module_Export, "export" ) \ - Entry( Number, "number" ) \ - Entry( Operator, "operator" ) \ + Entry( Number, "__number__" ) \ + Entry( Operator, "__operator__" ) \ Entry( Preprocess_Define, "define") \ Entry( Preprocess_If, "if") \ Entry( Preprocess_IfDef, "ifdef") \ @@ -64,8 +64,9 @@ namespace Parser Entry( Preprocess_EndIf, "endif") \ Entry( Preprocess_Include, "include" ) \ Entry( Preprocess_Pragma, "pragma") \ - Entry( Preprocess_Content, "macro content") \ - Entry( Preprocess_Macro, "macro") \ + Entry( Preprocess_Content, "__macro_content__") \ + Entry( Preprocess_Macro, "__macro__") \ + Entry( Preprocess_Unsupported, "__unsupported__" ) \ Entry( Spec_Alignas, "alignas" ) \ Entry( Spec_Const, "const" ) \ Entry( Spec_Consteval, "consteval" ) \ @@ -85,7 +86,8 @@ namespace Parser Entry( Spec_Volatile, "volatile") \ Entry( Star, "*" ) \ Entry( Statement_End, ";" ) \ - Entry( String, "string" ) \ + Entry( StaticAssert, "static_assert" ) \ + Entry( String, "__string__" ) \ Entry( Type_Unsigned, "unsigned" ) \ Entry( Type_Signed, "signed" ) \ Entry( Type_Short, "short" ) \ @@ -93,8 +95,13 @@ namespace Parser Entry( Type_char, "char" ) \ Entry( Type_int, "int" ) \ Entry( Type_double, "double" ) \ + Entry( Type_MS_int8, "__int8" ) \ + Entry( Type_MS_int16, "__int16" ) \ + Entry( Type_MS_int32, "__int32" ) \ + Entry( 
Type_MS_int64, "__int64" ) \ + Entry( Type_MS_W64, "_W64" ) \ Entry( Varadic_Argument, "..." ) \ - Entry( Attributes_Start, "__attrib_start__" ) + Entry( __Attributes_Start, "__attrib_start__" ) namespace ETokType { diff --git a/project/components/interface.parsing.cpp b/project/components/interface.parsing.cpp index b71acaf..b1bf188 100644 --- a/project/components/interface.parsing.cpp +++ b/project/components/interface.parsing.cpp @@ -31,7 +31,7 @@ namespace Parser bool is_attribute() { - return Type > TokType::Attributes_Start; + return Type > TokType::__Attributes_Start; } bool is_preprocessor() @@ -124,10 +124,10 @@ namespace Parser } String line = String::make( GlobalAllocator, { length, scope_start.Text } ); - result.append_fmt("\tScope: %s\n", line ); + result.append_fmt("\tScope : %s\n", line ); line.free(); - sptr dist = (sptr)last_valid.Text - (sptr)scope_start.Text; + sptr dist = (sptr)last_valid.Text - (sptr)scope_start.Text + 2; sptr length_from_err = dist; String line_from_err = String::make( GlobalAllocator, { length_from_err, last_valid.Text } ); @@ -168,7 +168,7 @@ namespace Parser { String token_str = String::make( GlobalAllocator, { Arr[Idx].Length, Arr[Idx].Text } ); - log_failure( "Parse Error, TokArray::eat, Expected: %s, not '%s' (%d, %d)`\n%s" + log_failure( "Parse Error, TokArray::eat, Expected: ' %s ' not ' %s ' (%d, %d)`\n%s" , ETokType::to_str(type) , token_str , current().Line @@ -221,8 +221,8 @@ namespace Parser char const* word = scanner; s32 word_length = 0; - s32 line = 0; - s32 column = 0; + s32 line = 1; + s32 column = 1; SkipWhitespace(); if ( left <= 0 ) @@ -236,7 +236,7 @@ namespace Parser HashTable defines; { defines_map_arena = Arena::init_from_memory( defines_map_mem, sizeof(defines_map_mem) ); - defines = HashTable::init( defines_map_arena ); + defines = HashTable::init( defines_map_arena ); } Tokens.clear(); @@ -266,11 +266,72 @@ namespace Parser } token.Type = ETokType::to_type( token ); + + if ( ! 
token.is_preprocessor() ) + { + token.Type = TokType::Preprocess_Unsupported; + + // Its an unsupported directive, skip it + s32 within_string = false; + s32 within_char = false; + while ( left ) + { + if ( current == '"' ) + within_string ^= true; + + if ( current == '\'' ) + within_char ^= true; + + if ( current == '\\' && ! within_string && ! within_char ) + { + move_forward(); + token.Length++; + + if ( current == '\r' ) + { + move_forward(); + token.Length++; + } + + if ( current == '\n' ) + { + move_forward(); + token.Length++; + continue; + } + else + { + String directive_str = String::make_length( GlobalAllocator, token.Text, token.Length ); + + log_failure( "gen::Parser::lex: Invalid escape sequence '\\%c' (%d, %d)" + " in preprocessor directive (%d, %d)\n%s" + , current, line, column + , token.Line, token.Column, directive_str ); + break; + } + } + + if ( current == '\n' ) + { + move_forward(); + token.Length++; + break; + } + + move_forward(); + token.Length++; + } + + Tokens.append( token ); + continue; // Skip found token, its all handled here. 
+ } + Tokens.append( token ); + SkipWhitespace(); + if ( token.Type == TokType::Preprocess_Define ) { - SkipWhitespace(); Token name = { scanner, 0, TokType::Identifier, false, line, column }; name.Text = scanner; @@ -285,13 +346,12 @@ namespace Parser Tokens.append( name ); - s32 key = crc32( name.Text, name.Length ); + u64 key = crc32( name.Text, name.Length ); defines.set( key, name ); } if ( token.Type == TokType::Preprocess_Else || token.Type == TokType::Preprocess_EndIf ) { - SkipWhitespace(); Tokens.append( token ); continue; } @@ -302,7 +362,6 @@ namespace Parser { content.Type = TokType::String; - SkipWhitespace(); if ( current != '"' && current != '<' ) { String directive_str = String::fmt_buf( GlobalAllocator, "%.*s", min( 80, left + content.Length ), token.Text ); @@ -836,6 +895,18 @@ namespace Parser TokType type = ETokType::to_type( token ); + if ( type == ETokType::Decl_Extern_Linkage ) + { + SkipWhitespace(); + + if ( current != '"' ) + type = ETokType::Spec_Extern; + + token.Type = type; + Tokens.append( token ); + continue; + } + if ( type != TokType::Invalid ) { token.Type = type; @@ -843,7 +914,7 @@ namespace Parser continue; } - u32 key = crc32( token.Text, token.Length ); + u64 key = crc32( token.Text, token.Length ); StrC* define = defines.get( key ); if ( define ) { @@ -917,11 +988,13 @@ void deinit_parser() if ( def.Len <= 0 ) \ { \ log_failure( "gen::" stringize(__func__) ": length must greater than 0" ); \ + Parser::Context.pop(); \ return CodeInvalid; \ } \ if ( def.Ptr == nullptr ) \ { \ log_failure( "gen::" stringize(__func__) ": def was null" ); \ + Parser::Context.pop(); \ return CodeInvalid; \ } @@ -942,8 +1015,8 @@ if ( def.Ptr == nullptr ) \ internal Code parse_function_body(); internal Code parse_global_nspace(); -internal CodeClass parse_class (); -internal CodeEnum parse_enum (); +internal CodeClass parse_class ( bool from_typedef = false ); +internal CodeEnum parse_enum ( bool from_typedef = false ); internal CodeBody 
parse_export_body (); internal CodeBody parse_extern_link_body(); internal CodeExtern parse_exten_link (); @@ -951,12 +1024,12 @@ internal CodeFriend parse_friend (); internal CodeFn parse_function (); internal CodeNamespace parse_namespace (); internal CodeOpCast parse_operator_cast (); -internal CodeStruct parse_struct (); +internal CodeStruct parse_struct ( bool from_typedef = false ); internal CodeVar parse_variable (); internal CodeTemplate parse_template (); internal CodeType parse_type (); internal CodeTypedef parse_typedef (); -internal CodeUnion parse_union (); +internal CodeUnion parse_union ( bool from_typedef = false ); internal CodeUsing parse_using (); internal inline @@ -974,6 +1047,7 @@ CodeDefine parse_define() if ( ! check( TokType::Identifier ) ) { log_failure( "Error, expected identifier after #define\n%s", Context.to_string() ); + Context.pop(); return CodeInvalid; } @@ -983,6 +1057,7 @@ CodeDefine parse_define() if ( ! check( TokType::Preprocess_Content )) { log_failure( "Error, expected content after #define %s\n%s", define->Name, Context.to_string() ); + Context.pop(); return CodeInvalid; } @@ -1002,6 +1077,7 @@ CodePreprocessCond parse_preprocess_cond() if ( ! currtok.is_preprocess_cond() ) { log_failure( "Error, expected preprocess conditional\n%s", Context.to_string() ); + Context.pop(); return CodeInvalid; } @@ -1013,6 +1089,7 @@ CodePreprocessCond parse_preprocess_cond() if ( ! check( TokType::Preprocess_Content )) { log_failure( "Error, expected content after #define\n%s", Context.to_string() ); + Context.pop(); return CodeInvalid; } @@ -1032,10 +1109,12 @@ CodeInclude parse_include() CodeInclude include = (CodeInclude) make_code(); include->Type = ECode::Preprocess_Include; + eat( TokType::Preprocess_Include ); if ( ! 
check( TokType::String )) { log_failure( "Error, expected include string after #include\n%s", Context.to_string() ); + Context.pop(); return CodeInvalid; } include->Content = get_cached_string( currtok ); @@ -1059,6 +1138,7 @@ CodePragma parse_pragma() if ( ! check( TokType::Preprocess_Content )) { log_failure( "Error, expected content after #pragma\n%s", Context.to_string() ); + Context.pop(); return CodeInvalid; } @@ -1069,6 +1149,36 @@ CodePragma parse_pragma() return pragma; } +internal inline +Code parse_static_assert() +{ + using namespace Parser; + push_scope(); + + Code + assert = make_code(); + assert->Type = ECode::Untyped; + eat( TokType::StaticAssert ); + + eat( TokType::Capture_Start ); + + s32 level = 0; + while ( left && ( currtok.Type != TokType::Capture_End || level > 0 ) ) + { + if ( currtok.Type == TokType::Capture_Start ) + level++; + else if ( currtok.Type == TokType::Capture_End ) + level--; + + eat( currtok.Type ); + } + eat( TokType::Capture_End ); + eat( TokType::Statement_End ); + + Context.pop(); + return assert; +} + internal inline Code parse_array_decl() { @@ -1082,13 +1192,15 @@ Code parse_array_decl() if ( left == 0 ) { log_failure( "Error, unexpected end of array declaration ( '[]' scope started )\n%s", Context.to_string() ); - return Code::Invalid; + Context.pop(); + return CodeInvalid; } if ( currtok.Type == TokType::BraceSquare_Close ) { log_failure( "Error, empty array expression in typedef definition\n%s", Context.to_string() ); - return Code::Invalid; + Context.pop(); + return CodeInvalid; } Token untyped_tok = currtok; @@ -1105,16 +1217,19 @@ Code parse_array_decl() if ( left == 0 ) { log_failure( "Error, unexpected end of array declaration, expected ]\n%s", Context.to_string() ); - return Code::Invalid; + Context.pop(); + return CodeInvalid; } if ( currtok.Type != TokType::BraceSquare_Close ) { log_failure( "%s: Error, expected ] in array declaration, not %s\n%s", ETokType::to_str( currtok.Type ), Context.to_string() ); - 
return Code::Invalid; + Context.pop(); + return CodeInvalid; } eat( TokType::BraceSquare_Close ); + Context.pop(); return array_expr; } @@ -1185,6 +1300,7 @@ CodeAttributes parse_attributes() if ( len > 0 ) { StrC attribute_txt = { len, start.Text }; + Context.pop(); return def_attributes( attribute_txt ); } @@ -1209,12 +1325,14 @@ Parser::Token parse_identifier() if ( left == 0 ) { log_failure( "Error, unexpected end of static symbol identifier\n%s", Context.to_string() ); + Context.pop(); return { nullptr, 0, TokType::Invalid }; } if ( currtok.Type != TokType::Identifier ) { log_failure( "Error, expected static symbol identifier, not %s\n%s", ETokType::to_str( currtok.Type ), Context.to_string() ); + Context.pop(); return { nullptr, 0, TokType::Invalid }; } @@ -1241,6 +1359,7 @@ Parser::Token parse_identifier() if ( left == 0 ) { log_failure( "Error, unexpected end of template arguments\n%s", Context.to_string() ); + Context.pop(); return { nullptr, 0, TokType::Invalid }; } @@ -1269,6 +1388,7 @@ Parser::Token parse_identifier() if ( left == 0 ) { log_failure( "Error, unexpected end of template arguments\n%s", Context.to_string() ); + Context.pop(); return { nullptr, 0, TokType::Invalid }; } @@ -1300,6 +1420,7 @@ CodeParam parse_params( bool use_template_capture = false ) if ( ! 
use_template_capture && check(TokType::Capture_End) ) { eat( TokType::Capture_End ); + Context.pop(); return { nullptr }; } @@ -1310,12 +1431,16 @@ CodeParam parse_params( bool use_template_capture = false ) { eat( TokType::Varadic_Argument ); + Context.pop(); return param_varadic; } type = parse_type(); if ( type == Code::Invalid ) + { + Context.pop(); return CodeInvalid; + } Token name = { nullptr, 0, TokType::Invalid, false }; @@ -1333,6 +1458,7 @@ CodeParam parse_params( bool use_template_capture = false ) if ( currtok.Type == TokType::Statement_End ) { log_failure( "Expected value after assignment operator\n%s.", Context.to_string() ); + Context.pop(); return CodeInvalid; } @@ -1379,7 +1505,10 @@ CodeParam parse_params( bool use_template_capture = false ) type = parse_type(); if ( type == Code::Invalid ) + { + Context.pop(); return CodeInvalid; + } name = { nullptr, 0, TokType::Invalid, false }; @@ -1397,6 +1526,7 @@ CodeParam parse_params( bool use_template_capture = false ) if ( currtok.Type == TokType::Statement_End ) { log_failure( "Expected value after assignment operator\n%s", Context.to_string() ); + Context.pop(); return CodeInvalid; } @@ -1433,6 +1563,7 @@ CodeParam parse_params( bool use_template_capture = false ) if ( ! 
check( TokType::Operator) || currtok.Text[0] != '>' ) { log_failure("Expected '<' after 'template' keyword\n%s", Context.to_string() ); + Context.pop(); return CodeInvalid; } eat( TokType::Operator ); @@ -1460,6 +1591,13 @@ CodeFn parse_function_after_name( while ( left && currtok.is_specifier() ) { + if ( specifiers.ast == nullptr ) + { + specifiers = def_specifier( ESpecifier::to_type(currtok) ); + eat( currtok.Type ); + continue; + } + specifiers.append( ESpecifier::to_type(currtok) ); eat( currtok.Type ); } @@ -1469,7 +1607,10 @@ CodeFn parse_function_after_name( { body = parse_function_body(); if ( body == Code::Invalid ) + { + Context.pop(); return CodeInvalid; + } } else { @@ -1494,6 +1635,7 @@ CodeFn parse_function_after_name( default: { log_failure("Body must be either of Function_Body or Untyped type, %s\n%s", body.debug_str(), Context.to_string()); + Context.pop(); return CodeInvalid; } } @@ -1536,6 +1678,7 @@ CodeOperator parse_operator_after_ret_type( if ( ! check( TokType::Operator ) ) { log_failure( "Expected operator after 'operator' keyword\n%s", Context.to_string() ); + Context.pop(); return CodeInvalid; } @@ -1722,6 +1865,7 @@ CodeOperator parse_operator_after_ret_type( if ( op == Invalid ) { log_failure( "Invalid operator '%s'\n%s", currtok.Text, Context.to_string() ); + Context.pop(); return CodeInvalid; } @@ -1732,6 +1876,13 @@ CodeOperator parse_operator_after_ret_type( while ( left && currtok.is_specifier() ) { + if ( specifiers.ast == nullptr ) + { + specifiers = def_specifier( ESpecifier::to_type(currtok) ); + eat( currtok.Type ); + continue; + } + specifiers.append( ESpecifier::to_type(currtok) ); eat( currtok.Type ); } @@ -1742,7 +1893,10 @@ CodeOperator parse_operator_after_ret_type( { body = parse_function_body(); if ( body == Code::Invalid ) + { + Context.pop(); return CodeInvalid; + } } else { @@ -1780,6 +1934,7 @@ CodeVar parse_variable_after_name( if ( currtok.Type == TokType::Statement_End ) { log_failure( "Expected expression 
after assignment operator\n%s", Context.to_string() ); + Context.pop(); return CodeInvalid; } @@ -1826,7 +1981,7 @@ Code parse_variable_assignment() using namespace Parser; push_scope(); - Code expr = Code::Invalid; + Code expr = CodeInvalid; if ( currtok.IsAssign ) { @@ -1837,7 +1992,8 @@ Code parse_variable_assignment() if ( currtok.Type == TokType::Statement_End ) { log_failure( "Expected expression after assignment operator\n%s", Context.to_string() ); - return Code::Invalid; + Context.pop(); + return CodeInvalid; } while ( left && currtok.Type != TokType::Statement_End ) @@ -1859,14 +2015,17 @@ Code parse_operator_function_or_variable( bool expects_function, CodeAttributes using namespace Parser; push_scope(); - Code result = Code::Invalid; + Code result = CodeInvalid; CodeType type = parse_type(); - if ( type == Code::Invalid ) + if ( type == CodeInvalid ) + { + Context.pop(); return CodeInvalid; + } - if ( check( TokType::Operator) ) + if ( check( TokType::Decl_Operator) ) { // Dealing with an operator overload result = parse_operator_after_ret_type( ModuleFlag::None, attributes, specifiers, type ); @@ -1887,7 +2046,8 @@ Code parse_operator_function_or_variable( bool expects_function, CodeAttributes if ( expects_function ) { log_failure( "Expected function declaration (consteval was used)\n%s", Context.to_string() ); - return Code::Invalid; + Context.pop(); + return CodeInvalid; } // Dealing with a variable @@ -1951,11 +2111,59 @@ CodeBody parse_class_struct_body( Parser::TokType which ) break; case TokType::Decl_Class: + { + s32 idx = Context.Tokens.Idx; + for ( ; Context.Tokens.Arr[idx].Type != TokType::Statement_End; idx ++) + { + if ( Context.Tokens.Arr[idx].Type == TokType::BraceCurly_Open ) + { + idx = Context.Tokens.Idx; + break; + } + } + + Token tok_before_end = Context.Tokens.Arr[ idx ]; + if ( tok_before_end.Type == TokType::Identifier + || tok_before_end.Type == TokType::Capture_End + || tok_before_end.Type == TokType::Spec_Const + || 
tok_before_end.Type == TokType::Spec_Volatile + || tok_before_end.Type == TokType::Spec_Override + || tok_before_end.Type == TokType::Spec_Final ) + { + member = parse_operator_function_or_variable( false, attributes, specifiers ); + break; + } + member = parse_class(); + } break; case TokType::Decl_Enum: + { + s32 idx = Context.Tokens.Idx; + for ( ; Context.Tokens.Arr[idx].Type != TokType::Statement_End; idx ++) + { + if ( Context.Tokens.Arr[idx].Type == TokType::BraceCurly_Open ) + { + idx = Context.Tokens.Idx; + break; + } + } + + Token tok_before_end = Context.Tokens.Arr[ idx ]; + if ( tok_before_end.Type == TokType::Identifier + || tok_before_end.Type == TokType::Capture_End + || tok_before_end.Type == TokType::Spec_Const + || tok_before_end.Type == TokType::Spec_Volatile + || tok_before_end.Type == TokType::Spec_Override + || tok_before_end.Type == TokType::Spec_Final ) + { + member = parse_operator_function_or_variable( false, attributes, specifiers ); + break; + } + member = parse_enum(); + } break; case TokType::Decl_Friend: @@ -1967,7 +2175,31 @@ CodeBody parse_class_struct_body( Parser::TokType which ) break; case TokType::Decl_Struct: + { + s32 idx = Context.Tokens.Idx; + for ( ; Context.Tokens.Arr[idx].Type != TokType::Statement_End; idx ++) + { + if ( Context.Tokens.Arr[idx].Type == TokType::BraceCurly_Open ) + { + idx = Context.Tokens.Idx; + break; + } + } + + Token tok_before_end = Context.Tokens.Arr[ idx ]; + if ( tok_before_end.Type == TokType::Identifier + || tok_before_end.Type == TokType::Capture_End + || tok_before_end.Type == TokType::Spec_Const + || tok_before_end.Type == TokType::Spec_Volatile + || tok_before_end.Type == TokType::Spec_Override + || tok_before_end.Type == TokType::Spec_Final ) + { + member = parse_operator_function_or_variable( false, attributes, specifiers ); + break; + } + member = parse_struct(); + } break; case TokType::Decl_Template: @@ -1979,7 +2211,32 @@ CodeBody parse_class_struct_body( Parser::TokType which ) break; 
case TokType::Decl_Union: - member = parse_variable(); + { + s32 idx = Context.Tokens.Idx; + for ( ; Context.Tokens.Arr[idx].Type != TokType::Statement_End; idx ++) + { + if ( Context.Tokens.Arr[idx].Type == TokType::BraceCurly_Open ) + { + idx = Context.Tokens.Idx; + break; + } + } + + Token tok_before_end = Context.Tokens.Arr[ idx ]; + if ( tok_before_end.Type == TokType::Identifier + || tok_before_end.Type == TokType::Capture_End + || tok_before_end.Type == TokType::Spec_Const + || tok_before_end.Type == TokType::Spec_Volatile + || tok_before_end.Type == TokType::Spec_Override + || tok_before_end.Type == TokType::Spec_Final ) + { + member = parse_operator_function_or_variable( false, attributes, specifiers ); + break; + } + + member = parse_union(); + } + break; case TokType::Decl_Using: @@ -2018,6 +2275,15 @@ CodeBody parse_class_struct_body( Parser::TokType which ) member = preprocess_endif; break; + case TokType::Preprocess_Unsupported: + member = untyped_str( currtok ); + eat( TokType::Preprocess_Unsupported ); + break; + + case TokType::StaticAssert: + member = parse_static_assert(); + break; + case TokType::Attribute_Open: case TokType::Decl_GNU_Attribute: case TokType::Decl_MSVC_Attribute: @@ -2059,6 +2325,7 @@ CodeBody parse_class_struct_body( Parser::TokType which ) default: log_failure( "Invalid specifier %s for variable\n%s", ESpecifier::to_str(spec), Context.to_string() ); + Context.pop(); return CodeInvalid; } @@ -2103,6 +2370,7 @@ CodeBody parse_class_struct_body( Parser::TokType which ) if ( member == Code::Invalid ) { log_failure( "Failed to parse member\n%s", Context.to_string() ); + Context.pop(); return CodeInvalid; } @@ -2115,7 +2383,7 @@ CodeBody parse_class_struct_body( Parser::TokType which ) } internal -Code parse_class_struct( Parser::TokType which ) +Code parse_class_struct( Parser::TokType which, bool from_typedef = false ) { using namespace Parser; @@ -2183,7 +2451,8 @@ Code parse_class_struct( Parser::TokType which ) body = 
parse_class_struct_body( which ); } - eat( TokType::Statement_End ); + if ( ! from_typedef ) + eat( TokType::Statement_End ); if ( which == TokType::Decl_Class ) result = def_class( name, body, parent, access, attributes, mflags ); @@ -2288,6 +2557,11 @@ CodeBody parse_global_nspace( CodeT which ) break; case TokType::Decl_Struct: + if ( currtok.Line >= 2202 ) + { + log_fmt("here"); + } + member = parse_struct(); break; @@ -2341,6 +2615,15 @@ CodeBody parse_global_nspace( CodeT which ) eat( TokType::Preprocess_EndIf ); break; + case TokType::Preprocess_Unsupported: + member = untyped_str( currtok ); + eat( TokType::Preprocess_Unsupported ); + break; + + case TokType::StaticAssert: + member = parse_static_assert(); + break; + case TokType::Module_Export: if ( which == Export_Body ) log_failure( "Nested export declaration\n%s", Context.to_string() ); @@ -2382,6 +2665,7 @@ CodeBody parse_global_nspace( CodeT which ) { case ESpecifier::Constexpr: case ESpecifier::Constinit: + case ESpecifier::External_Linkage: case ESpecifier::Inline: case ESpecifier::Mutable: case ESpecifier::Static: @@ -2439,11 +2723,11 @@ CodeBody parse_global_nspace( CodeT which ) } internal -CodeClass parse_class() +CodeClass parse_class( bool from_typedef ) { using namespace Parser; push_scope(); - CodeClass result = (CodeClass) parse_class_struct( Parser::TokType::Decl_Class ); + CodeClass result = (CodeClass) parse_class_struct( Parser::TokType::Decl_Class, from_typedef ); Context.pop(); return result; } @@ -2465,7 +2749,7 @@ CodeClass parse_class( StrC def ) } internal -CodeEnum parse_enum() +CodeEnum parse_enum( bool from_typedef ) { using namespace Parser; using namespace ECode; @@ -2497,6 +2781,7 @@ CodeEnum parse_enum() if ( currtok.Type != TokType::Identifier ) { log_failure( "Expected identifier for enum name\n%s", Context.to_string() ); + Context.pop(); return CodeInvalid; } @@ -2509,7 +2794,11 @@ CodeEnum parse_enum() type = parse_type(); if ( type == Code::Invalid ) + { + log_failure( 
"Failed to parse enum classifier\n%s", Context.to_string() ); + Context.pop(); return CodeInvalid; + } } if ( currtok.Type == TokType::BraceCurly_Open ) @@ -2543,7 +2832,8 @@ CodeEnum parse_enum() eat( TokType::BraceCurly_Close ); } - eat( TokType::Statement_End ); + if ( ! from_typedef ) + eat( TokType::Statement_End ); using namespace ECode; @@ -2580,7 +2870,10 @@ CodeEnum parse_enum( StrC def ) TokArray toks = lex( def ); if ( toks.Arr == nullptr ) + { + Context.pop(); return CodeInvalid; + } Context.Tokens = toks; return parse_enum(); @@ -2642,6 +2935,7 @@ CodeExtern parse_extern_link() if ( entry == Code::Invalid ) { log_failure( "Failed to parse body\n%s", Context.to_string() ); + Context.pop(); return result; } @@ -2678,7 +2972,10 @@ CodeFriend parse_friend() // Type declaration or return type CodeType type = parse_type(); if ( type == Code::Invalid ) + { + Context.pop(); return CodeInvalid; + } // Funciton declaration if ( currtok.Type == TokType::Identifier ) @@ -2764,6 +3061,7 @@ CodeFn parse_functon() default: log_failure( "Invalid specifier %s for functon\n%s", ESpecifier::to_str(spec), Context.to_string() ); + Context.pop(); return CodeInvalid; } @@ -2782,11 +3080,17 @@ CodeFn parse_functon() CodeType ret_type = parse_type(); if ( ret_type == Code::Invalid ) + { + Context.pop(); return CodeInvalid; + } Token name = parse_identifier(); if ( ! 
name ) + { + Context.pop(); return CodeInvalid; + } CodeFn result = parse_function_after_name( mflags, attributes, specifiers, ret_type, name ); @@ -2835,7 +3139,10 @@ CodeNamespace parse_namespace() CodeBody body = parse_global_nspace( ECode::Namespace_Body ); if ( body == Code::Invalid ) + { + Context.pop(); return CodeInvalid; + } CodeNamespace result = (CodeNamespace) make_code(); @@ -2896,6 +3203,7 @@ CodeOperator parse_operator() default: log_failure( "Invalid specifier " "%s" " for operator\n%s", ESpecifier::to_str(spec), Context.to_string() ); + Context.pop(); return CodeInvalid; } @@ -2949,7 +3257,10 @@ CodeOpCast parse_operator_cast() CodeSpecifiers specifiers = { nullptr }; if ( check(TokType::Spec_Const)) + { specifiers = spec_const; + eat( TokType::Spec_Const ); + } Code body = { nullptr }; @@ -3013,11 +3324,11 @@ CodeOpCast parse_operator_cast( StrC def ) } internal inline -CodeStruct parse_struct() +CodeStruct parse_struct( bool from_typedef ) { using namespace Parser; push_scope(); - CodeStruct result = (CodeStruct) parse_class_struct( TokType::Decl_Struct ); + CodeStruct result = (CodeStruct) parse_class_struct( TokType::Decl_Struct, from_typedef ); Context.pop(); return result; } @@ -3058,7 +3369,10 @@ CodeTemplate parse_template() Code params = parse_params( UseTemplateCapture ); if ( params == Code::Invalid ) + { + Context.pop(); return CodeInvalid; + } Code definition = { nullptr }; @@ -3072,7 +3386,7 @@ CodeTemplate parse_template() if ( check( TokType::Decl_Struct ) ) { - definition = parse_enum(); + definition = parse_struct(); break; } @@ -3120,6 +3434,7 @@ CodeTemplate parse_template() default: log_failure( "Invalid specifier %s for variable or function\n%s", ESpecifier::to_str( spec ), Context.to_string() ); + Context.pop(); return CodeInvalid; } @@ -3189,6 +3504,7 @@ CodeType parse_type() if ( spec != ESpecifier::Const ) { log_failure( "Error, invalid specifier used in type definition: %s\n%s", currtok.Text, Context.to_string() ); + 
Context.pop(); return CodeInvalid; } @@ -3200,6 +3516,7 @@ CodeType parse_type() if ( left == 0 ) { log_failure( "Error, unexpected end of type definition\n%s", Context.to_string() ); + Context.pop(); return CodeInvalid; } @@ -3213,12 +3530,12 @@ CodeType parse_type() name.Length = ( (sptr)currtok.Text + currtok.Length ) - (sptr)name.Text; eat( TokType::Identifier ); } - else if ( currtok.Type >= TokType::Type_Unsigned ) + else if ( currtok.Type >= TokType::Type_Unsigned && currtok.Type <= TokType::Type_MS_W64 ) { name = currtok; eat( currtok.Type ); - while (currtok.Type >= TokType::Type_Unsigned) + while (currtok.Type >= TokType::Type_Unsigned && currtok.Type <= TokType::Type_MS_W64 ) { eat( currtok.Type ); } @@ -3229,7 +3546,11 @@ CodeType parse_type() { name = parse_identifier(); if ( ! name ) + { + log_failure( "Error, failed to type signature\n%s", Context.to_string() ); + Context.pop(); return CodeInvalid; + } // Problably dealing with a templated symbol if ( currtok.Type == TokType::Operator && currtok.Text[0] == '<' && currtok.Length == 1 ) @@ -3265,6 +3586,7 @@ CodeType parse_type() && spec != ESpecifier::RValue ) { log_failure( "Error, invalid specifier used in type definition: %s\n%s", currtok.Text, Context.to_string() ); + Context.pop(); return CodeInvalid; } @@ -3273,43 +3595,27 @@ CodeType parse_type() eat( currtok.Type ); } - // Not sure if its technically possible to cast ot a function pointer user defined operator cast... - // Supporting it is not worth the effort. +BruteforceCaptureAgain: if ( check( TokType::Capture_Start ) && context_tok.Type != TokType::Decl_Operator ) { - // Its a function type + // Brute force capture the entire thing. + // Function typedefs are complicated and there are not worth dealing with for validation at this point... eat( TokType::Capture_Start ); - while ( check( TokType::Star ) || currtok.Type == TokType::Spec_Const ) - { - eat( currtok.Type ); - } - - // if its a using statement there will not be an ID. 
- if ( check( TokType::Identifier ) ) - eat(TokType::Identifier); - - eat( TokType::Capture_End ); - - // Parameters - - eat( TokType::Capture_Start ); - - // TODO : Change this to validate the parameters... - // Bruteforce lex the parameters, no validation. s32 level = 0; - while ( ! check( TokType::Capture_End ) || level > 0 ) + while ( left && ( currtok.Type != TokType::Capture_End || level > 0 )) { - if ( check( TokType::Capture_Start ) ) + if ( currtok.Type == TokType::Capture_Start ) level++; - if ( check( TokType::Capture_End ) ) + if ( currtok.Type == TokType::Capture_End ) level--; eat( currtok.Type ); } + eat( TokType::Capture_End ); - eat(TokType::Capture_End); + goto BruteforceCaptureAgain; brute_sig.Length = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)brute_sig.Text; } @@ -3376,29 +3682,51 @@ CodeTypedef parse_typedef() eat( TokType::Decl_Typedef ); - if ( check( TokType::Decl_Enum ) ) - type = parse_enum(); - - else if ( check(TokType::Decl_Class ) ) - type = parse_class(); - - else if ( check(TokType::Decl_Struct ) ) - type = parse_struct(); - - else if ( check(TokType::Decl_Union) ) - type = parse_union(); - - else - type = parse_type(); - - if ( ! check( TokType::Identifier ) ) + if ( currtok.Line == 2196 ) { - log_failure( "Error, expected identifier for typedef\n%s", Context.to_string() ); - return CodeInvalid; + log_fmt("here"); } - name = currtok; - eat( TokType::Identifier ); + constexpr bool from_typedef = true; + + // TODO : Confirm if this should stay... (Macro abuse, kept because used by zpl library code...) + // TODO : I could refactor the library code to not use this, and just ban it from usage + // TODO : (as I already do for all macros that are not at entries in a body ast...) 
+ if ( check( TokType::Preprocess_Macro )) + { + type = t_empty; + name = currtok; + eat( TokType::Preprocess_Macro ); + } + else + { + if ( check( TokType::Decl_Enum ) ) + type = parse_enum( from_typedef ); + + else if ( check(TokType::Decl_Class ) ) + type = parse_class( from_typedef ); + + else if ( check(TokType::Decl_Struct ) ) + type = parse_struct( from_typedef ); + + else if ( check(TokType::Decl_Union) ) + type = parse_union( from_typedef ); + + else + type = parse_type(); + + if ( check( TokType::Identifier ) ) + { + name = currtok; + eat( TokType::Identifier ); + } + else + { + log_failure( "Error, expected identifier for typedef\n%s", Context.to_string() ); + Context.pop(); + return CodeInvalid; + } + } array_expr = parse_array_decl(); @@ -3435,7 +3763,7 @@ CodeTypedef parse_typedef( StrC def ) } internal -CodeUnion parse_union() +CodeUnion parse_union( bool from_typedef ) { using namespace Parser; push_scope(); @@ -3476,7 +3804,9 @@ CodeUnion parse_union() } eat( TokType::BraceCurly_Close ); - eat( TokType::Statement_End ); + + if ( ! 
from_typedef ) + eat( TokType::Statement_End ); CodeUnion result = (CodeUnion) make_code(); @@ -3641,6 +3971,7 @@ CodeVar parse_variable() default: log_failure( "Invalid specifier %s for variable\n%s", ESpecifier::to_str( spec ), Context.to_string() ); + Context.pop(); return CodeInvalid; } diff --git a/project/enums/ETokType.csv b/project/enums/ETokType.csv index f4e624f..e085541 100644 --- a/project/enums/ETokType.csv +++ b/project/enums/ETokType.csv @@ -15,8 +15,8 @@ BraceSquare_Open, "[" BraceSquare_Close, "]" Capture_Start, "(" Capture_End, ")" -Comment, "comemnt" -Char, "character" +Comment, "__comemnt__" +Char, "__character__" Comma, "," Decl_Class, "class" Decl_GNU_Attribute, "__attribute__" @@ -26,18 +26,18 @@ Decl_Extern_Linkage, "extern" Decl_Friend, "friend" Decl_Module, "module" Decl_Namespace, "namespace" -Decl_Operator, "operator" +Decl_Operator, "__operator__" Decl_Struct, "struct" Decl_Template, "template" Decl_Typedef, "typedef" Decl_Using, "using" Decl_Union, "union" -Identifier, "identifier" +Identifier, "__identifier__" Module_Import, "import" Module_Export, "export" -Number, "number" -Operator, "operator" -Preprocess_Define, "#define" +Number, "__number__" +Operator, "__operator__" +Preprocess_Define, "define" Preprocess_If, "if" Preprocess_IfDef, "ifdef" Preprocess_IfNotDef, "ifndef" @@ -46,7 +46,8 @@ Preprocess_Else, "else" Preprocess_EndIf, "endif" Preprocess_Include, "include" Preprocess_Pragma, "pragma" -Preprocess_Macro, "macro" +Preprocess_Macro, "__macro__" +Preprocess_Unsupported, "__unsupported__" Spec_Alignas, "alignas" Spec_Const, "const" Spec_Consteval, "consteval" @@ -66,7 +67,8 @@ Spec_ThreadLocal, "thread_local" Spec_Volatile, "volatile" Star, "*" Statement_End, ";" -String, "string" +StaticAssert, "static_assert" +String, "__string__" Type_Unsigned, "unsigned" Type_Signed, "signed" Type_Short, "short" @@ -74,5 +76,10 @@ Type_Long, "long" Type_char, "char" Type_int, "int" Type_double, "double" +Type_MS_int8, "__int8" 
+Type_MS_int16, "__int16" +Type_MS_int32, "__int32" +Type_MS_int64, "__int64" +Type_MS_W64, "_W64" Varadic_Argument, "..." -Attributes_Start, "__attrib_start__" +__Attributes_Start, "__attrib_start__" From 528ef72a51dafb1c8f130e81bd36d199c67aa1e1 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Tue, 1 Aug 2023 05:17:24 -0400 Subject: [PATCH 4/9] More progress on parsing Made it to line 2597 of self parsing its singleheader Complex global or member defintions are now supported. --- Readme.md | 2 +- docs/Parsing.md | 43 +-- project/Readme.md | 2 +- project/components/ast.cpp | 12 +- project/components/data_structures.hpp | 12 +- project/components/interface.parsing.cpp | 323 ++++++++++++++--------- project/dependencies/macros.hpp | 3 + 7 files changed, 252 insertions(+), 145 deletions(-) diff --git a/Readme.md b/Readme.md index c6db963..ad1502c 100644 --- a/Readme.md +++ b/Readme.md @@ -17,7 +17,7 @@ A `natvis` and `natstepfilter` are provided in the scripts directory. ***The editor and scanner have not been implemented yet. The scanner will come first, then the editor.*** -A C variant is hosted [here](https://github.com/Ed94/genc); I haven't gotten headwind on it, should be easier to make than this... +A C variant is hosted [here](https://github.com/Ed94/genc); I will complete it when this library is feature complete, it should be easier to make than this... ## Usage diff --git a/docs/Parsing.md b/docs/Parsing.md index 7f81d66..518f22a 100644 --- a/docs/Parsing.md +++ b/docs/Parsing.md @@ -3,26 +3,6 @@ The library features a naive parser tailored for only what the library needs to construct the supported syntax of C++ into its AST. This parser does not, and should not do the compiler's job. By only supporting this minimal set of features, the parser is kept under 5000 loc. - -Everything is done in one pass for both the preprocessor directives and the rest of the language. 
-The parser performs no macro expansion as the scope of gencpp feature-set is to only support the preprocessor for the goal of having rudimentary awareness of preprocessor ***conditionals***, ***defines***, and ***includes***, and ***`pragmas`**. - -The keywords supported for the preprocessor are: - -* include -* define -* if -* ifdef -* elif -* endif -* undef -* pragma - -Each directive `#` line is considered one preproecessor unit, and will be treated as one Preprocessor AST. *These ASTs will be considered members or entries of braced scope they reside within*. -All keywords except *include* are suppported as members of a scope for a class/struct, global, or namespace body. - -Any preprocessor definition abuse that changes the syntax of the core language is unsupported and will fail to parse if not kept within an execution scope (function body, or expression assignment). - The parsing implementation supports the following for the user: ```cpp @@ -47,3 +27,26 @@ CodeVar parse_variable ( StrC var_def ); ***Parsing will aggregate any tokens within a function body or expression statement to an untyped Code AST.*** +Everything is done in one pass for both the preprocessor directives and the rest of the language. +The parser performs no macro expansion as the scope of gencpp feature-set is to only support the preprocessor for the goal of having rudimentary awareness of preprocessor ***conditionals***, ***defines***, ***includes***, and ***`pragmas`***. + +The keywords supported for the preprocessor are: + +* include +* define +* if +* ifdef +* elif +* endif +* undef +* pragma + +Each directive `#` line is considered one preprocessor unit, and will be treated as one Preprocessor AST. *These ASTs will be considered members or entries of braced scope they reside within*. +All keywords except *include* are supported as members of a scope for a class/struct, global, or namespace body. 
+ +Any preprocessor definition abuse that changes the syntax of the core language is unsupported and will fail to parse if not kept within an execution scope (function body, or expression assignment). + +Exceptions to the above rule (If its too hard to keep track of just follow the above notion): + +* Typedefs allow of a macro exansion to be defined after the keyword; Ex: `typedef GEN_FILE_OPEN_PROC( file_open_proc );` + diff --git a/project/Readme.md b/project/Readme.md index 5698371..cfe968f 100644 --- a/project/Readme.md +++ b/project/Readme.md @@ -12,7 +12,7 @@ Just like the `gen.` they include their components: `dependencies/.` ) They directly include `depedencies/file_handling.` as the core library does not include file processing by defualt. -**TODO : Right now the library is not finished structurally, as such the first self-hosting iteration is still WIP** +**TODO : Right now the library is not finished, as such the first self-hosting iteration is still WIP** Both libraries use *pre-generated* (self-hosting I guess) version of the library to then generate the latest version of itself. (sort of a verification that the generated version is equivalent). 
diff --git a/project/components/ast.cpp b/project/components/ast.cpp index c4ea141..d6e9538 100644 --- a/project/components/ast.cpp +++ b/project/components/ast.cpp @@ -707,7 +707,9 @@ String AST::to_string() ); } - if ( Parent && Parent->Type != ECode::Typedef ) + bool add_semicolon = Parent && Parent->Type != ECode::Typedef && Parent->Type != ECode::Variable; + + if ( add_semicolon ) result.append(";"); } break; @@ -756,6 +758,9 @@ String AST::to_string() if ( ValueType->ArrExpr ) result.append_fmt( "[%s]", ValueType->ArrExpr->to_string() ); + if ( BitfieldSize ) + result.append_fmt( " : %lu", BitfieldSize ); + if ( Value ) result.append_fmt( " = %s", Value->to_string() ); @@ -764,7 +769,10 @@ String AST::to_string() break; } - if ( UnderlyingType->ArrExpr ) + if ( BitfieldSize ) + result.append_fmt( "%s : %lu", ValueType->to_string(), BitfieldSize ); + + else if ( UnderlyingType->ArrExpr ) result.append_fmt( "%s %s[%s];", UnderlyingType->to_string(), Name, UnderlyingType->ArrExpr->to_string() ); else diff --git a/project/components/data_structures.hpp b/project/components/data_structures.hpp index 6fdfcc3..65cd330 100644 --- a/project/components/data_structures.hpp +++ b/project/components/data_structures.hpp @@ -231,7 +231,10 @@ struct AST AST* UnderlyingType; // Enum, Typedef AST* ValueType; // Parameter, Variable }; - AST* Params; // Function, Operator, Template + union { + AST* Params; // Function, Operator, Template + AST* BitfieldSize; // Varaiable (Class/Struct Data Member) + }; union { AST* ArrExpr; // Type Symbol AST* Body; // Class, Enum, Function, Namespace, Struct, Union @@ -275,7 +278,10 @@ struct AST_POD AST* UnderlyingType; // Enum, Typedef AST* ValueType; // Parameter, Variable }; - AST* Params; // Function, Operator, Template + union { + AST* Params; // Function, Operator, Template + AST* BitfieldSize; // Varaiable (Class/Struct Data Member) + }; union { AST* ArrExpr; // Type Symbol AST* Body; // Class, Enum, Function, Namespace, Struct, Union 
@@ -1064,7 +1070,7 @@ struct AST_Var CodeAttributes Attributes; CodeSpecifiers Specs; CodeType ValueType; - char _PAD_PROPERTIES_[ sizeof(AST*) ]; + Code BitfieldSize; Code Value; }; }; diff --git a/project/components/interface.parsing.cpp b/project/components/interface.parsing.cpp index b1bf188..b3b8262 100644 --- a/project/components/interface.parsing.cpp +++ b/project/components/interface.parsing.cpp @@ -81,6 +81,11 @@ namespace Parser { return Idx + 1 < Arr.num() ? &Arr[Idx + 1] : nullptr; } + + Token& operator []( s32 idx ) + { + return Arr[idx]; + } }; struct StackNode @@ -1015,8 +1020,8 @@ if ( def.Ptr == nullptr ) \ internal Code parse_function_body(); internal Code parse_global_nspace(); -internal CodeClass parse_class ( bool from_typedef = false ); -internal CodeEnum parse_enum ( bool from_typedef = false ); +internal CodeClass parse_class ( bool inplace_def = false ); +internal CodeEnum parse_enum ( bool inplace_def = false ); internal CodeBody parse_export_body (); internal CodeBody parse_extern_link_body(); internal CodeExtern parse_exten_link (); @@ -1024,14 +1029,16 @@ internal CodeFriend parse_friend (); internal CodeFn parse_function (); internal CodeNamespace parse_namespace (); internal CodeOpCast parse_operator_cast (); -internal CodeStruct parse_struct ( bool from_typedef = false ); +internal CodeStruct parse_struct ( bool inplace_def = false ); internal CodeVar parse_variable (); internal CodeTemplate parse_template (); internal CodeType parse_type (); internal CodeTypedef parse_typedef (); -internal CodeUnion parse_union ( bool from_typedef = false ); +internal CodeUnion parse_union ( bool inplace_def = false ); internal CodeUsing parse_using (); +constexpr bool inplace_def = true; + internal inline CodeDefine parse_define() { @@ -1925,6 +1932,8 @@ CodeVar parse_variable_after_name( Code array_expr = parse_array_decl(); Code expr = { nullptr }; + Code bitfield_expr = { nullptr }; + if ( currtok.IsAssign ) { eat( TokType::Operator ); @@ 
-1947,6 +1956,28 @@ CodeVar parse_variable_after_name( expr = untyped_str( expr_tok ); } + if ( currtok.Type == TokType::Assign_Classifer ) + { + eat( TokType::Assign_Classifer ); + + Token expr_tok = currtok; + + if ( currtok.Type == TokType::Statement_End ) + { + log_failure( "Expected expression after bitfield \n%s", Context.to_string() ); + Context.pop(); + return CodeInvalid; + } + + while ( left && currtok.Type != TokType::Statement_End ) + { + eat( currtok.Type ); + } + + expr_tok.Length = ( (sptr)currtok.Text + currtok.Length ) - (sptr)expr_tok.Text; + bitfield_expr = untyped_str( expr_tok ); + } + eat( TokType::Statement_End ); using namespace ECode; @@ -1962,6 +1993,9 @@ CodeVar parse_variable_after_name( if (array_expr ) type->ArrExpr = array_expr; + if ( bitfield_expr ) + result->BitfieldSize = bitfield_expr; + if ( attributes ) result->Attributes = attributes; @@ -2009,6 +2043,7 @@ Code parse_variable_assignment() return expr; } + internal inline Code parse_operator_function_or_variable( bool expects_function, CodeAttributes attributes, CodeSpecifiers specifiers ) { @@ -2059,6 +2094,133 @@ Code parse_operator_function_or_variable( bool expects_function, CodeAttributes return result; } +internal inline +Code parse_complicated_definition( Parser::TokType which ) +{ + using namespace Parser; + push_scope(); + + labeled_scope_start + PARSE_FORWARD_OR_DEFINITION: + Code result = CodeInvalid; + + // ; + switch ( which ) + { + case TokType::Decl_Class: + result = parse_class(); + Context.pop(); + return result; + + case TokType::Decl_Enum: + result = parse_enum(); + Context.pop(); + return result; + + case TokType::Decl_Struct: + result = parse_struct(); + Context.pop(); + return result; + + case TokType::Decl_Union: + result = parse_union(); + Context.pop(); + return result; + + default: + log_failure( "Error, wrong token type given to parse_complicated_definition " + "(only supports class, enum, struct, union) \n%s" + , Context.to_string() ); + + 
Context.pop(); + return CodeInvalid; + } + labeled_scope_end + + TokArray tokens = Context.Tokens; + + s32 idx = tokens.Idx; + s32 level = 0; + for ( ; idx < tokens.Arr.num(); idx ++ ) + { + if ( tokens[idx].Type == TokType::BraceCurly_Open ) + level++; + + if ( tokens[idx].Type == TokType::BraceCurly_Close ) + level--; + + if ( level == 0 && tokens[idx].Type == TokType::Statement_End ) + break; + } + + if ( (idx - 2 ) == tokens.Idx ) + { + // Its a forward declaration only + goto PARSE_FORWARD_OR_DEFINITION; + } + + Token tok = tokens[ idx - 1 ]; + if ( tok.Type == TokType::Identifier ) + { + tok = tokens[ idx - 2 ]; + + bool is_indirection = tok.Type == TokType::Ampersand + || tok.Type == TokType::Star; + + bool ok_to_parse = false; + + if ( tok.Type == TokType::BraceCurly_Close ) + { + // Its an inplace definition + // { ... } ; + ok_to_parse = true; + } + else if ( tok.Type == TokType::Identifier && tokens[ idx - 3 ].Type == TokType::Decl_Struct ) + { + // Its a variable with type ID using struct namespace. + // ; + ok_to_parse = true; + } + else if ( is_indirection ) + { + // Its a indirection type with type ID using struct namespace. + // ; + ok_to_parse = true; + } + + if ( ! ok_to_parse ) + { + log_failure( "Unsupported or bad member definition after struct declaration\n%s", Context.to_string() ); + Context.pop(); + return CodeInvalid; + } + + Code result = parse_operator_function_or_variable( false, { nullptr }, { nullptr } ); + Context.pop(); + return result; + } + else if ( tok.Type == TokType::BraceCurly_Close ) + { + // Its a definition + // { ... }; + goto PARSE_FORWARD_OR_DEFINITION; + } + else if ( tok.Type == TokType::BraceSquare_Close) + { + // Its an array definition + // [ ... 
]; + Code result = parse_operator_function_or_variable( false, { nullptr }, { nullptr } ); + Context.pop(); + return result; + } + else + { + log_failure( "Unsupported or bad member definition after struct declaration\n%s", Context.to_string() ); + Context.pop(); + return CodeInvalid; + } +} + internal CodeBody parse_class_struct_body( Parser::TokType which ) { @@ -2112,57 +2274,13 @@ CodeBody parse_class_struct_body( Parser::TokType which ) case TokType::Decl_Class: { - s32 idx = Context.Tokens.Idx; - for ( ; Context.Tokens.Arr[idx].Type != TokType::Statement_End; idx ++) - { - if ( Context.Tokens.Arr[idx].Type == TokType::BraceCurly_Open ) - { - idx = Context.Tokens.Idx; - break; - } - } - - Token tok_before_end = Context.Tokens.Arr[ idx ]; - if ( tok_before_end.Type == TokType::Identifier - || tok_before_end.Type == TokType::Capture_End - || tok_before_end.Type == TokType::Spec_Const - || tok_before_end.Type == TokType::Spec_Volatile - || tok_before_end.Type == TokType::Spec_Override - || tok_before_end.Type == TokType::Spec_Final ) - { - member = parse_operator_function_or_variable( false, attributes, specifiers ); - break; - } - - member = parse_class(); + member = parse_complicated_definition( TokType::Decl_Class ); } break; case TokType::Decl_Enum: { - s32 idx = Context.Tokens.Idx; - for ( ; Context.Tokens.Arr[idx].Type != TokType::Statement_End; idx ++) - { - if ( Context.Tokens.Arr[idx].Type == TokType::BraceCurly_Open ) - { - idx = Context.Tokens.Idx; - break; - } - } - - Token tok_before_end = Context.Tokens.Arr[ idx ]; - if ( tok_before_end.Type == TokType::Identifier - || tok_before_end.Type == TokType::Capture_End - || tok_before_end.Type == TokType::Spec_Const - || tok_before_end.Type == TokType::Spec_Volatile - || tok_before_end.Type == TokType::Spec_Override - || tok_before_end.Type == TokType::Spec_Final ) - { - member = parse_operator_function_or_variable( false, attributes, specifiers ); - break; - } - - member = parse_enum(); + member = 
parse_complicated_definition( TokType::Decl_Enum ); } break; @@ -2176,29 +2294,7 @@ CodeBody parse_class_struct_body( Parser::TokType which ) case TokType::Decl_Struct: { - s32 idx = Context.Tokens.Idx; - for ( ; Context.Tokens.Arr[idx].Type != TokType::Statement_End; idx ++) - { - if ( Context.Tokens.Arr[idx].Type == TokType::BraceCurly_Open ) - { - idx = Context.Tokens.Idx; - break; - } - } - - Token tok_before_end = Context.Tokens.Arr[ idx ]; - if ( tok_before_end.Type == TokType::Identifier - || tok_before_end.Type == TokType::Capture_End - || tok_before_end.Type == TokType::Spec_Const - || tok_before_end.Type == TokType::Spec_Volatile - || tok_before_end.Type == TokType::Spec_Override - || tok_before_end.Type == TokType::Spec_Final ) - { - member = parse_operator_function_or_variable( false, attributes, specifiers ); - break; - } - - member = parse_struct(); + member = parse_complicated_definition( TokType::Decl_Struct ); } break; @@ -2212,29 +2308,7 @@ CodeBody parse_class_struct_body( Parser::TokType which ) case TokType::Decl_Union: { - s32 idx = Context.Tokens.Idx; - for ( ; Context.Tokens.Arr[idx].Type != TokType::Statement_End; idx ++) - { - if ( Context.Tokens.Arr[idx].Type == TokType::BraceCurly_Open ) - { - idx = Context.Tokens.Idx; - break; - } - } - - Token tok_before_end = Context.Tokens.Arr[ idx ]; - if ( tok_before_end.Type == TokType::Identifier - || tok_before_end.Type == TokType::Capture_End - || tok_before_end.Type == TokType::Spec_Const - || tok_before_end.Type == TokType::Spec_Volatile - || tok_before_end.Type == TokType::Spec_Override - || tok_before_end.Type == TokType::Spec_Final ) - { - member = parse_operator_function_or_variable( false, attributes, specifiers ); - break; - } - - member = parse_union(); + member = parse_complicated_definition( TokType::Decl_Union ); } break; @@ -2269,10 +2343,12 @@ CodeBody parse_class_struct_body( Parser::TokType which ) case TokType::Preprocess_Else: member = preprocess_else; + eat( 
TokType::Preprocess_Else ); break; case TokType::Preprocess_EndIf: member = preprocess_endif; + eat( TokType::Preprocess_EndIf ); break; case TokType::Preprocess_Unsupported: @@ -2383,7 +2459,7 @@ CodeBody parse_class_struct_body( Parser::TokType which ) } internal -Code parse_class_struct( Parser::TokType which, bool from_typedef = false ) +Code parse_class_struct( Parser::TokType which, bool inplace_def = false ) { using namespace Parser; @@ -2451,7 +2527,7 @@ Code parse_class_struct( Parser::TokType which, bool from_typedef = false ) body = parse_class_struct_body( which ); } - if ( ! from_typedef ) + if ( ! inplace_def ) eat( TokType::Statement_End ); if ( which == TokType::Decl_Class ) @@ -2537,12 +2613,12 @@ CodeBody parse_global_nspace( CodeT which ) eat( TokType::Comment ); break; - case TokType::Decl_Enum: - member = parse_enum(); + case TokType::Decl_Class: + member = parse_complicated_definition( TokType::Decl_Class ); break; - case TokType::Decl_Class: - member = parse_class(); + case TokType::Decl_Enum: + member = parse_complicated_definition( TokType::Decl_Enum ); break; case TokType::Decl_Extern_Linkage: @@ -2557,12 +2633,7 @@ CodeBody parse_global_nspace( CodeT which ) break; case TokType::Decl_Struct: - if ( currtok.Line >= 2202 ) - { - log_fmt("here"); - } - - member = parse_struct(); + member = parse_complicated_definition( TokType::Decl_Struct ); break; case TokType::Decl_Template: @@ -2574,7 +2645,7 @@ CodeBody parse_global_nspace( CodeT which ) break; case TokType::Decl_Union: - member = parse_union(); + member = parse_complicated_definition( TokType::Decl_Union ); break; case TokType::Decl_Using: @@ -2723,11 +2794,11 @@ CodeBody parse_global_nspace( CodeT which ) } internal -CodeClass parse_class( bool from_typedef ) +CodeClass parse_class( bool inplace_def ) { using namespace Parser; push_scope(); - CodeClass result = (CodeClass) parse_class_struct( Parser::TokType::Decl_Class, from_typedef ); + CodeClass result = (CodeClass) 
parse_class_struct( Parser::TokType::Decl_Class, inplace_def ); Context.pop(); return result; } @@ -2749,7 +2820,7 @@ CodeClass parse_class( StrC def ) } internal -CodeEnum parse_enum( bool from_typedef ) +CodeEnum parse_enum( bool inplace_def ) { using namespace Parser; using namespace ECode; @@ -2758,6 +2829,8 @@ CodeEnum parse_enum( bool from_typedef ) SpecifierT specs_found[16] { ESpecifier::NumSpecifiers }; s32 NumSpecifiers = 0; + CodeAttributes attributes = { nullptr }; + Token name = { nullptr, 0, TokType::Invalid }; Code array_expr = { nullptr }; CodeType type = { nullptr }; @@ -2776,7 +2849,7 @@ CodeEnum parse_enum( bool from_typedef ) is_enum_class = true; } - // TODO : Parse attributes + attributes = parse_attributes(); if ( currtok.Type != TokType::Identifier ) { @@ -2809,6 +2882,17 @@ CodeEnum parse_enum( bool from_typedef ) while ( currtok.Type != TokType::BraceCurly_Close ) { + if ( currtok.Type == TokType::Comment ) + { + eat( TokType::Comment ); + continue; + } + else if ( currtok.Type == TokType::Preprocess_Macro ) + { + eat( TokType::Preprocess_Macro ); + continue; + } + eat( TokType::Identifier); if ( currtok.Type == TokType::Operator && currtok.Text[0] == '=' ) @@ -2832,7 +2916,7 @@ CodeEnum parse_enum( bool from_typedef ) eat( TokType::BraceCurly_Close ); } - if ( ! from_typedef ) + if ( ! 
inplace_def ) eat( TokType::Statement_End ); using namespace ECode; @@ -2856,6 +2940,9 @@ CodeEnum parse_enum( bool from_typedef ) result->Name = get_cached_string( name ); + if ( attributes ) + result->Attributes = attributes; + if ( type ) result->UnderlyingType = type; @@ -3324,11 +3411,11 @@ CodeOpCast parse_operator_cast( StrC def ) } internal inline -CodeStruct parse_struct( bool from_typedef ) +CodeStruct parse_struct( bool inplace_def ) { using namespace Parser; push_scope(); - CodeStruct result = (CodeStruct) parse_class_struct( TokType::Decl_Struct, from_typedef ); + CodeStruct result = (CodeStruct) parse_class_struct( TokType::Decl_Struct, inplace_def ); Context.pop(); return result; } @@ -3763,7 +3850,7 @@ CodeTypedef parse_typedef( StrC def ) } internal -CodeUnion parse_union( bool from_typedef ) +CodeUnion parse_union( bool inplace_def ) { using namespace Parser; push_scope(); @@ -3805,7 +3892,7 @@ CodeUnion parse_union( bool from_typedef ) eat( TokType::BraceCurly_Close ); - if ( ! from_typedef ) + if ( ! inplace_def ) eat( TokType::Statement_End ); CodeUnion diff --git a/project/dependencies/macros.hpp b/project/dependencies/macros.hpp index 2e3125e..752dc3c 100644 --- a/project/dependencies/macros.hpp +++ b/project/dependencies/macros.hpp @@ -86,6 +86,9 @@ } \ while(0); +#define labeled_scope_start if ( false ) { +#define labeled_scope_end } + #define clamp( x, lower, upper ) min( max( ( x ), ( lower ) ), ( upper ) ) #define count_of( x ) ( ( size_of( x ) / size_of( 0 [ x ] ) ) / ( ( sw )( ! 
( size_of( x ) % size_of( 0 [ x ] ) ) ) ) ) #define is_between( x, lower, upper ) ( ( ( lower ) <= ( x ) ) && ( ( x ) <= ( upper ) ) ) From 0f16d1131e311f1216b09f6d57504b0dcc785ab3 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Tue, 1 Aug 2023 14:02:54 -0400 Subject: [PATCH 5/9] Got past parsing, fixing serialization --- project/components/ast.cpp | 22 +-- project/components/header_end.hpp | 24 ++- project/components/interface.parsing.cpp | 239 +++++++++++++++++------ project/components/untyped.cpp | 36 ++++ test/gen/singleheader_copy.hpp | 0 test/test.singleheader_ast.cpp | 11 +- 6 files changed, 252 insertions(+), 80 deletions(-) create mode 100644 test/gen/singleheader_copy.hpp diff --git a/project/components/ast.cpp b/project/components/ast.cpp index d6e9538..50ae27e 100644 --- a/project/components/ast.cpp +++ b/project/components/ast.cpp @@ -508,39 +508,39 @@ String AST::to_string() break; case Preprocess_Define: - result.append_fmt( "#define %s %s", Name, Content ); + result.append_fmt( "#define %s %s\n", Name, Content ); break; case Preprocess_If: - result.append_fmt( "#if %s", Content ); + result.append_fmt( "#if %s\n", Content ); break; case Preprocess_IfDef: - result.append_fmt( "#ifdef %s", Content ); + result.append_fmt( "#ifdef %s\n", Content ); break; case Preprocess_IfNotDef: - result.append_fmt( "#ifndef %s", Content ); + result.append_fmt( "#ifndef %s\n", Content ); break; case Preprocess_Include: - result.append_fmt( "#include \"%s\"", Content ); + result.append_fmt( "#include \"%s\"\n", Content ); break; case Preprocess_ElIf: - result.append_fmt( "#elif %s", Content ); + result.append_fmt( "#elif %s\n", Content ); break; case Preprocess_Else: - result.append_fmt( "#else" ); + result.append_fmt( "#else\n" ); break; case Preprocess_EndIf: - result.append_fmt( "#endif" ); + result.append_fmt( "#endif\n" ); break; case Preprocess_Pragma: - result.append_fmt( "#pragma %s", Content ); + result.append_fmt( "#pragma %s\n", Content ); break; case Specifiers: @@ 
-653,11 +653,11 @@ String AST::to_string() if ( UnderlyingType->Type == Typename && UnderlyingType->ArrExpr ) { - result.append_fmt( "[%s];", UnderlyingType->ArrExpr->to_string() ); + result.append_fmt( "[%s];\n", UnderlyingType->ArrExpr->to_string() ); } else { - result.append( ";" ); + result.append( ";\n" ); } } break; diff --git a/project/components/header_end.hpp b/project/components/header_end.hpp index 5921178..d3b4db6 100644 --- a/project/components/header_end.hpp +++ b/project/components/header_end.hpp @@ -26,12 +26,29 @@ void AST::append( AST* other ) char const* AST::debug_str() { + if ( Parent ) + { + char const* fmt = stringize( + \nType : %s + \nParent : %s %s + \nName : %s + ); + + // These should be used immediately in a log. + // Thus if its desired to keep the debug str + // for multiple calls to bprintf, + // allocate this to proper string. + return str_fmt_buf( fmt + , type_str() + , Parent->Name + , Parent->type_str() + , Name ? Name : "" + ); + } + char const* fmt = stringize( - \nCode Debug: \nType : %s - \nParent : %s \nName : %s - \nComment : %s ); // These should be used immediately in a log. @@ -40,7 +57,6 @@ char const* AST::debug_str() // allocate this to proper string. return str_fmt_buf( fmt , type_str() - , Parent ? Parent->Name : "" , Name ? Name : "" ); } diff --git a/project/components/interface.parsing.cpp b/project/components/interface.parsing.cpp index b3b8262..82e79d5 100644 --- a/project/components/interface.parsing.cpp +++ b/project/components/interface.parsing.cpp @@ -1,6 +1,3 @@ -/* -These constructors are the most implementation intensive other than the editor or scanner. -*/ namespace Parser { @@ -331,6 +328,12 @@ namespace Parser continue; // Skip found token, its all handled here. 
} + if ( token.Type == TokType::Preprocess_Else || token.Type == TokType::Preprocess_EndIf ) + { + Tokens.append( token ); + continue; + } + Tokens.append( token ); SkipWhitespace(); @@ -355,12 +358,6 @@ namespace Parser defines.set( key, name ); } - if ( token.Type == TokType::Preprocess_Else || token.Type == TokType::Preprocess_EndIf ) - { - Tokens.append( token ); - continue; - } - Token content = { scanner, 0, TokType::Preprocess_Content, false, line, column }; if ( token.Type == TokType::Preprocess_Include ) @@ -1165,8 +1162,10 @@ Code parse_static_assert() Code assert = make_code(); assert->Type = ECode::Untyped; - eat( TokType::StaticAssert ); + Token content = currtok; + + eat( TokType::StaticAssert ); eat( TokType::Capture_Start ); s32 level = 0; @@ -1182,6 +1181,14 @@ Code parse_static_assert() eat( TokType::Capture_End ); eat( TokType::Statement_End ); + content.Length = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)content.Text; + + content.Text = str_fmt_buf( "%.*s\n", content.Length, content.Text ); + content.Length++; + + assert->Content = get_cached_string( content ); + assert->Name = assert->Content; + Context.pop(); return assert; } @@ -1682,7 +1689,10 @@ CodeOperator parse_operator_after_ret_type( // Parse Operator eat( TokType::Decl_Operator ); - if ( ! check( TokType::Operator ) ) + if ( ! 
left && currtok.Type != TokType::Operator + && currtok.Type != TokType::Star + && currtok.Type != TokType::Ampersand + && currtok.Type != TokType::Ampersand_DBL ) { log_failure( "Expected operator after 'operator' keyword\n%s", Context.to_string() ); Context.pop(); @@ -1697,12 +1707,26 @@ CodeOperator parse_operator_after_ret_type( if ( currtok.Text[1] == '=' ) op = Assign_Add; + if ( currtok.Text[1] == '+' ) + op = Increment; + else op = Add; } break; case '-': { + if ( currtok.Text[1] == '>' ) + { + if ( currtok.Text[2] == '*' ) + op = MemberOfPointer; + + else + op = MemberOfPointer; + + break; + } + if ( currtok.Text[1] == '=' ) op = Assign_Subtract; @@ -1876,11 +1900,14 @@ CodeOperator parse_operator_after_ret_type( return CodeInvalid; } - eat( TokType::Operator ); + eat( currtok.Type ); // Parse Params CodeParam params = parse_params(); + if ( params.ast == nullptr && op == EOperator::Multiply ) + op = MemberOfPointer; + while ( left && currtok.is_specifier() ) { if ( specifiers.ast == nullptr ) @@ -2273,15 +2300,11 @@ CodeBody parse_class_struct_body( Parser::TokType which ) break; case TokType::Decl_Class: - { member = parse_complicated_definition( TokType::Decl_Class ); - } break; case TokType::Decl_Enum: - { member = parse_complicated_definition( TokType::Decl_Enum ); - } break; case TokType::Decl_Friend: @@ -2293,9 +2316,7 @@ CodeBody parse_class_struct_body( Parser::TokType which ) break; case TokType::Decl_Struct: - { member = parse_complicated_definition( TokType::Decl_Struct ); - } break; case TokType::Decl_Template: @@ -2307,10 +2328,7 @@ CodeBody parse_class_struct_body( Parser::TokType which ) break; case TokType::Decl_Union: - { member = parse_complicated_definition( TokType::Decl_Union ); - } - break; case TokType::Decl_Using: @@ -2489,7 +2507,8 @@ Code parse_class_struct( Parser::TokType which, bool inplace_def = false ) attributes = parse_attributes(); - name = parse_identifier(); + if ( check( TokType::Identifier ) ) + name = 
parse_identifier(); local_persist char interface_arr_mem[ kilobytes(4) ] {0}; @@ -2834,7 +2853,6 @@ CodeEnum parse_enum( bool inplace_def ) Token name = { nullptr, 0, TokType::Invalid }; Code array_expr = { nullptr }; CodeType type = { nullptr }; - Token body = { nullptr, 0, TokType::Invalid }; char entries_code[ kilobytes(128) ] { 0 }; s32 entries_length = 0; @@ -2851,16 +2869,12 @@ CodeEnum parse_enum( bool inplace_def ) attributes = parse_attributes(); - if ( currtok.Type != TokType::Identifier ) + if ( check( TokType::Identifier ) ) { - log_failure( "Expected identifier for enum name\n%s", Context.to_string() ); - Context.pop(); - return CodeInvalid; + name = currtok; + eat( TokType::Identifier ); } - name = currtok; - eat( TokType::Identifier ); - if ( currtok.Type == TokType::Assign_Classifer ) { eat( TokType::Assign_Classifer ); @@ -2874,45 +2888,91 @@ CodeEnum parse_enum( bool inplace_def ) } } + CodeBody body = { nullptr }; + if ( currtok.Type == TokType::BraceCurly_Open ) { + body = (CodeBody) make_code(); + eat( TokType::BraceCurly_Open ); - body = currtok; + Code member = CodeInvalid; while ( currtok.Type != TokType::BraceCurly_Close ) { - if ( currtok.Type == TokType::Comment ) + switch ( currtok.Type ) { - eat( TokType::Comment ); - continue; - } - else if ( currtok.Type == TokType::Preprocess_Macro ) - { - eat( TokType::Preprocess_Macro ); - continue; + case TokType::Comment: + member = def_comment( currtok ); + eat( TokType::Comment ); + break; + + case TokType::Preprocess_Define: + member = parse_define(); + break; + + case TokType::Preprocess_If: + case TokType::Preprocess_IfDef: + case TokType::Preprocess_IfNotDef: + case TokType::Preprocess_ElIf: + member = parse_preprocess_cond(); + break; + + case TokType::Preprocess_Else: + member = preprocess_else; + eat( TokType::Preprocess_Else ); + break; + + case TokType::Preprocess_EndIf: + member = preprocess_endif; + eat( TokType::Preprocess_EndIf ); + break; + + case TokType::Preprocess_Macro: + 
member = untyped_str( currtok ); + eat( TokType::Preprocess_Macro ); + break; + + case TokType::Preprocess_Pragma: + member = parse_pragma(); + break; + + default: + Token entry = currtok; + + eat( TokType::Identifier); + + if ( currtok.Type == TokType::Operator && currtok.Text[0] == '=' ) + { + eat( TokType::Operator ); + + while ( currtok.Type != TokType::Comma && currtok.Type != TokType::BraceCurly_Close ) + { + eat( currtok.Type ); + } + } + + if ( currtok.Type == TokType::Comma ) + { + eat( TokType::Comma ); + } + + entry.Length = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)entry.Text; + + member = untyped_str( entry ); + break; } - eat( TokType::Identifier); - - if ( currtok.Type == TokType::Operator && currtok.Text[0] == '=' ) + if ( member == Code::Invalid ) { - eat( TokType::Operator ); - - while ( currtok.Type != TokType::Comma && currtok.Type != TokType::BraceCurly_Close ) - { - eat( currtok.Type ); - } + log_failure( "Failed to parse member\n%s", Context.to_string() ); + Context.pop(); + return CodeInvalid; } - if ( currtok.Type == TokType::Comma ) - { - eat( TokType::Comma ); - } + body.append( member ); } - body.Length = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)body.Text; - eat( TokType::BraceCurly_Close ); } @@ -2924,14 +2984,10 @@ CodeEnum parse_enum( bool inplace_def ) CodeEnum result = (CodeEnum) make_code(); - if ( body.Length ) + if ( body.ast ) { - // mem_copy( entries_code, body.Text, body.Length ); - - Code untyped_body = untyped_str( body ); - result->Type = is_enum_class ? Enum_Class : Enum; - result->Body = untyped_body; + result->Body = body; } else { @@ -3884,10 +3940,67 @@ CodeUnion parse_union( bool inplace_def ) while ( ! 
check( TokType::BraceCurly_Close ) ) { - Code entry = parse_variable(); + Code member = { nullptr }; + switch ( currtok.Type ) + { + case TokType::Comment: + member = def_comment( currtok ); + eat( TokType::Comment ); + break; - if ( entry ) - body.append( entry ); + case TokType::Decl_Class: + member = parse_complicated_definition( TokType::Decl_Class ); + break; + + case TokType::Decl_Enum: + member = parse_complicated_definition( TokType::Decl_Enum ); + break; + + case TokType::Decl_Struct: + member = parse_complicated_definition( TokType::Decl_Struct ); + break; + + case TokType::Decl_Union: + member = parse_complicated_definition( TokType::Decl_Union ); + break; + + case TokType::Preprocess_Define: + member = parse_define(); + break; + + case TokType::Preprocess_If: + case TokType::Preprocess_IfDef: + case TokType::Preprocess_IfNotDef: + case TokType::Preprocess_ElIf: + member = parse_preprocess_cond(); + break; + + case TokType::Preprocess_Else: + member = preprocess_else; + eat( TokType::Preprocess_Else ); + break; + + case TokType::Preprocess_EndIf: + member = preprocess_endif; + eat( TokType::Preprocess_EndIf ); + break; + + case TokType::Preprocess_Macro: + member = untyped_str( currtok ); + eat( TokType::Preprocess_Macro ); + break; + + case TokType::Preprocess_Pragma: + member = parse_pragma(); + break; + + default: + member = parse_variable(); + break; + } + + if ( member ) + body.append( member ); } eat( TokType::BraceCurly_Close ); diff --git a/project/components/untyped.cpp b/project/components/untyped.cpp index 12f4971..46c6e14 100644 --- a/project/components/untyped.cpp +++ b/project/components/untyped.cpp @@ -99,17 +99,35 @@ sw token_fmt_va( char* buf, uw buf_size, s32 num_tokens, va_list va ) Code untyped_str( StrC content ) { + if ( content.Len == 0 ) + { + log_failure( "untyped_str: empty string" ); + return CodeInvalid; + } + Code result = make_code(); result->Name = get_cached_string( content ); result->Type = ECode::Untyped; result->Content 
= result->Name; + if ( result->Name == nullptr ) + { + log_failure( "untyped_str: could not cache string" ); + return CodeInvalid; + } + return result; } Code untyped_fmt( char const* fmt, ...) { + if ( fmt == nullptr ) + { + log_failure( "untyped_fmt: null format string" ); + return CodeInvalid; + } + local_persist thread_local char buf[GEN_PRINTF_MAXLEN] = { 0 }; @@ -124,11 +142,23 @@ Code untyped_fmt( char const* fmt, ...) result->Type = ECode::Untyped; result->Content = get_cached_string( { length, buf } ); + if ( result->Name == nullptr ) + { + log_failure( "untyped_fmt: could not cache string" ); + return CodeInvalid; + } + return result; } Code untyped_token_fmt( s32 num_tokens, ... ) { + if ( num_tokens == 0 ) + { + log_failure( "untyped_token_fmt: zero tokens" ); + return CodeInvalid; + } + local_persist thread_local char buf[GEN_PRINTF_MAXLEN] = { 0 }; @@ -143,5 +173,11 @@ Code untyped_token_fmt( s32 num_tokens, ... ) result->Type = ECode::Untyped; result->Content = result->Name; + if ( result->Name == nullptr ) + { + log_failure( "untyped_fmt: could not cache string" ); + return CodeInvalid; + } + return result; } diff --git a/test/gen/singleheader_copy.hpp b/test/gen/singleheader_copy.hpp new file mode 100644 index 0000000..e69de29 diff --git a/test/test.singleheader_ast.cpp b/test/test.singleheader_ast.cpp index cf26097..143f554 100644 --- a/test/test.singleheader_ast.cpp +++ b/test/test.singleheader_ast.cpp @@ -18,11 +18,18 @@ void check_singleheader_ast() CodeBody ast = parse_global_body( { file.size, (char const*)file.data } ); - log_fmt("generated AST!!!"); + log_fmt("generated AST!!!\n"); + + s32 idx = 0; + for ( Code entry : ast ) + { + log_fmt("Entry %d: %s", idx, entry.to_string() ); + idx++; + } Builder builder; builder.open( "singleheader_copy.hpp" ); - log_fmt("serializng ast"); + log_fmt("serializng ast\n"); builder.print( ast ); builder.write(); From 684569750d8f71659ca64de53437074e69baf0e3 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Tue, 1 
Aug 2023 16:07:47 -0400 Subject: [PATCH 6/9] first serialization of singlehearder without asserts. (Still failing after around 4k lines. --- project/components/ast.cpp | 26 ++++++++------ project/components/header_end.hpp | 2 +- project/components/interface.cpp | 10 +++--- project/components/interface.parsing.cpp | 46 ++++++++++++++++-------- test/gen/singleheader_copy.hpp | 0 test/test.singleheader_ast.cpp | 2 +- 6 files changed, 55 insertions(+), 31 deletions(-) delete mode 100644 test/gen/singleheader_copy.hpp diff --git a/project/components/ast.cpp b/project/components/ast.cpp index 50ae27e..05c0d35 100644 --- a/project/components/ast.cpp +++ b/project/components/ast.cpp @@ -448,7 +448,7 @@ String AST::to_string() { if ( Specs ) { - result.append_fmt( "operator %s()" ); + result.append_fmt( "operator %s()", EOperator::to_str( Op ) ); CodeSpecifiers specs = cast(); @@ -488,6 +488,12 @@ String AST::to_string() case Parameters: { + if ( ValueType == nullptr ) + { + result.append_fmt( "%s", Name ); + break; + } + if ( Name ) result.append_fmt( "%s %s", ValueType->to_string(), Name ); @@ -508,39 +514,39 @@ String AST::to_string() break; case Preprocess_Define: - result.append_fmt( "#define %s %s\n", Name, Content ); + result.append_fmt( "#define %s %s", Name, Content ); break; case Preprocess_If: - result.append_fmt( "#if %s\n", Content ); + result.append_fmt( "#if %s", Content ); break; case Preprocess_IfDef: - result.append_fmt( "#ifdef %s\n", Content ); + result.append_fmt( "#ifdef %s", Content ); break; case Preprocess_IfNotDef: - result.append_fmt( "#ifndef %s\n", Content ); + result.append_fmt( "#ifndef %s", Content ); break; case Preprocess_Include: - result.append_fmt( "#include \"%s\"\n", Content ); + result.append_fmt( "#include \"%s\"", Content ); break; case Preprocess_ElIf: - result.append_fmt( "#elif %s\n", Content ); + result.append_fmt( "#elif %s", Content ); break; case Preprocess_Else: - result.append_fmt( "#else\n" ); + result.append_fmt( "#else" 
); break; case Preprocess_EndIf: - result.append_fmt( "#endif\n" ); + result.append_fmt( "#endif" ); break; case Preprocess_Pragma: - result.append_fmt( "#pragma %s\n", Content ); + result.append_fmt( "#pragma %s", Content ); break; case Specifiers: diff --git a/project/components/header_end.hpp b/project/components/header_end.hpp index d3b4db6..d4a8740 100644 --- a/project/components/header_end.hpp +++ b/project/components/header_end.hpp @@ -368,7 +368,7 @@ CodeBody def_body( CodeT type ) } Code - result = make_code(); + result = make_code(); result->Type = type; return (CodeBody)result; } diff --git a/project/components/interface.cpp b/project/components/interface.cpp index cf2b2ec..0fc16c8 100644 --- a/project/components/interface.cpp +++ b/project/components/interface.cpp @@ -74,6 +74,11 @@ void define_constants() Code::Invalid = make_code(); Code::Invalid.set_global(); + t_empty = (CodeType) make_code(); + t_empty->Type = ECode::Typename; + t_empty->Name = get_cached_string( txt_StrC("") ); + t_empty.set_global(); + access_private = make_code(); access_private->Type = ECode::Access_Private; access_private->Name = get_cached_string( txt_StrC("private:") ); @@ -161,11 +166,6 @@ void define_constants() #endif # undef def_constant_code_type - t_empty = (CodeType) make_code(); - t_empty->Type = ECode::Typename; - t_empty->Name = get_cached_string( txt_StrC("") ); - t_empty.set_global(); - # pragma push_macro( "global" ) # pragma push_macro( "internal" ) # pragma push_macro( "local_persist" ) diff --git a/project/components/interface.parsing.cpp b/project/components/interface.parsing.cpp index 82e79d5..f9f830f 100644 --- a/project/components/interface.parsing.cpp +++ b/project/components/interface.parsing.cpp @@ -2036,6 +2036,22 @@ CodeVar parse_variable_after_name( return result; } +internal inline +Code parse_simple_preprocess( Parser::TokType which ) +{ + using namespace Parser; + push_scope(); + + Token tok = currtok; + tok.Text = str_fmt_buf( "%.*s\n", 
tok.Length, tok.Text ); + tok.Length++; + Code result = untyped_str( tok ); + eat( which ); + + Context.pop(); + return result; +} + internal inline Code parse_variable_assignment() { @@ -2070,7 +2086,6 @@ Code parse_variable_assignment() return expr; } - internal inline Code parse_operator_function_or_variable( bool expects_function, CodeAttributes attributes, CodeSpecifiers specifiers ) { @@ -2351,8 +2366,7 @@ CodeBody parse_class_struct_body( Parser::TokType which ) break; case TokType::Preprocess_Macro: - member = untyped_str( currtok ); - eat( TokType::Preprocess_Macro ); + member = parse_simple_preprocess( TokType::Preprocess_Macro ); break; case TokType::Preprocess_Pragma: @@ -2370,8 +2384,7 @@ CodeBody parse_class_struct_body( Parser::TokType which ) break; case TokType::Preprocess_Unsupported: - member = untyped_str( currtok ); - eat( TokType::Preprocess_Unsupported ); + member = parse_simple_preprocess( TokType::Preprocess_Unsupported ); break; case TokType::StaticAssert: @@ -2687,8 +2700,7 @@ CodeBody parse_global_nspace( CodeT which ) break; case TokType::Preprocess_Macro: - member = untyped_str( currtok ); - eat( TokType::Preprocess_Macro ); + member = parse_simple_preprocess( TokType::Preprocess_Macro ); break; case TokType::Preprocess_Pragma: @@ -2706,8 +2718,7 @@ CodeBody parse_global_nspace( CodeT which ) break; case TokType::Preprocess_Unsupported: - member = untyped_str( currtok ); - eat( TokType::Preprocess_Unsupported ); + member = parse_simple_preprocess( TokType::Preprocess_Unsupported ); break; case TokType::StaticAssert: @@ -2892,7 +2903,8 @@ CodeEnum parse_enum( bool inplace_def ) if ( currtok.Type == TokType::BraceCurly_Open ) { - body = (CodeBody) make_code(); + body = (CodeBody) make_code(); + body->Type = ECode::Enum_Body; eat( TokType::BraceCurly_Open ); @@ -2929,14 +2941,17 @@ CodeEnum parse_enum( bool inplace_def ) break; case TokType::Preprocess_Macro: - member = untyped_str( currtok ); - eat( TokType::Preprocess_Macro ); + member 
= parse_simple_preprocess( TokType::Preprocess_Macro ); break; case TokType::Preprocess_Pragma: member = parse_pragma(); break; + case TokType::Preprocess_Unsupported: + member = parse_simple_preprocess( TokType::Preprocess_Unsupported ); + break; + default: Token entry = currtok; @@ -3986,14 +4001,17 @@ CodeUnion parse_union( bool inplace_def ) break; case TokType::Preprocess_Macro: - member = untyped_str( currtok ); - eat( TokType::Preprocess_Macro ); + member = parse_simple_preprocess( TokType::Preprocess_Macro ); break; case TokType::Preprocess_Pragma: member = parse_pragma(); break; + case TokType::Preprocess_Unsupported: + member = parse_simple_preprocess( TokType::Preprocess_Unsupported ); + break; + default: member = parse_variable(); break; diff --git a/test/gen/singleheader_copy.hpp b/test/gen/singleheader_copy.hpp deleted file mode 100644 index e69de29..0000000 diff --git a/test/test.singleheader_ast.cpp b/test/test.singleheader_ast.cpp index 143f554..aa1a4ad 100644 --- a/test/test.singleheader_ast.cpp +++ b/test/test.singleheader_ast.cpp @@ -28,7 +28,7 @@ void check_singleheader_ast() } Builder builder; - builder.open( "singleheader_copy.hpp" ); + builder.open( "singleheader_copy.gen.hpp" ); log_fmt("serializng ast\n"); builder.print( ast ); builder.write(); From 4c8a0f000524652d98e9a92a2cf60e1a3f2e5f00 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Tue, 1 Aug 2023 20:56:00 -0400 Subject: [PATCH 7/9] Iterations on serialization improvements. 
--- project/components/ast.cpp | 50 +++++++++++++----------- project/components/interface.parsing.cpp | 20 ++++++---- project/file_processors/builder.cpp | 2 +- test/test.singleheader_ast.cpp | 14 +++---- 4 files changed, 48 insertions(+), 38 deletions(-) diff --git a/project/components/ast.cpp b/project/components/ast.cpp index 05c0d35..75dbeaa 100644 --- a/project/components/ast.cpp +++ b/project/components/ast.cpp @@ -38,10 +38,13 @@ String AST::to_string() case Comment: { + result.append("\n"); + static char line[MaxCommentLineLength]; s32 left = Content.length(); s32 index = 0; + s32 curr = 0; do { s32 length = 0; @@ -49,12 +52,17 @@ String AST::to_string() { length++; left--; + index++; } + index++; - str_copy( line, Content, length ); - line[length] = '\0'; + str_copy( line, Content + curr, length ); + result.append_fmt( "//%.*s", length, line ); + mem_set( line, 0, MaxCommentLineLength); - result.append_fmt( "// %s", line ); + length++; + left--; + curr = index; } while ( left--, left > 0 ); } @@ -115,7 +123,7 @@ String AST::to_string() result.append_fmt( "class %s\n{\n%s\n}", Name, Body->to_string() ); } - if ( Parent && Parent->Type != ECode::Typedef ) + if ( Parent == nullptr || ( Parent->Type != ECode::Typedef && Parent->Type != ECode::Variable ) ) result.append(";"); } break; @@ -130,7 +138,7 @@ String AST::to_string() else result.append_fmt( "class %s", Name ); - if ( Parent && Parent->Type != ECode::Typedef ) + if ( Parent == nullptr || ( Parent->Type != ECode::Typedef && Parent->Type != ECode::Variable ) ) result.append(";"); } break; @@ -164,7 +172,7 @@ String AST::to_string() , Body->to_string() ); - if ( Parent && Parent->Type != ECode::Typedef ) + if ( Parent == nullptr || ( Parent->Type != ECode::Typedef && Parent->Type != ECode::Variable ) ) result.append(";"); } break; @@ -179,7 +187,7 @@ String AST::to_string() result.append_fmt( "enum %s : %s", Name, UnderlyingType->to_string() ); - if ( Parent && Parent->Type != ECode::Typedef ) + if ( 
Parent == nullptr || ( Parent->Type != ECode::Typedef && Parent->Type != ECode::Variable ) ) result.append(";"); } break; @@ -221,7 +229,7 @@ String AST::to_string() ); } - if ( Parent && Parent->Type != ECode::Typedef ) + if ( Parent == nullptr || ( Parent->Type != ECode::Typedef && Parent->Type != ECode::Variable ) ) result.append(";"); } break; @@ -238,7 +246,7 @@ String AST::to_string() result.append_fmt( "%s : %s", Name, UnderlyingType->to_string() ); - if ( Parent && Parent->Type != ECode::Typedef ) + if ( Parent == nullptr || ( Parent->Type != ECode::Typedef && Parent->Type != ECode::Variable ) ) result.append(";"); } break; @@ -282,7 +290,7 @@ String AST::to_string() result.append_fmt( "%s ", Attributes->to_string() ); if ( Specs ) - result.append_fmt( "%s\n", Specs->to_string() ); + result.append_fmt( "%s", Specs->to_string() ); if ( ReturnType ) result.append_fmt( "%s %s(", ReturnType->to_string(), Name ); @@ -322,7 +330,7 @@ String AST::to_string() result.append_fmt( "%s ", Attributes->to_string() ); if ( Specs ) - result.append_fmt( "%s\n", Specs->to_string() ); + result.append_fmt( "%s", Specs->to_string() ); if ( ReturnType ) result.append_fmt( "%s %s(", ReturnType->to_string(), Name ); @@ -514,7 +522,7 @@ String AST::to_string() break; case Preprocess_Define: - result.append_fmt( "#define %s %s", Name, Content ); + result.append_fmt( "#define %s%s", Name, Content ); break; case Preprocess_If: @@ -542,7 +550,7 @@ String AST::to_string() break; case Preprocess_EndIf: - result.append_fmt( "#endif" ); + result.append_fmt( "#endif\n" ); break; case Preprocess_Pragma: @@ -619,7 +627,7 @@ String AST::to_string() result.append_fmt( "struct %s\n{\n%s\n}", Name, Body->to_string() ); } - if ( Parent && Parent->Type != ECode::Typedef ) + if ( Parent == nullptr || ( Parent->Type != ECode::Typedef && Parent->Type != ECode::Variable ) ) result.append(";"); } break; @@ -634,7 +642,7 @@ String AST::to_string() else result.append_fmt( "struct %s", Name ); - if ( 
Parent && Parent->Type != ECode::Typedef ) + if ( Parent == nullptr || ( Parent->Type != ECode::Typedef && Parent->Type != ECode::Variable ) ) result.append(";"); } break; @@ -659,11 +667,11 @@ String AST::to_string() if ( UnderlyingType->Type == Typename && UnderlyingType->ArrExpr ) { - result.append_fmt( "[%s];\n", UnderlyingType->ArrExpr->to_string() ); + result.append_fmt( "[%s];", UnderlyingType->ArrExpr->to_string() ); } else { - result.append( ";\n" ); + result.append( ";" ); } } break; @@ -713,9 +721,7 @@ String AST::to_string() ); } - bool add_semicolon = Parent && Parent->Type != ECode::Typedef && Parent->Type != ECode::Variable; - - if ( add_semicolon ) + if ( Parent == nullptr || ( Parent->Type != ECode::Typedef && Parent->Type != ECode::Variable ) ) result.append(";"); } break; @@ -765,7 +771,7 @@ String AST::to_string() result.append_fmt( "[%s]", ValueType->ArrExpr->to_string() ); if ( BitfieldSize ) - result.append_fmt( " : %lu", BitfieldSize ); + result.append_fmt( " : %s", BitfieldSize->to_string() ); if ( Value ) result.append_fmt( " = %s", Value->to_string() ); @@ -776,7 +782,7 @@ String AST::to_string() } if ( BitfieldSize ) - result.append_fmt( "%s : %lu", ValueType->to_string(), BitfieldSize ); + result.append_fmt( "%s : %s", ValueType->to_string(), BitfieldSize->to_string() ); else if ( UnderlyingType->ArrExpr ) result.append_fmt( "%s %s[%s];", UnderlyingType->to_string(), Name, UnderlyingType->ArrExpr->to_string() ); diff --git a/project/components/interface.parsing.cpp b/project/components/interface.parsing.cpp index f9f830f..42d851b 100644 --- a/project/components/interface.parsing.cpp +++ b/project/components/interface.parsing.cpp @@ -257,6 +257,8 @@ namespace Parser { case '#': { + char const* hash = scanner; + move_forward(); SkipWhitespace(); @@ -292,13 +294,13 @@ namespace Parser if ( current == '\r' ) { move_forward(); - token.Length++; + // token.Length++; } if ( current == '\n' ) { move_forward(); - token.Length++; + // 
token.Length++; continue; } else @@ -316,7 +318,7 @@ namespace Parser if ( current == '\n' ) { move_forward(); - token.Length++; + // token.Length++; break; } @@ -324,6 +326,8 @@ namespace Parser token.Length++; } + token.Text = hash; + token.Length = (sptr)token.Text + token.Length - (sptr)hash; Tokens.append( token ); continue; // Skip found token, its all handled here. } @@ -434,7 +438,7 @@ namespace Parser if ( current == '\n' ) { move_forward(); - content.Length++; + // content.Length++; break; } @@ -1183,8 +1187,8 @@ Code parse_static_assert() content.Length = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)content.Text; - content.Text = str_fmt_buf( "%.*s\n", content.Length, content.Text ); - content.Length++; + // content.Text = str_fmt_buf( "%.*s\n", content.Length, content.Text ); + // content.Length++; assert->Content = get_cached_string( content ); assert->Name = assert->Content; @@ -1979,7 +1983,7 @@ CodeVar parse_variable_after_name( eat( currtok.Type ); } - expr_tok.Length = ( (sptr)currtok.Text + currtok.Length ) - (sptr)expr_tok.Text; + expr_tok.Length = ( (sptr)currtok.Text + currtok.Length ) - (sptr)expr_tok.Text - 1; expr = untyped_str( expr_tok ); } @@ -2043,7 +2047,7 @@ Code parse_simple_preprocess( Parser::TokType which ) push_scope(); Token tok = currtok; - tok.Text = str_fmt_buf( "%.*s\n", tok.Length, tok.Text ); + tok.Text = str_fmt_buf( "%.*s", tok.Length, tok.Text ); tok.Length++; Code result = untyped_str( tok ); eat( which ); diff --git a/project/file_processors/builder.cpp b/project/file_processors/builder.cpp index b8d5bc5..64662ea 100644 --- a/project/file_processors/builder.cpp +++ b/project/file_processors/builder.cpp @@ -1,6 +1,6 @@ void Builder::print( Code code ) { - Buffer.append_fmt( "%s\n", code->to_string() ); + Buffer.append_fmt( "%s", code->to_string() ); } void Builder::print_fmt( char const* fmt, ... 
) diff --git a/test/test.singleheader_ast.cpp b/test/test.singleheader_ast.cpp index aa1a4ad..b5d495f 100644 --- a/test/test.singleheader_ast.cpp +++ b/test/test.singleheader_ast.cpp @@ -20,16 +20,16 @@ void check_singleheader_ast() log_fmt("generated AST!!!\n"); - s32 idx = 0; - for ( Code entry : ast ) - { - log_fmt("Entry %d: %s", idx, entry.to_string() ); - idx++; - } + // s32 idx = 0; + // for ( Code entry : ast ) + // { + // log_fmt("Entry %d: %s", idx, entry.to_string() ); + // idx++; + // } Builder builder; builder.open( "singleheader_copy.gen.hpp" ); - log_fmt("serializng ast\n"); + log_fmt("\n\nserializng ast\n"); builder.print( ast ); builder.write(); From b96b0821c15a0224e51de1ba930cbde6e1ea19ea Mon Sep 17 00:00:00 2001 From: Ed_ Date: Wed, 2 Aug 2023 12:39:35 -0400 Subject: [PATCH 8/9] Fixes towards parsing (getting to line 12575 now of the singleheader. --- project/components/ast.cpp | 10 +- project/components/especifier.hpp | 4 + project/components/etoktype.cpp | 1 + project/components/header_end.hpp | 1 + project/components/interface.cpp | 4 + project/components/interface.hpp | 2 +- project/components/interface.parsing.cpp | 265 ++++++++++++++++++++--- project/components/interface.upfront.cpp | 4 +- project/components/static_data.cpp | 1 + project/dependencies/file_handling.cpp | 2 +- project/enums/ESpecifier.csv | 1 + project/enums/ETokType.csv | 1 + scripts/.clang-format | 8 +- test/test.singleheader_ast.cpp | 12 +- 14 files changed, 265 insertions(+), 51 deletions(-) diff --git a/project/components/ast.cpp b/project/components/ast.cpp index 75dbeaa..33babe4 100644 --- a/project/components/ast.cpp +++ b/project/components/ast.cpp @@ -456,7 +456,10 @@ String AST::to_string() { if ( Specs ) { - result.append_fmt( "operator %s()", EOperator::to_str( Op ) ); + if ( Name && Name.length() ) + result.append_fmt( "%.*soperator %s()", Name.length(), Name, EOperator::to_str( Op )); + else + result.append_fmt( "operator %s()", EOperator::to_str( Op ) ); 
CodeSpecifiers specs = cast(); @@ -470,7 +473,10 @@ String AST::to_string() break; } - result.append_fmt("operator %s()\n{\n%s\n}", ValueType->to_string(), Body->to_string() ); + if ( Name && Name.length() ) + result.append_fmt("%.*soperator %s()\n{\n%s\n}", Name.length(), Name, ValueType->to_string(), Body->to_string() ); + else + result.append_fmt("operator %s()\n{\n%s\n}", ValueType->to_string(), Body->to_string() ); } break; diff --git a/project/components/especifier.hpp b/project/components/especifier.hpp index 9affa84..e2939ac 100644 --- a/project/components/especifier.hpp +++ b/project/components/especifier.hpp @@ -20,6 +20,7 @@ namespace ESpecifier Entry( Internal_Linkage, internal ) \ Entry( Local_Persist, local_persist ) \ Entry( Mutable, mutable ) \ + Entry( NeverInline, neverinline ) \ Entry( Ptr, * ) \ Entry( Ref, & ) \ Entry( Register, register ) \ @@ -56,9 +57,11 @@ namespace ESpecifier # pragma push_macro( "global" ) # pragma push_macro( "internal" ) # pragma push_macro( "local_persist" ) + # pragma push_macro( "neverinline" ) # undef global # undef internal # undef local_persist + # undef neverinline # define Entry( Spec_, Code_ ) { sizeof(stringize(Code_)), stringize(Code_) }, Define_Specifiers @@ -67,6 +70,7 @@ namespace ESpecifier # pragma pop_macro( "global" ) # pragma pop_macro( "internal" ) # pragma pop_macro( "local_persist" ) + # pragma pop_macro( "neverinline" ) }; return lookup[ specifier ]; diff --git a/project/components/etoktype.cpp b/project/components/etoktype.cpp index fd3952b..78a5932 100644 --- a/project/components/etoktype.cpp +++ b/project/components/etoktype.cpp @@ -80,6 +80,7 @@ namespace Parser Entry( Spec_Internal_Linkage, "internal" ) \ Entry( Spec_LocalPersist, "local_persist" ) \ Entry( Spec_Mutable, "mutable" ) \ + Entry( Spec_NeverInline, "neverinline" ) \ Entry( Spec_Override, "override" ) \ Entry( Spec_Static, "static" ) \ Entry( Spec_ThreadLocal, "thread_local" ) \ diff --git a/project/components/header_end.hpp 
b/project/components/header_end.hpp index d4a8740..47f923e 100644 --- a/project/components/header_end.hpp +++ b/project/components/header_end.hpp @@ -468,6 +468,7 @@ extern CodeSpecifiers spec_inline; extern CodeSpecifiers spec_internal_linkage; extern CodeSpecifiers spec_local_persist; extern CodeSpecifiers spec_mutable; +extern CodeSpecifiers spec_neverinline; extern CodeSpecifiers spec_override; extern CodeSpecifiers spec_ptr; extern CodeSpecifiers spec_ref; diff --git a/project/components/interface.cpp b/project/components/interface.cpp index 0fc16c8..ca4fc3a 100644 --- a/project/components/interface.cpp +++ b/project/components/interface.cpp @@ -169,9 +169,11 @@ void define_constants() # pragma push_macro( "global" ) # pragma push_macro( "internal" ) # pragma push_macro( "local_persist" ) +# pragma push_macro( "neverinline" ) # undef global # undef internal # undef local_persist +# undef neverinline # define def_constant_spec( Type_, ... ) \ spec_##Type_ = def_specifiers( num_args(__VA_ARGS__), __VA_ARGS__); \ @@ -188,6 +190,7 @@ void define_constants() def_constant_spec( internal_linkage, ESpecifier::Internal_Linkage ); def_constant_spec( local_persist, ESpecifier::Local_Persist ); def_constant_spec( mutable, ESpecifier::Mutable ); + def_constant_spec( neverinline, ESpecifier::NeverInline ); def_constant_spec( override, ESpecifier::Override ); def_constant_spec( ptr, ESpecifier::Ptr ); def_constant_spec( ref, ESpecifier::Ref ); @@ -204,6 +207,7 @@ void define_constants() # pragma pop_macro( "global" ) # pragma pop_macro( "internal" ) # pragma pop_macro( "local_persist" ) +# pragma pop_macro( "neverinline" ) # undef def_constant_spec } diff --git a/project/components/interface.hpp b/project/components/interface.hpp index 0256b02..00dc4c5 100644 --- a/project/components/interface.hpp +++ b/project/components/interface.hpp @@ -65,7 +65,7 @@ CodeInclude def_include ( StrC content ); CodeModule def_module ( StrC name, ModuleFlag mflags = ModuleFlag::None ); 
CodeNamespace def_namespace( StrC name, Code body, ModuleFlag mflags = ModuleFlag::None ); -CodeOperator def_operator( OperatorT op +CodeOperator def_operator( OperatorT op, StrC nspace , CodeParam params = NoCode, CodeType ret_type = NoCode, Code body = NoCode , CodeSpecifiers specifiers = NoCode, CodeAttributes attributes = NoCode , ModuleFlag mflags = ModuleFlag::None ); diff --git a/project/components/interface.parsing.cpp b/project/components/interface.parsing.cpp index 42d851b..7dde9a3 100644 --- a/project/components/interface.parsing.cpp +++ b/project/components/interface.parsing.cpp @@ -6,9 +6,9 @@ namespace Parser char const* Text; sptr Length; TokType Type; - bool IsAssign; s32 Line; s32 Column; + bool IsAssign; // TokFlags Flags; operator bool() @@ -133,7 +133,10 @@ namespace Parser sptr length_from_err = dist; String line_from_err = String::make( GlobalAllocator, { length_from_err, last_valid.Text } ); - result.append_fmt("\t(%d, %d):%*c\n", last_valid.Line, last_valid.Column, length_from_err, '^' ); + if ( length_from_err < 100 ) + result.append_fmt("\t(%d, %d):%*c\n", last_valid.Line, last_valid.Column, length_from_err, '^' ); + else + result.append_fmt("\t(%d, %d)\n", last_valid.Line, last_valid.Column ); StackNode* curr_scope = Scope; s32 level = 0; @@ -141,7 +144,7 @@ namespace Parser { if ( curr_scope->Name ) { - result.append_fmt("\t%d: %s, AST Name: %s\n", level, curr_scope->ProcName.Ptr, (StrC)curr_scope->Name ); + result.append_fmt("\t%d: %s, AST Name: %.*s\n", level, curr_scope->ProcName.Ptr, curr_scope->Name.Length, (StrC)curr_scope->Name ); } else { @@ -245,7 +248,12 @@ namespace Parser while (left ) { - Token token = { nullptr, 0, TokType::Invalid, false, line, column }; + Token token = { nullptr, 0, TokType::Invalid, line, column, false }; + + if ( line == 4921 ) + { + log_fmt("here"); + } bool is_define = false; @@ -344,7 +352,7 @@ namespace Parser if ( token.Type == TokType::Preprocess_Define ) { - Token name = { scanner, 0, 
TokType::Identifier, false, line, column }; + Token name = { scanner, 0, TokType::Identifier, line, column, false }; name.Text = scanner; name.Length = 1; @@ -356,13 +364,19 @@ namespace Parser name.Length++; } + if ( left && current == '(' ) + { + move_forward(); + name.Length++; + } + Tokens.append( name ); u64 key = crc32( name.Text, name.Length ); defines.set( key, name ); } - Token content = { scanner, 0, TokType::Preprocess_Content, false, line, column }; + Token content = { scanner, 0, TokType::Preprocess_Content, line, column, false }; if ( token.Type == TokType::Preprocess_Include ) { @@ -920,7 +934,12 @@ namespace Parser continue; } - u64 key = crc32( token.Text, token.Length ); + u64 key = 0; + if ( current == '(') + key = crc32( token.Text, token.Length + 1 ); + else + key = crc32( token.Text, token.Length ); + StrC* define = defines.get( key ); if ( define ) { @@ -1059,6 +1078,7 @@ CodeDefine parse_define() return CodeInvalid; } + Context.Scope->Name = currtok; define->Name = get_cached_string( currtok ); eat( TokType::Identifier ); @@ -1101,6 +1121,7 @@ CodePreprocessCond parse_preprocess_cond() return CodeInvalid; } + Context.Scope->Name = currtok; cond->Content = get_cached_string( currtok ); eat( TokType::Preprocess_Content ); @@ -1125,6 +1146,8 @@ CodeInclude parse_include() Context.pop(); return CodeInvalid; } + + Context.Scope->Name = currtok; include->Content = get_cached_string( currtok ); eat( TokType::String ); @@ -1150,6 +1173,7 @@ CodePragma parse_pragma() return CodeInvalid; } + Context.Scope->Name = currtok; pragma->Content = get_cached_string( currtok ); eat( TokType::Preprocess_Content ); @@ -1169,6 +1193,8 @@ Code parse_static_assert() Token content = currtok; + Context.Scope->Name = content; + eat( TokType::StaticAssert ); eat( TokType::Capture_Start ); @@ -1203,6 +1229,15 @@ Code parse_array_decl() using namespace Parser; push_scope(); + if ( check( TokType::Operator ) && currtok.Text[0] == '[' && currtok.Text[1] == ']' ) + { + Code 
array_expr = untyped_str( currtok ); + eat( TokType::Operator ); + + Context.pop(); + return array_expr; + } + if ( check( TokType::BraceSquare_Open ) ) { eat( TokType::BraceSquare_Open ); @@ -1333,6 +1368,7 @@ Parser::Token parse_identifier() push_scope(); Token name = currtok; + Context.Scope->Name = name; eat( TokType::Identifier ); @@ -1460,7 +1496,12 @@ CodeParam parse_params( bool use_template_capture = false ) return CodeInvalid; } - Token name = { nullptr, 0, TokType::Invalid, false }; + Token name = NullToken; + + if ( Context.Tokens.Idx == 18546 ) + { + log_fmt("here"); + } if ( check( TokType::Identifier ) ) { @@ -1473,20 +1514,20 @@ CodeParam parse_params( bool use_template_capture = false ) Token value_tok = currtok; - if ( currtok.Type == TokType::Statement_End ) + if ( currtok.Type == TokType::Comma ) { log_failure( "Expected value after assignment operator\n%s.", Context.to_string() ); Context.pop(); return CodeInvalid; } - while ( left && currtok.Type != TokType::Statement_End ) + while ( left && currtok.Type != TokType::Comma ) { value_tok.Length = ( (sptr)currtok.Text + currtok.Length ) - (sptr)value_tok.Text; eat( currtok.Type ); } - value = parse_type(); + value = untyped_str( value_tok ); } } @@ -1541,20 +1582,20 @@ CodeParam parse_params( bool use_template_capture = false ) Token value_tok = currtok; - if ( currtok.Type == TokType::Statement_End ) + if ( currtok.Type == TokType::Comma ) { log_failure( "Expected value after assignment operator\n%s", Context.to_string() ); Context.pop(); return CodeInvalid; } - while ( left && currtok.Type != TokType::Statement_End ) + while ( left && currtok.Type != TokType::Comma ) { value_tok.Length = ( (sptr)currtok.Text + currtok.Length ) - (sptr)value_tok.Text; eat( currtok.Type ); } - value = parse_type(); + value = untyped_str( value_tok ); } } @@ -1599,7 +1640,7 @@ CodeFn parse_function_after_name( , CodeAttributes attributes , CodeSpecifiers specifiers , CodeType ret_type - , StrC name + , 
Parser::Token name ) { using namespace Parser; @@ -1690,7 +1731,21 @@ CodeOperator parse_operator_after_ret_type( using namespace EOperator; push_scope(); - // Parse Operator + Token nspace = NullToken; + if ( check( TokType::Identifier ) ) + { + nspace = currtok; + while ( left && currtok.Type == TokType::Identifier ) + { + eat( TokType::Identifier ); + + if ( currtok.Type == TokType::Access_StaticSymbol ) + eat( TokType::Access_StaticSymbol ); + } + + nspace.Length = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)nspace.Text; + } + eat( TokType::Decl_Operator ); if ( ! left && currtok.Type != TokType::Operator @@ -1703,6 +1758,8 @@ CodeOperator parse_operator_after_ret_type( return CodeInvalid; } + Context.Scope->Name = currtok; + OperatorT op = Invalid; switch ( currtok.Text[0] ) { @@ -1942,7 +1999,7 @@ CodeOperator parse_operator_after_ret_type( } // OpValidateResult check_result = operator__validate( op, params, ret_type, specifiers ); - CodeOperator result = def_operator( op, params, ret_type, body, specifiers, attributes, mflags ); + CodeOperator result = def_operator( op, nspace, params, ret_type, body, specifiers, attributes, mflags ); Context.pop(); return result; } @@ -2047,11 +2104,40 @@ Code parse_simple_preprocess( Parser::TokType which ) push_scope(); Token tok = currtok; - tok.Text = str_fmt_buf( "%.*s", tok.Length, tok.Text ); - tok.Length++; - Code result = untyped_str( tok ); eat( which ); + if ( currtok.Type == TokType::BraceCurly_Open ) + { + // Eat the block scope right after the macro. 
Were assuming the macro defines a function definition's signature + eat( TokType::BraceCurly_Open ); + + s32 level = 0; + while ( left && ( currtok.Type != TokType::BraceCurly_Close || level > 0 ) ) + { + if ( currtok.Type == TokType::BraceCurly_Open ) + level++; + + else if ( currtok.Type == TokType::BraceCurly_Close && level > 0 ) + level--; + + eat( currtok.Type ); + } + eat( TokType::BraceCurly_Close ); + + tok.Length = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)tok.Text; + } + + Code result = untyped_str( tok ); + Context.Scope->Name = tok; + + if ( str_compare( Context.Scope->Prev->ProcName.Ptr, "parse_typedef", Context.Scope->Prev->ProcName.Len ) != 0 ) + { + if ( check( TokType::Statement_End )) + { + eat( TokType::Statement_End ); + } + } + Context.pop(); return result; } @@ -2098,6 +2184,14 @@ Code parse_operator_function_or_variable( bool expects_function, CodeAttributes Code result = CodeInvalid; + if ( currtok.Type == TokType::Preprocess_Macro ) + { + // Were dealing with a macro after attributes/specifiers. 
+ result = parse_simple_preprocess( TokType::Preprocess_Macro ); + Context.pop(); + return result; + } + CodeType type = parse_type(); if ( type == CodeInvalid ) @@ -2106,15 +2200,38 @@ Code parse_operator_function_or_variable( bool expects_function, CodeAttributes return CodeInvalid; } - if ( check( TokType::Decl_Operator) ) + bool found_operator = false; + s32 idx = Context.Tokens.Idx; + + for ( ; idx < Context.Tokens.Arr.num(); idx++ ) + { + Token tok = Context.Tokens[ idx ]; + + if ( tok.Type == TokType::Identifier ) + { + idx++; + tok = Context.Tokens[ idx ]; + if ( tok.Type == TokType::Access_StaticSymbol ) + continue; + + break; + } + + if ( tok.Type == TokType::Decl_Operator ) + found_operator = true; + + break; + } + + if ( found_operator ) { // Dealing with an operator overload result = parse_operator_after_ret_type( ModuleFlag::None, attributes, specifiers, type ); } else { - StrC name = currtok; - eat( TokType::Identifier ); + Token name = parse_identifier(); + Context.Scope->Name = name; if ( check( TokType::Capture_Start) ) { @@ -2146,6 +2263,8 @@ Code parse_complicated_definition( Parser::TokType which ) using namespace Parser; push_scope(); + bool is_inplace = false; + labeled_scope_start PARSE_FORWARD_OR_DEFINITION: Code result = CodeInvalid; @@ -2154,22 +2273,22 @@ Code parse_complicated_definition( Parser::TokType which ) switch ( which ) { case TokType::Decl_Class: - result = parse_class(); + result = parse_class( is_inplace ); Context.pop(); return result; case TokType::Decl_Enum: - result = parse_enum(); + result = parse_enum( is_inplace ); Context.pop(); return result; case TokType::Decl_Struct: - result = parse_struct(); + result = parse_struct( is_inplace ); Context.pop(); return result; case TokType::Decl_Union: - result = parse_union(); + result = parse_union( is_inplace ); Context.pop(); return result; @@ -2220,6 +2339,7 @@ Code parse_complicated_definition( Parser::TokType which ) // Its an inplace definition // { ... 
} ; ok_to_parse = true; + is_inplace = true; } else if ( tok.Type == TokType::Identifier && tokens[ idx - 3 ].Type == TokType::Decl_Struct ) { @@ -2525,7 +2645,10 @@ Code parse_class_struct( Parser::TokType which, bool inplace_def = false ) attributes = parse_attributes(); if ( check( TokType::Identifier ) ) + { name = parse_identifier(); + Context.Scope->Name = name; + } local_persist char interface_arr_mem[ kilobytes(4) ] {0}; @@ -2642,6 +2765,8 @@ CodeBody parse_global_nspace( CodeT which ) bool expects_function = false; + Context.Scope->Start = currtok; + switch ( currtok.Type ) { case TokType::Comment: @@ -2757,6 +2882,7 @@ CodeBody parse_global_nspace( CodeT which ) case TokType::Spec_Global: case TokType::Spec_Inline: case TokType::Spec_Internal_Linkage: + case TokType::Spec_NeverInline: case TokType::Spec_Static: { SpecifierT specs_found[16] { ESpecifier::NumSpecifiers }; @@ -2766,13 +2892,18 @@ CodeBody parse_global_nspace( CodeT which ) { SpecifierT spec = ESpecifier::to_type( currtok ); + bool ignore_spec = false; + switch ( spec ) { case ESpecifier::Constexpr: case ESpecifier::Constinit: + case ESpecifier::Global: case ESpecifier::External_Linkage: + case ESpecifier::Internal_Linkage: case ESpecifier::Inline: case ESpecifier::Mutable: + case ESpecifier::NeverInline: case ESpecifier::Static: case ESpecifier::Volatile: break; @@ -2781,11 +2912,20 @@ CodeBody parse_global_nspace( CodeT which ) expects_function = true; break; + case ESpecifier::Const: + ignore_spec = true; + break; + default: - log_failure( "Invalid specifier %s for variable\n%s", ESpecifier::to_str(spec), Context.to_string() ); + StrC spec_str = ESpecifier::to_str(spec); + + log_failure( "Invalid specifier %.*s for variable\n%s", spec_str.Len, spec_str, Context.to_string() ); return CodeInvalid; } + if (ignore_spec) + break; + specs_found[NumSpecifiers] = spec; NumSpecifiers++; eat( currtok.Type ); @@ -2807,6 +2947,32 @@ CodeBody parse_global_nspace( CodeT which ) case 
TokType::Type_double: case TokType::Type_int: { + bool found_operator_cast = false; + s32 idx = Context.Tokens.Idx; + + for ( ; idx < Context.Tokens.Arr.num(); idx++ ) + { + Token tok = Context.Tokens[ idx ]; + + if ( tok.Type == TokType::Identifier ) + { + idx++; + tok = Context.Tokens[ idx ]; + if ( tok.Type == TokType::Access_StaticSymbol ) + continue; + + break; + } + + if ( tok.Type == TokType::Decl_Operator ) + found_operator_cast = true; + + break; + } + + if ( found_operator_cast ) + member = parse_operator_cast(); + member = parse_operator_function_or_variable( expects_function, attributes, specifiers ); } } @@ -2887,6 +3053,7 @@ CodeEnum parse_enum( bool inplace_def ) if ( check( TokType::Identifier ) ) { name = currtok; + Context.Scope->Name = currtok; eat( TokType::Identifier ); } @@ -3144,6 +3311,7 @@ CodeFriend parse_friend() { // Name Token name = parse_identifier(); + Context.Scope->Name = name; // Parameter list CodeParam params = parse_params(); @@ -3248,6 +3416,7 @@ CodeFn parse_functon() } Token name = parse_identifier(); + Context.Scope->Name = name; if ( ! 
name ) { Context.pop(); @@ -3298,6 +3467,7 @@ CodeNamespace parse_namespace() eat( TokType::Decl_Namespace ); Token name = parse_identifier(); + Context.Scope->Name = name; CodeBody body = parse_global_nspace( ECode::Namespace_Body ); if ( body == Code::Invalid ) @@ -3409,10 +3579,27 @@ CodeOpCast parse_operator_cast() using namespace Parser; push_scope(); + Token name = NullToken; + if ( check( TokType::Identifier ) ) + { + name = currtok; + while ( left && currtok.Type == TokType::Identifier ) + { + eat( TokType::Identifier ); + + if ( currtok.Type == TokType::Access_StaticSymbol ) + eat( TokType::Access_StaticSymbol ); + } + + name.Length = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)name.Text; + } + eat( TokType::Decl_Operator ); Code type = parse_type(); + Context.Scope->Name = { type->Name.Data, type->Name.length() }; + eat( TokType::Capture_Start ); eat( TokType::Capture_End ); @@ -3443,16 +3630,22 @@ CodeOpCast parse_operator_cast() eat( currtok.Type ); } + eat( TokType::BraceCurly_Close ); body_str.Length = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)body_str.Text; body = untyped_str( body_str ); - - eat( TokType::BraceCurly_Close ); + } + else + { + eat( TokType::Statement_End ); } CodeOpCast result = (CodeOpCast) make_code(); + if ( name ) + result->Name = get_cached_string( name ); + if (body) { result->Type = ECode::Operator_Cast; @@ -3691,6 +3884,7 @@ CodeType parse_type() name.Length = ( (sptr)currtok.Text + currtok.Length ) - (sptr)name.Text; eat( TokType::Identifier ); + Context.Scope->Name = name; } else if ( currtok.Type >= TokType::Type_Unsigned && currtok.Type <= TokType::Type_MS_W64 ) { @@ -3703,10 +3897,12 @@ CodeType parse_type() } name.Length = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)name.Text; + Context.Scope->Name = name; } else { name = parse_identifier(); + Context.Scope->Name = name; if ( ! 
name ) { log_failure( "Error, failed to type signature\n%s", Context.to_string() ); @@ -3851,13 +4047,11 @@ CodeTypedef parse_typedef() constexpr bool from_typedef = true; - // TODO : Confirm if this should stay... (Macro abuse, kept because used by zpl library code...) - // TODO : I could refactor the library code to not use this, and just ban it from usage - // TODO : (as I already do for all macros that are not at entries in a body ast...) if ( check( TokType::Preprocess_Macro )) { type = t_empty; name = currtok; + Context.Scope->Name = name; eat( TokType::Preprocess_Macro ); } else @@ -3947,6 +4141,7 @@ CodeUnion parse_union( bool inplace_def ) if ( check( TokType::Identifier ) ) { name = currtok; + Context.Scope->Name = currtok; eat( TokType::Identifier ); } @@ -4094,6 +4289,7 @@ CodeUsing parse_using() } name = currtok; + Context.Scope->Name = name; eat( TokType::Identifier ); if ( currtok.IsAssign ) @@ -4216,8 +4412,7 @@ CodeVar parse_variable() if ( type == Code::Invalid ) return CodeInvalid; - Context.Scope->Name = currtok; - eat( TokType::Identifier ); + Context.Scope->Name = parse_identifier(); CodeVar result = parse_variable_after_name( mflags, attributes, specifiers, type, Context.Scope->Name ); diff --git a/project/components/interface.upfront.cpp b/project/components/interface.upfront.cpp index 145ef8f..35c9f92 100644 --- a/project/components/interface.upfront.cpp +++ b/project/components/interface.upfront.cpp @@ -783,7 +783,7 @@ CodeNamespace def_namespace( StrC name, Code body, ModuleFlag mflags ) return result; } -CodeOperator def_operator( OperatorT op +CodeOperator def_operator( OperatorT op, StrC nspace , CodeParam params_code, CodeType ret_type, Code body , CodeSpecifiers specifiers, CodeAttributes attributes , ModuleFlag mflags ) @@ -809,7 +809,7 @@ CodeOperator def_operator( OperatorT op return CodeInvalid; } - char const* name = str_fmt_buf( "operator %s", to_str(op) ); + char const* name = str_fmt_buf( "%.*soperator %s", nspace.Len, 
nspace.Ptr, to_str(op) ); CodeOperator result = (CodeOperator) make_code(); diff --git a/project/components/static_data.cpp b/project/components/static_data.cpp index 77f8bd9..f914d94 100644 --- a/project/components/static_data.cpp +++ b/project/components/static_data.cpp @@ -50,6 +50,7 @@ global CodeSpecifiers spec_inline; global CodeSpecifiers spec_internal_linkage; global CodeSpecifiers spec_local_persist; global CodeSpecifiers spec_mutable; +global CodeSpecifiers spec_neverinline; global CodeSpecifiers spec_override; global CodeSpecifiers spec_ptr; global CodeSpecifiers spec_ref; diff --git a/project/dependencies/file_handling.cpp b/project/dependencies/file_handling.cpp index 56d25b8..3c9a931 100644 --- a/project/dependencies/file_handling.cpp +++ b/project/dependencies/file_handling.cpp @@ -224,7 +224,7 @@ internal GEN_FILE_CLOSE_PROC( _posix_file_close ) FileOperations const default_file_operations = { _posix_file_read, _posix_file_write, _posix_file_seek, _posix_file_close }; -GEN_NEVER_INLINE GEN_FILE_OPEN_PROC( _posix_file_open ) +neverinline GEN_FILE_OPEN_PROC( _posix_file_open ) { s32 os_mode; switch ( mode & GEN_FILE_MODES ) diff --git a/project/enums/ESpecifier.csv b/project/enums/ESpecifier.csv index 1dc7a4e..736628e 100644 --- a/project/enums/ESpecifier.csv +++ b/project/enums/ESpecifier.csv @@ -9,6 +9,7 @@ Inline, inline Internal_Linkage, internal Local_Persist, local_persist Mutable, mutable +NeverInline, neverinline Ptr, * Ref, & Register, register diff --git a/project/enums/ETokType.csv b/project/enums/ETokType.csv index e085541..4f6c717 100644 --- a/project/enums/ETokType.csv +++ b/project/enums/ETokType.csv @@ -61,6 +61,7 @@ Spec_Inline, "inline" Spec_Internal_Linkage, "internal" Spec_LocalPersist, "local_persist" Spec_Mutable, "mutable" +Spec_NeverInline, "neverinline" Spec_Override, "override" Spec_Static, "static" Spec_ThreadLocal, "thread_local" diff --git a/scripts/.clang-format b/scripts/.clang-format index 9f67e41..3b57be7 100644 --- 
a/scripts/.clang-format +++ b/scripts/.clang-format @@ -36,8 +36,8 @@ AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: true AlwaysBreakTemplateDeclarations: Yes -BinPackArguments: true -BinPackParameters: true +BinPackArguments: false +BinPackParameters: false BitFieldColonSpacing: Both @@ -94,13 +94,13 @@ IncludeBlocks: Preserve IndentCaseBlocks: false -IndentCaseLabels: false +IndentCaseLabels: true IndentExternBlock: AfterExternBlock IndentGotoLabels: true IndentPPDirectives: AfterHash IndentRequires: true IndentWidth: 4 -IndentWrappedFunctionNames: false +IndentWrappedFunctionNames: true # InsertNewlineAtEOF: true # InsertTrailingCommas: Wrapped diff --git a/test/test.singleheader_ast.cpp b/test/test.singleheader_ast.cpp index b5d495f..4322b74 100644 --- a/test/test.singleheader_ast.cpp +++ b/test/test.singleheader_ast.cpp @@ -20,12 +20,12 @@ void check_singleheader_ast() log_fmt("generated AST!!!\n"); - // s32 idx = 0; - // for ( Code entry : ast ) - // { - // log_fmt("Entry %d: %s", idx, entry.to_string() ); - // idx++; - // } + s32 idx = 0; + for ( Code entry : ast ) + { + log_fmt("Entry %d: %s\n", idx, entry.to_string() ); + idx++; + } Builder builder; builder.open( "singleheader_copy.gen.hpp" ); From a8a9b681f0a1ec75272782252b1901fcd25ebe8e Mon Sep 17 00:00:00 2001 From: Ed_ Date: Wed, 2 Aug 2023 14:01:56 -0400 Subject: [PATCH 9/9] test completes singleheader ast construction and serailizes with corruption --- project/components/ast.cpp | 4 +- project/components/interface.parsing.cpp | 76 ++++++++++-------------- project/file_processors/builder.cpp | 2 +- test/test.singleheader_ast.cpp | 4 ++ 4 files changed, 39 insertions(+), 47 deletions(-) diff --git a/project/components/ast.cpp b/project/components/ast.cpp index 33babe4..8804942 100644 --- a/project/components/ast.cpp +++ b/project/components/ast.cpp @@ -483,7 +483,7 @@ String AST::to_string() case Operator_Cast_Fwd: if ( Specs ) { - result.append_fmt( "operator %s()" ); + 
result.append_fmt( "operator %s()", ValueType->to_string() ); CodeSpecifiers specs = cast(); @@ -493,7 +493,7 @@ String AST::to_string() result.append_fmt( " %s", (char const*)ESpecifier::to_str( spec ) ); } - result.append_fmt( ";", Body->to_string() ); + result.append( ";" ); break; } diff --git a/project/components/interface.parsing.cpp b/project/components/interface.parsing.cpp index 7dde9a3..8de7a61 100644 --- a/project/components/interface.parsing.cpp +++ b/project/components/interface.parsing.cpp @@ -1498,11 +1498,6 @@ CodeParam parse_params( bool use_template_capture = false ) Token name = NullToken; - if ( Context.Tokens.Idx == 18546 ) - { - log_fmt("here"); - } - if ( check( TokType::Identifier ) ) { name = currtok; @@ -1521,7 +1516,10 @@ CodeParam parse_params( bool use_template_capture = false ) return CodeInvalid; } - while ( left && currtok.Type != TokType::Comma ) + while ( left + && currtok.Type != TokType::Comma + && currtok.Type != TokType::Capture_End + ) { value_tok.Length = ( (sptr)currtok.Text + currtok.Length ) - (sptr)value_tok.Text; eat( currtok.Type ); @@ -1589,7 +1587,9 @@ CodeParam parse_params( bool use_template_capture = false ) return CodeInvalid; } - while ( left && currtok.Type != TokType::Comma ) + while ( left + && currtok.Type != TokType::Comma && currtok.Type != TokType::Capture_End + ) { value_tok.Length = ( (sptr)currtok.Text + currtok.Length ) - (sptr)value_tok.Text; eat( currtok.Type ); @@ -2017,9 +2017,8 @@ CodeVar parse_variable_after_name( using namespace Parser; push_scope(); - Code array_expr = parse_array_decl(); - Code expr = { nullptr }; - + Code array_expr = parse_array_decl(); + Code expr = { nullptr }; Code bitfield_expr = { nullptr }; if ( currtok.IsAssign ) @@ -2044,6 +2043,29 @@ CodeVar parse_variable_after_name( expr = untyped_str( expr_tok ); } + if ( currtok.Type == TokType::BraceCurly_Open ) + { + Token expr_tok = currtok; + + eat( TokType::BraceCurly_Open ); + + s32 level = 0; + while ( left && ( 
currtok.Type != TokType::BraceCurly_Close || level > 0 ) ) + { + if ( currtok.Type == TokType::BraceCurly_Open ) + level++; + + else if ( currtok.Type == TokType::BraceCurly_Close && level > 0 ) + level--; + + eat( currtok.Type ); + } + eat( TokType::BraceCurly_Close ); + + expr_tok.Length = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)expr_tok.Text; + expr = untyped_str( expr_tok ); + } + if ( currtok.Type == TokType::Assign_Classifer ) { eat( TokType::Assign_Classifer ); @@ -2142,40 +2164,6 @@ Code parse_simple_preprocess( Parser::TokType which ) return result; } -internal inline -Code parse_variable_assignment() -{ - using namespace Parser; - push_scope(); - - Code expr = CodeInvalid; - - if ( currtok.IsAssign ) - { - eat( TokType::Operator ); - - Token expr_tok = currtok; - - if ( currtok.Type == TokType::Statement_End ) - { - log_failure( "Expected expression after assignment operator\n%s", Context.to_string() ); - Context.pop(); - return CodeInvalid; - } - - while ( left && currtok.Type != TokType::Statement_End ) - { - expr_tok.Length = ( (sptr)currtok.Text + currtok.Length ) - (sptr)expr_tok.Text; - eat( currtok.Type ); - } - - expr = untyped_str( expr_tok ); - } - - Context.pop(); - return expr; -} - internal inline Code parse_operator_function_or_variable( bool expects_function, CodeAttributes attributes, CodeSpecifiers specifiers ) { diff --git a/project/file_processors/builder.cpp b/project/file_processors/builder.cpp index 64662ea..e747d43 100644 --- a/project/file_processors/builder.cpp +++ b/project/file_processors/builder.cpp @@ -1,6 +1,6 @@ void Builder::print( Code code ) { - Buffer.append_fmt( "%s", code->to_string() ); + Buffer.append( code->to_string() ); } void Builder::print_fmt( char const* fmt, ... 
) diff --git a/test/test.singleheader_ast.cpp b/test/test.singleheader_ast.cpp index 4322b74..7fa0efe 100644 --- a/test/test.singleheader_ast.cpp +++ b/test/test.singleheader_ast.cpp @@ -23,6 +23,10 @@ void check_singleheader_ast() s32 idx = 0; for ( Code entry : ast ) { + if (idx == 900) + { + log_fmt("break here\n"); + } log_fmt("Entry %d: %s\n", idx, entry.to_string() ); idx++; }