From 7ba474069cecc67f9e3327464b1abf600c83610e Mon Sep 17 00:00:00 2001 From: Ed_ Date: Sat, 22 Apr 2023 22:24:55 -0400 Subject: [PATCH] Docs update, parser impl design changes, AST::add fleshed out. --- Readme.md | 168 +-- project/Bloat.cpp | 94 -- project/Bloat.hpp | 35 +- project/Bloat.redef.hpp | 4 - project/Bloat.undef.hpp | 18 - project/Readme.md | 87 ++ project/gen.cpp | 1944 +++++++++++++++++----------------- project/gen.hpp | 617 +++-------- project/gen.singleheader.cpp | 0 9 files changed, 1330 insertions(+), 1637 deletions(-) delete mode 100644 project/Bloat.redef.hpp delete mode 100644 project/Bloat.undef.hpp create mode 100644 project/Readme.md delete mode 100644 project/gen.singleheader.cpp diff --git a/Readme.md b/Readme.md index 85c2dfa..cdd1517 100644 --- a/Readme.md +++ b/Readme.md @@ -2,7 +2,10 @@ An attempt at simple staged metaprogramming for c/c++. -This library is intended for small-to midsized projects. +The library is mostly a compositon of code element constructors. +These build up a code AST to then serialize with a file builder. + +Intended for small-to midsized projects. ### TOC @@ -15,7 +18,7 @@ This library is intended for small-to midsized projects. * [The four constructors](#there-are-four-sets-of-interfaces-for-code-ast-generation-the-library-provides) * [Code generation and modification](#code-generation-and-modification) * [On multithreading](#on-multi-threading) -* [On extending with whatever features you want](#on-extending-with-whatever-features-you-want) +* [Extending the library](#extending-the-library) * [Why](#why) * [TODO](#todo) @@ -27,7 +30,7 @@ Version 1 will have C and a subset of C++ features available to it. I will generate with this library a C99 or 11 variant when Version 1 is complete. A single-header version will also be generated. -The size target of this library is to stay under 5000 sloc (data & interface code). +The size target of this library is to stay under 5-6k sloc (data & interface code). 
With the dependency code being under 10000 sloc. (Containers, Memory, String handling, Language bloat) Any dependencies from the zpl library will be exposed manually with using declarations into global scope. @@ -70,7 +73,8 @@ The user is given `Code` typed objects that are used to build up the AST. Example using each construction interface: -#### Upfront +### Upfront + ```cpp Code t_uw = def_type( name(uw) ); @@ -88,7 +92,7 @@ Code header; } ``` -#### Incremental +### Incremental ```cpp // Types are done the same with upfront. Incremental does not have a full interface replacment. @@ -106,7 +110,7 @@ Code header = make_struct( name(ArrayHeader) ); } ``` -#### Parse +### Parse ```cpp Code header = parse_struct( code( @@ -122,20 +126,17 @@ Code header = parse_struct( code( Parse will automatically generate any types that have not been used previously. -#### Undtyped +### Undtyped ```cpp -Code header = untyped_str( - R("struct ArrayHeader +Code header = untyped_str( R(" + struct ArrayHeader { - #define Using_ArrayHeader_Data \ - uw Num; \ - uw Capacity; \ + uw Num; + uw Capacity; allocator Allocator; - Using_ArrayHeader_Data - };)" -); - + }; +)"); ``` `name` is a helper macro for providing a string literal with its size, intended for the name paraemter of functions. @@ -213,38 +214,42 @@ If in your use case, decide to have exclusive separation or partial separation o * Macro or template generation : This library is to avoid those, adding support for them adds unnecessary complexity. * Vendor provided dynamic dispatch (virtuals) : Roll your own, this library might roll its own vtable/interface generation helpers in the future. -* RTTI : This is kinda covered with the last point, but just wanted to emphasize. -* Exceptions : Most fo the -* Execution statment validation : Execution expressions are defined using the untyped string API. +* RTTI +* Exceptions +* Execution statement validation : Execution expressions are defined using the untyped string API. 
Keywords in from "Modern C++": -* constexpr : Great to store compile-time constants, (easier to garanteed when emitted from gentime) +* constexpr : Great to store compile-time constants, (easier to garantee when emitted from gentime) * consteval : Technically fine so long as templates are not used. Need to make sure to execute in moderation. * constinit : Better than constexpr at doing its job, however, its only c++ 20. * export : Useful if c++ modules ever come around to actually being usable. * import : ^^ * module : ^^ -These features are in as they either are not horrible when used conservatively or are a performance benefit (modules). +These features are not horrible when used conservatively, or are a performance benefit (modules). -When it comes to excution statements: -There is no parse API for validating excution statements (possibly will add in the future, but very limited in what it can do). -This reason there isn't one: thats where the can of worms open for parsing validation. -For most metaprogramming (espcially for c/c++), expression validation is not necessary, it can be done by the compiler for the runtime program. +When it comes to expressions: + +There is no support for validating expressions. +The reason: thats where the can of worms open for parsing validation. This library would most likey more than double in size with that addition alone. +For most metaprogramming (espcially for C/C++), expression validation is not necessary for metaprogramming, it can be done by the compiler for the runtime program. Most of the time, the critical complex metaprogramming conundrums are actaully producing the frame of abstractions around the expressions. Thus its not very much a priority to add such a level of complexity to the library when there isn't a high reward or need for it. -To further this point, lets say you do have an error with an execution statment. 
It will either be caught by the c++ compiler when compiling the target program, or at runtime for the program. +To further this point, let's say you do have an error with an expression's composition. +It will either be caught by the c++ compiler when compiling the target program, or at runtime for the program. -* If its not caught by the compiler, the only downside is the error appers on the generated function. Those with knowledge of how that definition was generated know where to find the code that inlined that expression in that file for that definition. -* If its caught at runtime. The expression will be shown in a stack trace if debug symbols are enabled in the generated function body. Yet again those with knowledge of how that definition was generated know where to find the code that inlined that expression. +* If it's not caught by the compiler, the only downside is the error appears on the generated function. + Those with knowledge of how that definition was generated know where to find the code that inlined that expression in that file for that definition. +* If it's caught at runtime, the expression will be shown in a stack trace if debug symbols are enabled in the generated function body. + Yet again those with knowledge of how that definition was generated know where to find the code that inlined that expression. -In both these cases will get objectively better debug information than you would normally get on most c++ compilers with complex macros or templates. +In both these cases the user will get objectively better debug information than you would normally get on most c++ compilers/editors using complex macros or templates. ### The Data & Interface -As mentioned in [Usage](#Usage), the user is provided Code objects by calling the constructor functions to generate them or find existing matches. +As mentioned in [Usage](#usage), the user is provided Code objects by calling the constructor functions to generate them or find existing matches. 
The AST is managed by the library and provided the user via its interface prodedures. However, the user may specificy memory configuration. @@ -252,22 +257,44 @@ However, the user may specificy memory configuration. Data layout of AST struct: ```cpp -AST* Parent; -CachedString Name; -CachedString Comment; union { - array(AST*) Entries; - CachedString Content; + AST* ArrStatic[AST::ArrS_Cap]; + Array(AST*) Entries; + StringCached Content; + SpecifierT ArrSpecs[AST::ArrSpecs_Cap]; }; +AST* Parent; +StringCached Name; +StringCached Comment; CodeT Type; OperatorT Op; bool Readonly; -u8 _64_Align[23]; +bool DynamicEntries; +u8 StaticIndex; +u8 _Align_Pad[6]; ``` *`CodeT` is a typedef for `ECode::Type` which has an underlying type of `u32`* *`OperatorT` is a typedef for `EOperator::Type` which has an underlying type of `u32`* +AST widths are setup to be AST_POD_Size. +The width dictates how much the static array can hold before it must give way to using an allocated array: + +```cpp +constexpr static +uw ArrS_Cap = +( AST_POD_Size + - sizeof(AST*) + - sizeof(StringCached) * 2 + - sizeof(CodeT) + - sizeof(OperatorT) + - sizeof(bool) * 2 + - sizeof(u8) * 7 ) +/ sizeof(AST*); +``` + +*Ex: If the AST_POD_Size is 256 the capacity of the static array is 26.* + ASTs can be set to readonly by calling Code's lock() member function. Adding comments is always available even if the AST is set to readonly. @@ -278,10 +305,7 @@ Data Notes: * ASTs are wrapped for the user in a Code struct which essentially a warpper for a AST* type. * Both AST and Code have member symbols but their data layout is enforced to be POD types. * This library treats memory failures as fatal. -* The default setup assumes large definition sets may be provided to bodies so AST::Entires are dynamic arrays. - * They're allocated to arenas currently and are pretty wasteful if they go over their reserve size (its never recycled). 
- * Most likely will need to implement a dynamic-sized bucket allocation strategy for the entry arrays if memory is getting stressed. - * Otherwise if you are using fixed size entries and your definitions are under 128~512 entries for the body, you may be better of with a fixed-sized array. +* Entires start as a static array, however if it goes over capacity a dynamic array is allocated for the entires. * Strings are stored in their own set of arenas. AST constructors use cached strings for names, and content. ## There are four sets of interfaces for Code AST generation the library provides @@ -300,7 +324,7 @@ Interface : * def_class * def_enum -* def_enum_class +* def_execution NOTE: This is equivalent to untyped_str, except that its intended for use only in execution scopes. * def_friend * def_function * def_namespace @@ -314,6 +338,7 @@ Interface : * def_type * def_typedef * def_using + * def_class_body * def_enum_body * def_function_body NOTE: Use this for operator bodies as well. @@ -323,7 +348,7 @@ Interface : Usage: -```c++ +```cpp = def_( ... ); Code @@ -385,6 +410,7 @@ Interface : * parse_type * parse_typedef * parse_using + * parse_classes * parse_enums * parse_functions @@ -394,10 +420,16 @@ Interface : * parse_typedefs * parse_usings +The parse API treats any execution scope definitions with no validation and are turned into untyped Code ASTs. +This includes the assignmetn of variables; due to the library not yet supporting c/c++ expression parsing. + +The plural variants provide an array of codes, its up to the user to add them to a body AST +(they are not auto-added to a body) + Usage: ```cpp -Code = parse_( ); +Code = parse_( string with code ); Code = def_( ..., parse_( @@ -411,9 +443,6 @@ Code = make_( ... ) } ``` -The parse API treats any execution scope definitions with no validation and are turned into untyped Code ASTs. -This includes the assignmetn of variables; due to the library not yet supporting c/c++ expression parsing. 
- ### Untyped constructions Code ASTs are constructed using unvalidated strings. @@ -447,17 +476,28 @@ Template metaprogramming in the traditional sense becomes possible with the use ```cpp char const* token_key, token_value, ...; -char const* template = txt( - Code with {key value} to replace with token_values +char const* template_str = txt( + Code with {key} to replace with token_values ... ); -char const* gen_code_str = token_fmt( template, num_tokens, token, ... ); +char const* gen_code_str = token_fmt( template, num_tokens, { token_key, token_value }, ... ); Code = parse_( gen_code_str ); ``` +## Extent of operator overload validation + +The AST and constructors will be able to validate that the arguments provided for the operator type match the expected form: + +* If return type must match a parameter +* If number of parameters is correct +* If added as a member symbol to a class or struct, that operator matches the requirements for the class (types match up) + +The user is responsible for making sure the code types provided are correct +and have the desired specifiers assigned to them beforehand. + ## Code generation and modification -There are three provided interfaces: +There are three provided file interfaces: * Builder * Editor @@ -483,9 +523,9 @@ Editor and Scanner are disabled by default, use `GEN_FEATURE_EDITOR` and `GEN_FE All three have the same parameters with exception to remove which only has SymbolInfo and Policy: * SymbolInfo: - * File : The file the symbol resides in. Leave null to indicate to search all files. - * Marker : #define symbol that indicates a location or following signature is valid to manipulate. Leave null to indicate that the signature should only be used. - * Signature : Use a Code symbol to find a valid location to manipulate, can be further filtered with the marker. Leave null to indicate that the marker should only be used. + * File : The file the symbol resides in. Leave null to indicate to search all files. 
Leave null to indicated all-file search. + * Marker : #define symbol that indicates a location or following signature is valid to manipulate. Leave null to indicate the signature should only be used. + * Signature : Use a Code symbol to find a valid location to manipulate, can be further filtered with the marker. Leave null to indicate the marker should only be used. * Policy : Additional policy info for completing the request (empty for now) * Code : Code to inject if adding, or replace existing code with. @@ -515,24 +555,19 @@ Request queue in both Editor and Scanner are cleared once process_requests compl ## On multi-threading -Its intended eventually for this library to support multi-threading at some point, -however for now it does not. +Currently unsupported. The following changes would have to be made: -The following changes would have to be made: - -* Setup static data accesss with fences if more than one thread will generate ASTs +* Setup static data accesss with fences if more than one thread will generate ASTs ( or keep a different set for each thread) * Make sure local peristent data of functions are also thread local. * The builder should be done on a per-thread basis. -* Due to the design of the editor and scanner, it will most likely - be best to make each file a job to process request entries on. - Receipts should have an an array to store per thread. - They can be combined to the final reciepts array when all files have been processed. +* Due to the design of the editor and scanner, it will most likely be best to make each file a job to process request entries on. Receipts should have an an array to store per thread. They can be combined to the final reciepts array when all files have been processed. -For now single-threaded should be pretty quick even without heavy optimizations. 
+For now the single-threaded path still has a bunch of optimizations that can most likely be done to it, and it will be more than capable +for the majority of projects this thing is intended for. (If you use this on Unreal... well, you're asking for it...) -## On extending with whatever features you want +## Extending the library -This library is relatively very small, and you can easily extend it. +This library is relatively very small, and can be extended without much hassle. The untyped codes and builder/editor/scanner can be technically be used to circumvent any sort of constrictions the library has with: modern c++, templates, macros, etc. @@ -560,12 +595,13 @@ Thus a rule of thumb is if its a simple definition you can get away with just th However, if: -* Your compile time complexity becomes large. +* Compile time complexity becomes large. * You enjoy actually *seeing* the generated code instead of just the error symbols or the pdb symbols. * You value your debugging expereince, and would like to debug your metaprogram, without having to step through the debug version of the compiler (if you even can) * Want to roll your own reflection system * Want to maintain a series of libraries for internal use, but don't want to deal with manual merging as often when they update. * Want to create tailored headers for your code or for your libraries since you usually don't need the majority of the code within them. +* You just dislike metaprogramming with template expansion Then this might help you boostrap a toolset todo so. 
diff --git a/project/Bloat.cpp b/project/Bloat.cpp index 1ba59fb..0f435c9 100644 --- a/project/Bloat.cpp +++ b/project/Bloat.cpp @@ -2,11 +2,6 @@ #include "Bloat.hpp" -namespace Global -{ - bool ShouldShowDebug = false; -} - namespace Memory { using namespace zpl; @@ -41,92 +36,3 @@ namespace Memory arena_free( & Global_Arena); } } - - -struct TokEntry -{ - char const* Str; - sw Length; -}; - -ZPL_TABLE( static, TokMap, tokmap_, TokEntry ) - -sw token_fmt_va( char* buf, uw buf_size, char const* fmt, s32 num_tokens, va_list va ) -{ - char const* buf_begin = buf; - sw remaining = buf_size; - - TokMap tok_map; - { - tokmap_init( & tok_map, g_allocator ); - - s32 left = num_tokens; - - while ( left-- ) - { - char const* token = va_arg( va, char const* ); - char const* value = va_arg( va, char const* ); - - TokEntry entry - { - value, - str_len(value, (sw)128) - }; - - u32 key = crc32( token, str_len(token, 32) ); - - tokmap_set( & tok_map, key, entry ); - } - } - - sw result = 0; - char current = *fmt; - - while ( current ) - { - sw len = 0; - - while ( current && current != '{' && remaining ) - { - *buf = *fmt; - buf++; - fmt++; - - current = *fmt; - } - - if ( current == '{' ) - { - char const* scanner = fmt; - - s32 tok_len = 0; - - while ( *scanner != '}' ) - { - tok_len++; - scanner++; - } - - char const* token = fmt; - - u32 key = crc32( token, tok_len ); - TokEntry value = * tokmap_get( & tok_map, key ); - sw left = value.Length; - - while ( left-- ) - { - *buf = *value.Str; - buf++; - value.Str++; - } - - scanner++; - fmt = scanner; - current = *fmt; - } - } - - tokmap_clear( & tok_map ); - - return result; -} diff --git a/project/Bloat.hpp b/project/Bloat.hpp index b29c605..320af7f 100644 --- a/project/Bloat.hpp +++ b/project/Bloat.hpp @@ -45,6 +45,8 @@ using zpl::u32; using zpl::u64; using zpl::uw; using zpl::sw; +using zpl::sptr; +using zpl::uptr; using zpl::Arena; using zpl::AllocatorInfo; @@ -62,6 +64,7 @@ using zpl::arena_allocator; using 
zpl::arena_init_from_memory; using zpl::arena_init_from_allocator; using zpl::arena_free; +using zpl::assert_crash; using zpl::str_fmt_buf; using zpl::char_is_alpha; using zpl::char_is_alphanumeric; @@ -102,7 +105,7 @@ using zpl::str_len; #endif -#if defined(__GNUC__) || defined(__clang__) || true +#if defined(__GNUC__) || defined(__clang__) // Supports 0-10 arguments #define macro_num_args_impl( _0, \ _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, \ @@ -142,7 +145,6 @@ using zpl::str_len; #define bit( Value_ ) ( 1 << Value_ ) #define bitfield_is_equal( Field_, Mask_ ) ( ( (Mask_) & (Field_) ) == (Mask_) ) -#define ct constexpr #define forceinline ZPL_ALWAYS_INLINE #define print_nl( _) zpl_printf("\n") #define ccast( Type_, Value_ ) * const_cast< Type_* >( & (Value_) ) @@ -176,16 +178,12 @@ do \ } \ while(0); -ct char const* Msg_Invalid_Value = "INVALID VALUE PROVIDED"; - -namespace Global -{ - extern bool ShouldShowDebug; -} +constexpr +char const* Msg_Invalid_Value = "INVALID VALUE PROVIDED"; namespace Memory { - ct uw Initial_Reserve = megabytes(10); + constexpr uw Initial_Reserve = megabytes(10); extern Arena Global_Arena; // #define g_allocator arena_allocator( & Memory::Global_Arena) @@ -198,28 +196,9 @@ namespace Memory void cleanup(); } -sw token_fmt_va( char* buf, uw buf_size, char const* fmt, s32 num_tokens, va_list va ); - -inline -char const* token_fmt( char const* fmt, sw num_tokens, ... ) -{ - local_persist thread_local - char buf[ZPL_PRINTF_MAXLEN] = { 0 }; - - va_list va; - va_start(va, fmt); - token_fmt_va(buf, ZPL_PRINTF_MAXLEN, fmt, num_tokens, va); - va_end(va); - - return buf; -} - inline sw log_fmt(char const *fmt, ...) { - if ( Global::ShouldShowDebug == false ) - return 0; - sw res; va_list va; diff --git a/project/Bloat.redef.hpp b/project/Bloat.redef.hpp deleted file mode 100644 index 4017ade..0000000 --- a/project/Bloat.redef.hpp +++ /dev/null @@ -1,4 +0,0 @@ -/* - Redefine any macros. 
- Used if you have some code thats getting parsed and don't want it ot get preprocessed. -*/ diff --git a/project/Bloat.undef.hpp b/project/Bloat.undef.hpp deleted file mode 100644 index 28ccbc3..0000000 --- a/project/Bloat.undef.hpp +++ /dev/null @@ -1,18 +0,0 @@ -/* - Remvoe any macro definitions related to the Bloat header. -*/ - -#undef bit -#undef bitfield_is_equal -#undef ct -#undef forceinline -#undef print_nl -#undef scast -#undef rcast -#undef pcast -#undef txt - -#undef do_once -#undef do_once_start -#undef do_once_end - diff --git a/project/Readme.md b/project/Readme.md new file mode 100644 index 0000000..fc46c76 --- /dev/null +++ b/project/Readme.md @@ -0,0 +1,87 @@ +# Documentation + +This library is currently in a bootstrapping phase. + +Eventually it will have zero dependencies and have its code size severely constricted. + +All dependencies are currently held within `Bloat.hpp` and `Bloat.cpp` + +All the library code is contained in two files: `gen.hpp` and `gen.cpp` + + +## Bloat.hpp/cpp + +Currently acts as the isolation header for third-party dependencies along with code not directly related to the library. + +Organization: + +* ZPL inclusion and selective symbol exposure to global scope. +* Utility macro definitions used throughout the library. +* Global memory arena definition +* Token string formatter +* Formatted and Fatal Logs + +The cpp contains the implementation of the global memory arena and the token formatter. + +Any global symbol pollution will be removed when dependencies are integrated properly into the library. + +## gen.hpp + +While getting fleshed out, all feature macros are defined on the top of the header. 
+ +These macros are: + +* `GEN_DEFINE_DSL` : Define the preprocessor DSL for using the library interface +* `GEN_DEFINE_LIBRARY_CODE_CONSTANTS` : Optional typename codes as they are non-standard to C/C++ and not necessary to library usage +* `GEN_ENCORCE_READONLY_AST` : Defines checks in Code when accessing the AST to make sure readonly marked ASTs are not mutated +* `GEN_FEATURE_INCREMENTAL` : Defines the incremental constructors +* `GEN_FEATURE_PARSING` : Defines the parse constructors +* `GEN_FEATURE_EDITOR` : Defines the file editing features for changing definitions based on ASTs +* `GEN_FEATURE_SCANNER` : Defines the file scanning features for generating ASTs + +Due to the design of `gen.hpp` to support being written alongside runtime intended code (in the same file), all the code is wrapped in a `gen_time` `#ifdef` and then wrapped further in a `gen` namespace to avoid pollution of the global scope. + +*Note: It's possible with the scanner feature to support parsing runtime files that use "generic" macros or identifiers with certain patterns. +This can be used to auto-queue generation of dependent definitions for the symbols used.* + +### Organization + +log_failure definition : based on whether to always use fatal on all errors + +Major enum definitions and their associated functions used with the AST data + +* `ECode` : Used to tag ASTs by their type +* `EOperator` : Used to tag operator overloads with their op type +* `ESpecifier` : Used with specifier ASTs for all specifiers the user may tag an associated AST with. + +#### Data Structures + +`StringTable` : Hash table for cached strings. (`StringCached` typedef used to denote strings managed by it) + +`AST` : The node data structure for the code. +`Code` : Wrapper for `AST` with functionality for handling it appropriately. + +`TypeTable` : Hash table for cached typename ASTs. 
+ +#### Gen Interface + +First set of fowards are either backend functions used for various aspects of AST generation or configurating allocators used for different containers. + +Interface fowards defined in order of: Upfront, Incremental, Parsing, Untyped. + +From there forwards for the File handlers are defined: Builder, Editor, Scanner. + +#### Macros + +General helper macros are defined along with the optional DSL macros. + +#### Constants + +Constants including optional ones are defined. + +#### Inlines + +Inlined functions related to the AST datatype that required forwards for gen interface functions are defined. + +## gen.cpp + diff --git a/project/gen.cpp b/project/gen.cpp index cc242d9..876032e 100644 --- a/project/gen.cpp +++ b/project/gen.cpp @@ -26,7 +26,7 @@ namespace gen } #pragma region Constants -# ifdef GEN_DEFINE_LIBRARY_CODE_CONSTANTS + #ifdef GEN_DEFINE_LIBRARY_CODE_CONSTANTS Code type_ns(void); Code type_ns(bool); @@ -48,7 +48,7 @@ namespace gen Code type_ns(f32); Code type_ns(f64); -# endif + #endif Code access_public; Code access_protected; @@ -61,195 +61,305 @@ namespace gen Code spec_ref; #pragma endregion Constants -# pragma region AST +#pragma region AST Body Case Macros +# define AST_BODY_CLASS_UNALLOWED_TYPES \ + case Class_Body: \ + case Enum_Body: \ + case Friend: \ + case Function_Body: \ + case Function_Fwd: \ + case Global_Body: \ + case Namespace: \ + case Namespace_Body: \ + case Operator: \ + case Operator_Fwd: \ + case Parameters: \ + case Specifiers: \ + case Struct_Body: \ + case Typename: + +# define AST_BODY_FUNCTION_UNALLOWED_TYPES \ + case Access_Public: \ + case Access_Protected: \ + case Access_Private: \ + case Class_Body: \ + case Enum_Body: \ + case Friend: \ + case Function_Body: \ + case Function_Fwd: \ + case Global_Body: \ + case Namespace: \ + case Namespace_Body: \ + case Operator: \ + case Operator_Fwd: \ + case Operator_Member: \ + case Operator_Member_Fwd: \ + case Parameters: \ + case Specifiers: \ + case 
Struct_Body: \ + case Typename: + +# define AST_BODY_GLOBAL_UNALLOWED_TYPES \ + case Access_Public: \ + case Access_Protected: \ + case Access_Private: \ + case Class_Body: \ + case Enum_Body: \ + case Execution: \ + case Friend: \ + case Function_Body: \ + case Global_Body: \ + case Namespace_Body: \ + case Operator_Member: \ + case Operator_Member_Fwd: \ + case Parameters: \ + case Specifiers: \ + case Struct_Body: \ + case Typename: + +# define AST_BODY_NAMESPACE_UNALLOWED_TYPES \ + case Access_Public: \ + case Access_Protected: \ + case Access_Private: \ + case Class_Body: \ + case Enum_Body: \ + case Execution: \ + case Friend: \ + case Function_Body: \ + case Global_Body: \ + case Namespace_Body: \ + case Operator_Member: \ + case Operator_Member_Fwd: \ + case Parameters: \ + case Specifiers: \ + case Struct_Body: \ + case Typename: + +# define AST_BODY_STRUCT_UNALLOWED_TYPES \ + case Enum_Body: \ + case Execution: \ + case Function_Body: \ + case Global_Body: \ + case Namespace: \ + case Namespace_Body: \ + case Operator: \ + case Operator_Fwd: \ + case Parameters: \ + case Specifiers: \ + case Struct_Body: \ + case Typename: \ + case Using_Namespace: +#pragma endregion AST Body Case Macros + +#pragma region AST Code Code::Invalid; bool AST::add( AST* other ) { + #ifdef GEN_FEATURE_INCREMENTAL + if ( other == nullptr ) + { + log_failure( "AST::add: Provided a null AST" ); + return false; + } + + if ( other->Type == ECode::Invalid ) + { + log_failure( "AST::add: Provided an invalid AST" ); + return false; + } + switch ( Type ) { using namespace ECode; case Invalid: - break; + log_failure( "AST::add: Cannot add an AST to an invalid AST." ); + return false; case Untyped: - break; + log_failure( "AST::add: Cannot add an AST to an untyped AST." ); + return false; case Access_Public: - break; + log_failure( "AST::add: Cannot add an AST to a public access specifier." 
); + return false; case Access_Protected: - break; + log_failure( "AST::add: Cannot add an AST to a protected access specifier." ); + return false; case Access_Private: - break; + log_failure( "AST::add: Cannot add an AST to a private access specifier." ); + return false; case Class: - break; + log_failure( "AST::add: Cannot add an AST to a class, only to its body" ); + return false; case Class_Fwd: - break; + log_failure( "AST::add: Cannot add an AST to a class forward declaration." ); + return false; case Class_Body: + switch ( other->Type ) + { + AST_BODY_CLASS_UNALLOWED_TYPES + { + log_failure( "AST::add: Cannot add an AST to a class body." ); + return false; + } + + default: + break; + } break; case Enum: - break; + log_failure( "AST::add: Cannot add an AST to an enum, only to its body" ); + return false; case Enum_Fwd: - break; + log_failure( "AST::add: Cannot add an AST to an enum forward declaration." ); + return false; case Enum_Body: + if ( other->Type != Untyped ) + { + log_failure( "AST::add: Cannot add an AST which is not untyped to an enum body." ); + return false; + } break; case Enum_Class: - break; + log_failure( "AST::add: Cannot add an AST to an enum class, only to its body" ); + return false; case Enum_Class_Fwd: - break; + log_failure( "AST::add: Cannot add an AST to an enum class forward declaration." ); + return false; case Friend: - break; + log_failure( "AST::add: Cannot add an AST to a friend declaration." ); + return false; case Function: - break; + log_failure( "AST::add: Cannot add an AST to a function, only to its body" ); + return false; case Function_Body: + switch ( other->Type ) + { + AST_BODY_FUNCTION_UNALLOWED_TYPES + { + log_failure( "AST::add: Cannot add an AST to a function body." ); + return false; + } + + default: + break; + } break; case Function_Fwd: - break; + log_failure( "AST::add: Cannot add an AST to a function forward declaration." 
); + return false; case Global_Body: + switch ( other->Type ) + { + AST_BODY_GLOBAL_UNALLOWED_TYPES + { + log_failure( "AST::add: Cannot add an AST to a global body." ); + return false; + } + + default: + break; + } break; + case Namespace: - break; + if ( Type != Global_Body ) + { + log_failure( "AST::add: Cannot add a namespace to a non-global body." ); + return false; + } case Namespace_Body: + switch ( other-> Type ) + { + AST_BODY_NAMESPACE_UNALLOWED_TYPES + { + log_failure( "AST::add: Cannot add an AST to a namespace body." ); + return false; + } + + default: + break; + } break; case Operator: - break; + log_failure( "AST::add: Cannot add an operator, only to its body" ); + return false; case Operator_Fwd: - break; + log_failure( "AST::add: Cannot add an operator forward declaration." ); + return false; case Parameters: - break; + log_failure( "AST::add: Cannot add to a parameter list, use AST::add_param instead" ); + return false; case Specifiers: - break; + log_failure( "AST::add: Cannot add to a specifier, use AST::add_specifier instead." ); + return false; case Struct: - break; + log_failure( "AST::add: Cannot add to a struct, only to its body." ); + return false; case Struct_Body: + switch ( other->Type ) + { + AST_BODY_STRUCT_UNALLOWED_TYPES + { + log_failure( "AST::add: Cannot add to a struct body." ); + return false; + } + + default: + break; + } break; case Variable: - break; + log_failure( "AST::add: Cannot add to a variable." ); + return false; case Typedef: - break; + log_failure( "AST::add: Cannot add to a typedef." ); + return false; case Typename: - break; + log_failure( "AST::add: Cannot add to a typename." ); + return false; case Using: - break; + log_failure( "AST::add: Cannot add to a using statement." 
); + return false; } array_append( Entries, other ); other->Parent = this; return true; - } - - bool AST::check() - { - switch ( Type ) - { - using namespace ECode; - - case Invalid: - break; - - case Untyped: - break; - - case Access_Public: - break; - - case Access_Protected: - break; - - case Access_Private: - break; - - case Enum: - break; - - case Enum_Fwd: - break; - - case Enum_Body: - break; - - case Friend: - break; - - case Function: - break; - - case Function_Body: - break; - - case Function_Fwd: - break; - - case Global_Body: - break; - - case Namespace: - break; - - case Namespace_Body: - break; - - case Operator: - break; - - case Operator_Fwd: - break; - - case Parameters: - break; - - case Specifiers: - break; - - case Struct: - break; - - case Struct_Body: - break; - - case Variable: - break; - - case Typedef: - break; - - case Typename: - break; - - case Using: - break; - } - - return true; + #else + log_failure( "AST::add: Incremental AST building is not enabled." ); + return false; + #endif } AST* AST::duplicate() @@ -504,7 +614,7 @@ namespace gen return result; } -# pragma endregion AST +#pragma endregion AST #pragma region Gen Interface void init() @@ -564,12 +674,12 @@ namespace gen Code::Invalid = make_code(); Code::Invalid.lock(); -# ifdef GEN_DEFINE_LIBRARY_CODE_CONSTANTS + #ifdef GEN_DEFINE_LIBRARY_CODE_CONSTANTS Code& t_bool_write = ccast( Code, t_void ); t_bool_write = def_type( name(void) ); -# define def_constant_code_type( Type_ ) \ + # define def_constant_code_type( Type_ ) \ Code& \ t_##Type_ = def_type( name(Type_) ); \ t_##Type_->Readonly = true; @@ -595,21 +705,21 @@ namespace gen def_constant_code_type( f32 ); def_constant_code_type( f64 ); -# undef def_constant_code_type -# endif + # undef def_constant_code_type + #endif Code& spec_constexpr_write = ccast( Code, spec_constexpr ); spec_constexpr_write = def_specifiers( 1, ESpecifier::Constexpr ); -# define def_constant_spec( Type_, ... 
) \ + # define def_constant_spec( Type_, ... ) \ Code& \ spec_##Type_ = def_specifiers( macro_num_args(__VA_ARGS__), __VA_ARGS__); \ spec_##Type_.lock(); def_constant_spec( const, ESpecifier::Const ); def_constant_spec( inline, ESpecifier::Inline ); -# undef def_constant_spec + # undef def_constant_spec } void clear_code_memory() @@ -778,8 +888,8 @@ namespace gen return OpValidateResult::Fail; } -# pragma region Helper Macros -# define check_params() \ + #pragma region Helper Macros + # define check_params() \ if ( ! params_code ) \ { \ log_failure("gen::def_operator: params is null and operator%s requires it", to_str(op)); \ @@ -791,7 +901,7 @@ namespace gen return OpValidateResult::Fail; \ } -# define check_param_eq_ret() \ + # define check_param_eq_ret() \ if ( ! is_member_symbol && params_code->param_type() != ret_type ) \ { \ log_failure("gen_def_operator: operator%s requires first parameter to equal return type\n" \ @@ -803,7 +913,7 @@ namespace gen ); \ return OpValidateResult::Fail; \ } -# pragma endregion Helper Macros + #pragma endregion Helper Macros if ( ! ret_type ) { @@ -820,7 +930,7 @@ namespace gen switch ( op ) { -# define specs( ... ) macro_num_args( __VA_ARGS__ ), __VA_ARGS__ + # define specs( ... ) macro_num_args( __VA_ARGS__ ), __VA_ARGS__ case Assign: check_params(); @@ -1081,14 +1191,13 @@ namespace gen case Comma: check_params(); break; -# undef specs + # undef specs } -# undef check_params -# undef check_ret_type -# undef check_param_eq_ret - return is_member_symbol ? OpValidateResult::Member : OpValidateResult::Global; + # undef check_params + # undef check_ret_type + # undef check_param_eq_ret } void set_allocator_data_arrays( AllocatorInfo allocator ) @@ -1116,7 +1225,7 @@ namespace gen StaticData::Allocator_StringArena = allocator; } -# pragma region Helper Functions +#pragma region Helper Marcos // This snippet is used in nearly all the functions. 
# define name_check( Context_, Length_, Name_ ) \ { \ @@ -1158,9 +1267,9 @@ namespace gen # define not_implemented( Context_ ) \ log_failure( "gen::%s: This function is not implemented" ); \ return Code::Invalid; -# pragma endregion Helper Functions +#pragma endregion Helper Marcos -# pragma region Upfront Constructors +#pragma region Upfront Constructors /* The implementaiton of the upfront constructors involves bascially doing three things: * Validate the arguments given to construct the intended type of AST is valid. @@ -1694,7 +1803,7 @@ namespace gen If a function's implementation deviates from the macros then its just writen it out. */ -# pragma region Helper Macros for def_**_body functions +#pragma region Helper Macros for def_**_body functions # define def_body_start( Name_ ) \ using namespace ECode; \ \ @@ -1744,7 +1853,7 @@ namespace gen result->add_entry( entry ); \ } \ while ( num--, num > 0 ) -# pragma endregion Helper Macros for def_**_body functions +#pragma endregion Helper Macros for def_**_body functions Code def_class_body( s32 num, ... 
) { @@ -1757,19 +1866,7 @@ namespace gen va_list va; va_start(va, num); def_body_code_validation_start( def_class_body, va_arg( va, Code ) ); - case Enum_Body: - case Execution: - case Function_Body: - case Global_Body: - case Namespace: - case Namespace_Body: - case Operator: - case Operator_Fwd: - case Parameters: - case Specifiers: - case Struct_Body: - case Typename: - case Using_Namespace: + AST_BODY_CLASS_UNALLOWED_TYPES def_body_code_validation_end( def_class_body ); va_end(va); @@ -1786,25 +1883,7 @@ namespace gen result->Type = Function_Body; def_body_code_validation_start( def_class_body, *codes; codes++ ); - case Access_Public: - case Access_Protected: - case Access_Private: - case Class_Body: - case Enum_Body: - case Friend: - case Function_Body: - case Function_Fwd: - case Global_Body: - case Namespace: - case Namespace_Body: - case Operator: - case Operator_Fwd: - case Operator_Member: - case Operator_Member_Fwd: - case Parameters: - case Specifiers: - case Struct_Body: - case Typename: + AST_BODY_CLASS_UNALLOWED_TYPES def_body_code_validation_end( def_class_body ); result.lock(); @@ -1889,25 +1968,7 @@ namespace gen va_list va; va_start(va, num); def_body_code_validation_start( def_function_body, va_arg(va, Code) ); - case Access_Public: - case Access_Protected: - case Access_Private: - case Class_Body: - case Enum_Body: - case Friend: - case Function_Body: - case Function_Fwd: - case Global_Body: - case Namespace: - case Namespace_Body: - case Operator: - case Operator_Fwd: - case Operator_Member: - case Operator_Member_Fwd: - case Parameters: - case Specifiers: - case Struct_Body: - case Typename: + AST_BODY_FUNCTION_UNALLOWED_TYPES def_body_code_validation_end( def_function_body ); va_end(va); @@ -1924,25 +1985,7 @@ namespace gen result->Type = Function_Body; def_body_code_validation_start( def_function_body, *codes; codes++ ); - case Access_Public: - case Access_Protected: - case Access_Private: - case Class_Body: - case Enum_Body: - case Friend: 
- case Function_Body: - case Function_Fwd: - case Global_Body: - case Namespace: - case Namespace_Body: - case Operator: - case Operator_Fwd: - case Operator_Member: - case Operator_Member_Fwd: - case Parameters: - case Specifiers: - case Struct_Body: - case Typename: + AST_BODY_FUNCTION_UNALLOWED_TYPES def_body_code_validation_end( def_function_body ); result.lock(); @@ -1960,22 +2003,7 @@ namespace gen va_list va; va_start(va, num); def_body_code_validation_start( def_global_body, va_arg(va, Code) ); - case Access_Public: - case Access_Protected: - case Access_Private: - case Class_Body: - case Enum_Body: - case Execution: - case Friend: - case Function_Body: - case Global_Body: - case Namespace_Body: - case Operator_Member: - case Operator_Member_Fwd: - case Parameters: - case Specifiers: - case Struct_Body: - case Typename: + AST_BODY_GLOBAL_UNALLOWED_TYPES def_body_code_validation_end( def_global_body ); va_end(va); @@ -1992,22 +2020,7 @@ namespace gen result->Type = Global_Body; def_body_code_validation_start( def_global_body, *codes; codes++ ); - case Access_Public: - case Access_Protected: - case Access_Private: - case Class_Body: - case Enum_Body: - case Execution: - case Friend: - case Function_Body: - case Global_Body: - case Namespace_Body: - case Operator_Member: - case Operator_Member_Fwd: - case Parameters: - case Specifiers: - case Struct_Body: - case Typename: + AST_BODY_GLOBAL_UNALLOWED_TYPES def_body_code_validation_end( def_global_body ); result.lock(); @@ -2025,22 +2038,7 @@ namespace gen va_list va; va_start(va, num); def_body_code_validation_start( def_namespace_body, va_arg(va, Code) ); - case Access_Public: - case Access_Protected: - case Access_Private: - case Class_Body: - case Enum_Body: - case Execution: - case Friend: - case Function_Body: - case Global_Body: - case Namespace_Body: - case Operator_Member: - case Operator_Member_Fwd: - case Parameters: - case Specifiers: - case Struct_Body: - case Typename: + 
AST_BODY_NAMESPACE_UNALLOWED_TYPES def_body_code_validation_end( def_namespace_body ); va_end(va); @@ -2057,22 +2055,7 @@ namespace gen result->Type = Global_Body; def_body_code_validation_start( def_namespace_body, *codes; codes++ ); - case Access_Public: - case Access_Protected: - case Access_Private: - case Class_Body: - case Enum_Body: - case Execution: - case Friend: - case Function_Body: - case Global_Body: - case Namespace_Body: - case Operator_Member: - case Operator_Member_Fwd: - case Parameters: - case Specifiers: - case Struct_Body: - case Typename: + AST_BODY_NAMESPACE_UNALLOWED_TYPES def_body_code_validation_end( def_namespace_body ); result.lock(); @@ -2219,19 +2202,7 @@ namespace gen va_list va; va_start(va, num); def_body_code_validation_start( def_struct_body, va_arg(va, Code) ); - case Enum_Body: - case Execution: - case Function_Body: - case Global_Body: - case Namespace: - case Namespace_Body: - case Operator: - case Operator_Fwd: - case Parameters: - case Specifiers: - case Struct_Body: - case Typename: - case Using_Namespace: + AST_BODY_STRUCT_UNALLOWED_TYPES def_body_code_validation_end( def_struct_body ); va_end(va); @@ -2248,28 +2219,16 @@ namespace gen result->Type = Struct_Body; def_body_code_validation_start( def_struct_body, *codes; codes++ ); - case Enum_Body: - case Execution: - case Function_Body: - case Global_Body: - case Namespace: - case Namespace_Body: - case Operator: - case Operator_Fwd: - case Parameters: - case Specifiers: - case Struct_Body: - case Typename: - case Using_Namespace: + AST_BODY_STRUCT_UNALLOWED_TYPES def_body_code_validation_end( def_struct_body ); result.lock(); return result; } -# pragma endregion Upfront Constructors +#pragma endregion Upfront Constructors -# pragma region Incremetnal Constructors -# ifdef GEN_FEATURE_INCREMENTAL +#pragma region Incremetnal Constructors +#ifdef GEN_FEATURE_INCREMENTAL Code make_class( s32 length, char const* name, Code parent, Code specifiers ) { using namespace ECode; @@ 
-2501,11 +2460,15 @@ namespace gen return result; } -# endif // GEN_FEATURE_INCREMENTAL -# pragma endregion Incremetnal Constructions -# pragma region Parsing Constructors -# ifdef GEN_FEATURE_PARSING +# undef name_check +# undef null_check +# undef null_or_invalid_check +#endif // GEN_FEATURE_INCREMENTAL +#pragma endregion Incremetnal Constructions + +#pragma region Parsing Constructors +#ifdef GEN_FEATURE_PARSING /* These constructors are the most implementation intensive other than the edtior or scanner. @@ -2518,428 +2481,399 @@ namespace gen It uses the upfront constructors to help keep code from getitng to large since the target is to keep the sloc low */ -# pragma region Helper Macros -# define check_parse_args( func, length, def ) \ - if ( length <= 0 ) \ - { \ - log_failure( "gen::" txt(func) ": length must greater than 0" ); \ - return Code::Invalid; \ - } \ - if ( def == nullptr ) \ - { \ - log_failure( "gen::" txt(func) ": def was null" ); \ - return Code::Invalid; \ - } - -/* - These macros are used to make the parsing code more readable. -*/ -# define curr_tok ( * tokens ) - -# define eat( Type_ ) \ - if ( curr_tok.Type != Type_ ) \ - { \ - String token_str = string_make_length( g_allocator, curr_tok.Text, curr_tok.Length ); \ - log_failure( "gen::" txt(context) ": expected %s, got %s", txt(Type_), curr_tok.Type ); \ - return Code::Invalid; \ - } \ - tokIDX++; \ - left-- -# pragma endregion Helper Macros - -# pragma region Lexer -/* - This is a simple lexer that focuses on tokenizing only tokens relevant to the library. - It will not be capable of lexing C++ code with unsupported features. -*/ - -// Angle brackets not supported as they are used for template arguments outside of expressions -// Any angle brackets found will be considered an operator token. - -# define Define_TokType \ - Entry( Access_Public, "public" ) \ - Entry( Access_Protected, "protected" ) \ - Entry( Access_Private, "private" ) \ - Entry( Access_MemberSymbol, "." 
) \ - Entry( Access_StaticSymbol, "::") \ - Entry( Ampersand, "&" ) \ - Entry( Ampersand_DBL, "&&" ) \ - Entry( Assign_Classifer, ":" ) \ - Entry( BraceCurly_Open, "{" ) \ - Entry( BraceCurly_Close, "}" ) \ - Entry( BraceSquare_Open, "[" ) \ - Entry( BraceSquare_Close, "]" ) \ - Entry( Capture_Start, "(" ) \ - Entry( Capture_End, ")" ) \ - Entry( Comment, "__comment__" ) \ - Entry( Char, "__char__" ) \ - Entry( Comma, "," ) \ - Entry( Decl_Class, "class" ) \ - Entry( Decl_Enum, "enum" ) \ - Entry( Decl_Friend, "friend" ) \ - Entry( Decl_Namespace, "namespace" ) \ - Entry( Decl_Struct, "struct" ) \ - Entry( Decl_Typedef, "typedef" ) \ - Entry( Decl_Using, "using" ) \ - Entry( Decl_Union, "union" ) \ - Entry( Identifier, "__SymID__" ) \ - Entry( Number, "number" ) \ - Entry( Operator, "operator" ) \ - Entry( Spec_API, txt(API_Keyword) ) \ - Entry( Spec_Alignas, "alignas" ) \ - Entry( Spec_CLinkage, "extern \"C\"" ) \ - Entry( Spec_Const, "const" ) \ - Entry( Spec_Consteval, "consteval" ) \ - Entry( Spec_Constexpr, "constexpr" ) \ - Entry( Spec_Constinit, "constinit" ) \ - Entry( Spec_Export, "export" ) \ - Entry( Spec_Extern, "extern" ) \ - Entry( Spec_Import, "import" ) \ - Entry( Spec_Inline, "inline" ) \ - Entry( Spec_Module, "module" ) \ - Entry( Spec_Static, "static" ) \ - Entry( Spec_ThreadLocal, "thread_local" ) \ - Entry( Spec_Volatile, "volatile") \ - Entry( Star, "*" ) \ - Entry( Statement_End, ";" ) \ - Entry( String, "__String__" ) \ - Entry( Type_Unsigned, "unsigned" ) \ - Entry( Type_Signed, "signed" ) \ - Entry( Type_Short, "short" ) \ - Entry( Type_Long, "long" ) - - enum class TokType : u32 + namespace Parser { -# define Entry( Name_, Str_ ) Name_, - Define_TokType -# undef Entry - Num, - Invalid - }; + /* + This is a simple lexer that focuses on tokenizing only tokens relevant to the library. + It will not be capable of lexing C++ code with unsupported features. 
+ */ - struct Token - { - char const* Text; - s32 Length; - TokType Type; - }; + // Angle brackets not supported as they are used for template arguments outside of expressions + // Any angle brackets found will be considered an operator token. - TokType get_token_type( char const* word, s32 length ) - { - local_persist - char const* lookup[(u32)TokType::Num] = + # define Define_TokType \ + Entry( Access_Public, "public" ) \ + Entry( Access_Protected, "protected" ) \ + Entry( Access_Private, "private" ) \ + Entry( Access_MemberSymbol, "." ) \ + Entry( Access_StaticSymbol, "::") \ + Entry( Ampersand, "&" ) \ + Entry( Ampersand_DBL, "&&" ) \ + Entry( Assign_Classifer, ":" ) \ + Entry( BraceCurly_Open, "{" ) \ + Entry( BraceCurly_Close, "}" ) \ + Entry( BraceSquare_Open, "[" ) \ + Entry( BraceSquare_Close, "]" ) \ + Entry( Capture_Start, "(" ) \ + Entry( Capture_End, ")" ) \ + Entry( Comment, "__comment__" ) \ + Entry( Char, "__char__" ) \ + Entry( Comma, "," ) \ + Entry( Decl_Class, "class" ) \ + Entry( Decl_Enum, "enum" ) \ + Entry( Decl_Friend, "friend" ) \ + Entry( Decl_Namespace, "namespace" ) \ + Entry( Decl_Struct, "struct" ) \ + Entry( Decl_Typedef, "typedef" ) \ + Entry( Decl_Using, "using" ) \ + Entry( Decl_Union, "union" ) \ + Entry( Identifier, "__SymID__" ) \ + Entry( Number, "number" ) \ + Entry( Operator, "operator" ) \ + Entry( Spec_API, txt(API_Keyword) ) \ + Entry( Spec_Alignas, "alignas" ) \ + Entry( Spec_CLinkage, "extern \"C\"" ) \ + Entry( Spec_Const, "const" ) \ + Entry( Spec_Consteval, "consteval" ) \ + Entry( Spec_Constexpr, "constexpr" ) \ + Entry( Spec_Constinit, "constinit" ) \ + Entry( Spec_Export, "export" ) \ + Entry( Spec_Extern, "extern" ) \ + Entry( Spec_Import, "import" ) \ + Entry( Spec_Inline, "inline" ) \ + Entry( Spec_Module, "module" ) \ + Entry( Spec_Static, "static" ) \ + Entry( Spec_ThreadLocal, "thread_local" ) \ + Entry( Spec_Volatile, "volatile") \ + Entry( Star, "*" ) \ + Entry( Statement_End, ";" ) \ + Entry( String, 
"__String__" ) \ + Entry( Type_Unsigned, "unsigned" ) \ + Entry( Type_Signed, "signed" ) \ + Entry( Type_Short, "short" ) \ + Entry( Type_Long, "long" ) + + enum class TokType : u32 { -# define Entry( Name_, Str_ ) Str_, + # define Entry( Name_, Str_ ) Name_, Define_TokType -# undef Entry + # undef Entry + + Num, + Invalid }; - for ( u32 index = 0; index < (u32)TokType::Num; index++ ) + struct Token { - if ( str_compare( word, lookup[index], length ) == 0 ) - return scast(TokType, index); - } + char const* Text; + sptr Length; + TokType Type; + }; - return TokType::Invalid; - } - - inline - bool tok_is_specifier( Token const& tok ) - { - return tok.Type >= TokType::Spec_API && tok.Type <= TokType::Spec_Volatile; - } - - Arena LexAllocator; - - Array(Token) lex( s32 length, char const* content) - { -# define current ( * scanner ) - -# define move_forward() \ - left--; \ - scanner++ - -# define SkipWhitespace() \ - while ( left && char_is_space( current ) ) \ - { \ - move_forward(); \ - } - -# define SkipWhitespace_Checked( Context_, Msg_, ... 
) \ - while ( left && char_is_space( current ) ) \ - { \ - move_forward(); \ - } \ - if ( left <= 0 ) \ - { \ - log_failure( "gen::" txt(Context_) ": " Msg_, __VA_ARGS__ ); \ - return Code::Invalid; \ - } - - do_once_start - arena_init_from_allocator( & LexAllocator, heap(), megabytes(10) ); - - if ( LexAllocator.physical_start == nullptr ) + TokType get_tok_type( char const* word, s32 length ) + { + local_persist + char const* lookup[(u32)TokType::Num] = { - log_failure( "gen::lex: failed to allocate memory for parsing constructor's lexer"); - return nullptr; + # define Entry( Name_, Str_ ) Str_, + Define_TokType + # undef Entry + }; + + for ( u32 index = 0; index < (u32)TokType::Num; index++ ) + { + if ( str_compare( word, lookup[index], length ) == 0 ) + return scast(TokType, index); } - do_once_end - local_persist thread_local - Array(Token) Tokens = nullptr; - - s32 left = length; - char const* scanner = content; - - char const* word = scanner; - s32 word_length = 0; - - SkipWhitespace(); - if ( left <= 0 ) - { - log_failure( "gen::lex: no tokens found (only whitespace provided)" ); - return Tokens; + return TokType::Invalid; } - if ( Tokens ) - array_clear( Tokens ); - - array_init_reserve( Tokens, arena_allocator( & LexAllocator), length / 8 ); - - while (left ) + char const* str_tok_ype( TokType type ) { - Token token = { nullptr, 0, TokType::Invalid }; - - switch ( current ) + local_persist + char const* lookup[(u32)TokType::Num] = { - case '.': - token.Text = scanner; - token.Length = 1; - token.Type = TokType::Access_MemberSymbol; + # define Entry( Name_, Str_ ) Str_, + Define_TokType + # undef Entry + }; - if (left) - move_forward(); - goto FoundToken; + return lookup[(u32)type]; + } - case '&' : - token.Text = scanner; - token.Length = 1; - token.Type = TokType::Ampersand; + inline + bool tok_is_specifier( Token const& tok ) + { + return tok.Type >= TokType::Spec_API && tok.Type <= TokType::Spec_Volatile; + } - if (left) - move_forward(); + # undef 
Define_TokType - if ( current == '&' ) // && - { - token.Length = 2; - token.Type = TokType::Ampersand_DBL; + Arena LexAllocator; + + struct TokArray + { + s32 Idx; + Array(Token) Arr; + + inline + bool __eat( TokType type, char const* context ) + { + if ( Arr[0].Type != type ) + { + String token_str = string_make_length( g_allocator, Arr[Idx].Text, Arr[Idx].Length ); + log_failure( "gen::%s: expected %s, got %s", context, str_tok_ype(type), str_tok_ype(Arr[Idx].Type) ); + return Code::Invalid; + } + + Idx++; + } + }; + + TokArray lex( s32 length, char const* content) + { + # define current ( * scanner ) + + # define move_forward() \ + left--; \ + scanner++ + + # define SkipWhitespace() \ + while ( left && char_is_space( current ) ) \ + { \ + move_forward(); \ + } + + # define SkipWhitespace_Checked( Context_, Msg_, ... ) \ + while ( left && char_is_space( current ) ) \ + { \ + move_forward(); \ + } \ + if ( left <= 0 ) \ + { \ + log_failure( "gen::" txt(Context_) ": " Msg_, __VA_ARGS__ ); \ + return { 0, nullptr }; \ + } + + do_once_start + arena_init_from_allocator( & LexAllocator, heap(), megabytes(10) ); + + if ( LexAllocator.physical_start == nullptr ) + { + log_failure( "gen::lex: failed to allocate memory for parsing constructor's lexer"); + return { 0, nullptr }; + } + do_once_end + + local_persist thread_local + Array(Token) Tokens = nullptr; + + s32 left = length; + char const* scanner = content; + + char const* word = scanner; + s32 word_length = 0; + + SkipWhitespace(); + if ( left <= 0 ) + { + log_failure( "gen::lex: no tokens found (only whitespace provided)" ); + return { 0, nullptr }; + } + + if ( Tokens ) + array_clear( Tokens ); + + array_init_reserve( Tokens, arena_allocator( & LexAllocator), length / 8 ); + + while (left ) + { + Token token = { nullptr, 0, TokType::Invalid }; + + switch ( current ) + { + case '.': + token.Text = scanner; + token.Length = 1; + token.Type = TokType::Access_MemberSymbol; if (left) move_forward(); - } + goto 
FoundToken; - goto FoundToken; + case '&' : + token.Text = scanner; + token.Length = 1; + token.Type = TokType::Ampersand; - case ':': - token.Text = scanner; - token.Length = 1; - token.Type = TokType::Assign_Classifer; - - if (left) - move_forward(); - - if ( current == ':' ) - { - move_forward(); - token.Type = TokType::Access_StaticSymbol; - token.Length++; - } - goto FoundToken; - - case '{': - token.Text = scanner; - token.Length = 1; - token.Type = TokType::BraceCurly_Open; - goto FoundToken; - - case '}': - token.Text = scanner; - token.Length = 1; - token.Type = TokType::BraceCurly_Close; - goto FoundToken; - - case '[': - token.Text = scanner; - token.Length = 1; - token.Type = TokType::BraceSquare_Open; - if ( left ) - { - move_forward(); - - if ( current == ']' ) - { - token.Length = 2; - token.Type = TokType::Operator; + if (left) move_forward(); - } - } - goto FoundToken; - case ']': - token.Text = scanner; - token.Length = 1; - token.Type = TokType::BraceSquare_Close; - goto FoundToken; - - case '(': - token.Text = scanner; - token.Length = 1; - token.Type = TokType::Capture_Start; - if ( left ) - { - move_forward(); - - if ( current == ')' ) + if ( current == '&' ) // && { - token.Length = 2; - token.Type = TokType::Operator; - move_forward(); - } - } - goto FoundToken; + token.Length = 2; + token.Type = TokType::Ampersand_DBL; - case ')': - token.Text = scanner; - token.Length = 1; - token.Type = TokType::Capture_End; - goto FoundToken; - - case '\'': - token.Text = scanner; - token.Length = 1; - token.Type = TokType::Char; - - move_forward(); - - while ( left && current != '\'' ) - { - move_forward(); - token.Length++; - } - - if ( left ) - { - move_forward(); - token.Length++; - } - goto FoundToken; - - case ',': - token.Text = scanner; - token.Length = 1; - token.Type = TokType::Comma; - goto FoundToken; - - case '*': - token.Text = scanner; - token.Length = 1; - token.Type = TokType::Star; - goto FoundToken; - - case ';': - token.Text = 
scanner; - token.Length = 1; - token.Type = TokType::Statement_End; - goto FoundToken; - - case '"': - token.Text = scanner; - token.Length = 1; - token.Type = TokType::String; - - move_forward(); - while ( left ) - { - if ( current == '"' ) - { - move_forward(); - break; - } - - if ( current == '\\' ) - { - move_forward(); - token.Length++; - - if ( left ) - { + if (left) move_forward(); - token.Length++; - } - continue; } - move_forward(); - token.Length++; - } - goto FoundToken; + goto FoundToken; - // All other operators we just label as an operator and move forward. - case '+': - case '%': - case '^': - case '~': - case '!': - case '=': - case '<': - case '>': - case '|': - token.Text = scanner; - token.Length = 1; - token.Type = TokType::Operator; - - if (left) - move_forward(); - - if ( current == '=' ) - { - token.Length++; + case ':': + token.Text = scanner; + token.Length = 1; + token.Type = TokType::Assign_Classifer; if (left) move_forward(); - } - else while ( left && current == *(scanner - 1) && length < 3 ) - { - token.Length++; - if (left) - move_forward(); - } - goto FoundToken; - - // Dash is unfortunatlly a bit more complicated... 
- case '-': - token.Text = scanner; - token.Length = 1; - token.Type = TokType::Operator; - if ( left ) - { - move_forward(); - - if ( current == '>' ) + if ( current == ':' ) { + move_forward(); + token.Type = TokType::Access_StaticSymbol; token.Length++; + } + goto FoundToken; + + case '{': + token.Text = scanner; + token.Length = 1; + token.Type = TokType::BraceCurly_Open; + goto FoundToken; + + case '}': + token.Text = scanner; + token.Length = 1; + token.Type = TokType::BraceCurly_Close; + goto FoundToken; + + case '[': + token.Text = scanner; + token.Length = 1; + token.Type = TokType::BraceSquare_Open; + if ( left ) + { move_forward(); - if ( current == '*' ) + if ( current == ']' ) { - token.Length++; + token.Length = 2; + token.Type = TokType::Operator; move_forward(); } } - else if ( current == '=' ) + goto FoundToken; + + case ']': + token.Text = scanner; + token.Length = 1; + token.Type = TokType::BraceSquare_Close; + goto FoundToken; + + case '(': + token.Text = scanner; + token.Length = 1; + token.Type = TokType::Capture_Start; + if ( left ) + { + move_forward(); + + if ( current == ')' ) + { + token.Length = 2; + token.Type = TokType::Operator; + move_forward(); + } + } + goto FoundToken; + + case ')': + token.Text = scanner; + token.Length = 1; + token.Type = TokType::Capture_End; + goto FoundToken; + + case '\'': + token.Text = scanner; + token.Length = 1; + token.Type = TokType::Char; + + move_forward(); + + while ( left && current != '\'' ) + { + move_forward(); + token.Length++; + } + + if ( left ) + { + move_forward(); + token.Length++; + } + goto FoundToken; + + case ',': + token.Text = scanner; + token.Length = 1; + token.Type = TokType::Comma; + goto FoundToken; + + case '*': + token.Text = scanner; + token.Length = 1; + token.Type = TokType::Star; + goto FoundToken; + + case ';': + token.Text = scanner; + token.Length = 1; + token.Type = TokType::Statement_End; + goto FoundToken; + + case '"': + token.Text = scanner; + token.Length = 1; + 
token.Type = TokType::String; + + move_forward(); + while ( left ) + { + if ( current == '"' ) + { + move_forward(); + break; + } + + if ( current == '\\' ) + { + move_forward(); + token.Length++; + + if ( left ) + { + move_forward(); + token.Length++; + } + continue; + } + + move_forward(); + token.Length++; + } + goto FoundToken; + + // All other operators we just label as an operator and move forward. + case '+': + case '%': + case '^': + case '~': + case '!': + case '=': + case '<': + case '>': + case '|': + token.Text = scanner; + token.Length = 1; + token.Type = TokType::Operator; + + if (left) + move_forward(); + + if ( current == '=' ) { token.Length++; @@ -2953,124 +2887,179 @@ namespace gen if (left) move_forward(); } - } - goto FoundToken; + goto FoundToken; - case '/': + // Dash is unfortunatlly a bit more complicated... + case '-': + token.Text = scanner; + token.Length = 1; + token.Type = TokType::Operator; + if ( left ) + { + move_forward(); + + if ( current == '>' ) + { + token.Length++; + move_forward(); + + if ( current == '*' ) + { + token.Length++; + move_forward(); + } + } + else if ( current == '=' ) + { + token.Length++; + + if (left) + move_forward(); + } + else while ( left && current == *(scanner - 1) && length < 3 ) + { + token.Length++; + + if (left) + move_forward(); + } + } + goto FoundToken; + + case '/': + token.Text = scanner; + token.Length = 1; + token.Type = TokType::Operator; + + if ( left ) + { + move_forward(); + + if ( current == '/' ) + { + token.Type = TokType::Comment; + + while ( left && current != '\n' ) + { + move_forward(); + token.Length++; + } + } + else if ( current == '*' ) + { + token.Type = TokType::Comment; + + while ( left && ( current != '*' || *(scanner + 1) != '/' ) ) + { + move_forward(); + token.Length++; + } + move_forward(); + move_forward(); + } + } + goto FoundToken; + } + + SkipWhitespace(); + if ( left <= 0 ) + break; + + if ( char_is_alpha( current ) || current == '_' ) + { token.Text = scanner; 
token.Length = 1; - token.Type = TokType::Operator; + move_forward(); - if ( left ) + while ( left && ( char_is_alphanumeric(current) || current == '_' ) ) { move_forward(); - - if ( current == '/' ) - { - token.Type = TokType::Comment; - - while ( left && current != '\n' ) - { - move_forward(); - token.Length++; - } - } - else if ( current == '*' ) - { - token.Type = TokType::Comment; - - while ( left && ( current != '*' || *(scanner + 1) != '/' ) ) - { - move_forward(); - token.Length++; - } - move_forward(); - move_forward(); - } + token.Length++; } + goto FoundToken; - } - - SkipWhitespace(); - if ( left <= 0 ) - break; - - if ( char_is_alpha( current ) || current == '_' ) - { - token.Text = scanner; - token.Length = 1; - move_forward(); - - while ( left && ( char_is_alphanumeric(current) || current == '_' ) ) + } + else { - move_forward(); - token.Length++; + String context_str = zpl::string_sprintf_buf( g_allocator, "%s", scanner, min( 100, left ) ); + + log_failure( "Failed to lex token %s", context_str ); + + // Skip to next whitespace since we can't know if anything else is valid until then. + while ( left && ! char_is_space( current ) ) + { + move_forward(); + } } - goto FoundToken; - } - else - { - String context_str = zpl::string_sprintf_buf( g_allocator, "%s", scanner, min( 100, left ) ); + FoundToken: - log_failure( "Failed to lex token %s", context_str ); - - // Skip to next whitespace since we can't know if anything else is valid until then. - while ( left && ! char_is_space( current ) ) + if ( token.Type != TokType::Invalid ) { - move_forward(); + array_append( Tokens, token ); + continue; + } + + TokType type = get_tok_type( token.Text, token.Length ); + + if ( type != TokType::Invalid ) + { + token.Type = type; + array_append( Tokens, token ); + } + else + { + // Its most likely an identifier... 
+ + + String tok_str = zpl::string_sprintf_buf( g_allocator, "%s", token.Text, token.Length ); + + log_failure( "Failed to lex token %s", tok_str ); + + // Skip to next whitespace since we can't know if anything else is valid until then. + while ( left && ! char_is_space( current ) ) + { + move_forward(); + } } } - FoundToken: + return { 0, Tokens }; - if ( token.Type != TokType::Invalid ) - { - array_append( Tokens, token ); - continue; - } - - TokType type = get_token_type( token.Text, token.Length ); - - if ( type != TokType::Invalid ) - { - token.Type = type; - array_append( Tokens, token ); - } - else - { - // Its most likely an identifier... - - - String tok_str = zpl::string_sprintf_buf( g_allocator, "%s", token.Text, token.Length ); - - log_failure( "Failed to lex token %s", tok_str ); - - // Skip to next whitespace since we can't know if anything else is valid until then. - while ( left && ! char_is_space( current ) ) - { - move_forward(); - } - } + #undef current + #undef move_forward + #undef SkipWhitespace + #undef SkipWhitespace_Checked } - - return Tokens; - -# undef current -# undef move_forward -# undef SkipWhitespace -# undef SkipWhitespace_Checked } -# pragma endregion Lexer + +#pragma region Helper Macros +# define check_parse_args( func, length, def ) \ + if ( length <= 0 ) \ + { \ + log_failure( "gen::" txt(func) ": length must greater than 0" ); \ + return Code::Invalid; \ + } \ + if ( def == nullptr ) \ + { \ + log_failure( "gen::" txt(func) ": def was null" ); \ + return Code::Invalid; \ + } + +# define currtok toks.Arr[toks.Idx] +# define eat( Type_ ) toks.__eat( Type_, txt(context) ) +# define left array_count(toks.Arr) - toks.Idx +#pragma endregion Helper Macros Code parse_class( s32 length, char const* def ) { -# define context parse_class - Array(Token) tokens = lex( length, def ); + # define context parse_class + using namespace Parser; - if ( tokens == nullptr || array_count( tokens ) == 0 ) + TokArray toks = lex( length, def ); + if 
( array_count( toks.Arr ) == 0 ) { - log_failure( "gen::parse_class: no tokens found" ); + log_failure( "gen::" txt(context) ": failed to lex tokens" ); return Code::Invalid; } @@ -3080,39 +3069,36 @@ namespace gen Code speciifes = { nullptr }; Code body = { nullptr }; - Token& curr_token = * tokens; - - s32 tokIDX = 0; - s32 left = array_count( tokens ); do { } while ( left--, left > 0 ); + + return Code::Invalid; + # undef context } Code parse_enum( s32 length, char const* def ) { # define context parse_enum check_parse_args( parse_enum, length, def ); + using namespace Parser; - Array(Token) tokens = lex( length, def ); - if ( tokens == nullptr ) + TokArray toks = lex( length, def ); + if ( array_count( toks.Arr ) == 0 ) { - log_failure( "gen::parse_enum: no tokens found for provided definition" ); + log_failure( "gen::" txt(context) ": failed to lex tokens" ); return Code::Invalid; } - s32 left = array_count( tokens ); - s32 tokIDX = 0; - SpecifierT specs_found[16] { ESpecifier::Num_Specifiers }; s32 num_specifiers = 0; - Token* name = nullptr; - Code array_expr = { nullptr }; - Code type = { nullptr }; - Token body = { nullptr, 0, TokType::Invalid }; + Token name = { nullptr, 0, TokType::Invalid }; + Code array_expr = { nullptr }; + Code type = { nullptr }; + Token body = { nullptr, 0, TokType::Invalid }; char entries_code[ kilobytes(128) ] { 0 }; s32 entries_length = 0; @@ -3121,46 +3107,41 @@ namespace gen eat( TokType::Decl_Enum ); - if ( curr_tok.Type == TokType::Decl_Class ) + if ( currtok.Type == TokType::Decl_Class ) { eat( TokType::Decl_Class); is_enum_class = true; } - if ( curr_tok.Type != TokType::Identifier ) + if ( currtok.Type != TokType::Identifier ) { log_failure( "gen::parse_enum: expected identifier for enum name" ); return Code::Invalid; } - name = tokens; + name = currtok; eat( TokType::Identifier ); - if ( curr_tok.Type == TokType::Assign_Classifer ) + if ( currtok.Type == TokType::Assign_Classifer ) { eat( TokType::Assign_Classifer ); - 
s32 left_length = curr_tok.Length + curr_tok.Text - array_front(tokens).Text; - - type = parse_type( curr_tok.Length , curr_tok.Text ); + type = parse_type( toks, txt(parse_enum) ); if ( type == Code::Invalid ) - { - log_failure( "gen::parse_enum: failed to parse enum type" ); return Code::Invalid; - } } - if ( curr_tok.Type == TokType::BraceCurly_Open ) + if ( currtok.Type == TokType::BraceCurly_Open ) { eat( TokType::BraceCurly_Open ); - body = curr_tok; + body = currtok; - while ( curr_tok.Type != TokType::BraceCurly_Close ) + while ( currtok.Type != TokType::BraceCurly_Close ) { - body.Length += curr_tok.Length; + body.Length += currtok.Length; - eat( curr_tok.Type ); + eat( currtok.Type ); } eat( TokType::BraceCurly_Close ); @@ -3189,7 +3170,7 @@ namespace gen result->Type = is_enum_class ? Enum_Class_Fwd : Enum_Fwd; } - result->Name = get_cached_string( name->Text, name->Length ); + result->Name = get_cached_string( name.Text, name.Length ); if ( type ) result->add_entry( type ); @@ -3206,19 +3187,18 @@ namespace gen Code parse_friend( s32 length, char const* def ) { + using namespace Parser; + # define context parse_friend check_parse_args( parse_friend, length, def ); - Array(Token) tokens = lex( length, def ); - if ( tokens == nullptr ) + TokArray toks = lex( length, def ); + if ( array_count( toks.Arr ) == 0 ) { - log_failure( "gen::parse_friend: no tokens found for provided definition" ); + log_failure( "gen::" txt(context) ": failed to lex tokens" ); return Code::Invalid; } - s32 left = array_count( tokens ); - s32 tokIDX = 0; - eat( TokType::Decl_Friend ); // This can either be a simple type, or a function declaration. @@ -3244,6 +3224,8 @@ namespace gen Code parse_function( s32 length, char const* def ) { + using namespace Parser; + check_parse_args( parse_function, length, def ); Arena mem; @@ -3257,7 +3239,7 @@ namespace gen u8 num_specifiers; // Making all significant tokens have a max length of 128 for this parser. 
- ct sw LengthID = 128; + constexpr sw LengthID = 128; struct Param { @@ -3321,7 +3303,7 @@ namespace gen u8 num_specifiers; // Making all significant tokens have a max length of 128 for this parser. - ct sw LengthID = 128; + constexpr sw LengthID = 128; char const name [LengthID] { 0 }; char const parent[LengthID] { 0 }; @@ -3331,106 +3313,125 @@ namespace gen Code parse_variable( s32 length, char const* def ) { - not_implemented( parse_variable ); + # define context parse_variable + check_parse_args( parse_variable, length, def ); + using namespace Parser; + + TokArray toks = lex( length, def ); + if ( array_count( toks.Arr ) == 0 ) + { + log_failure( "gen::" txt(context) ": failed to lex tokens" ); + return Code::Invalid; + } + + Token* name = nullptr; + + Code type = parse_type( toks, txt(parse_variable) ); + + if ( type == Code::Invalid ) + return Code::Invalid; + + if ( currtok.Type != TokType::Identifier ) + { + log_failure( "gen::parse_variable: expected identifier, recieved " txt(currtok.Type) ); + return Code::Invalid; + } + + name = & currtok; + eat( TokType::Identifier ); + + return Code::Invalid; + # undef context } - inline - bool parse_type_helper( char const* func_name - , Token& name - , s32& left, s32& tokIDX, Array(Token)& tokens - , s32& num_specifiers, SpecifierT* specs_found - , Code& array_expr - ) + Code parse_type( Parser::TokArray& toks, char const* func_name ) { -# pragma push_macro( "eat" ) -# undef eat -# define eat( Type_ ) \ - if ( curr_tok.Type != Type_ ) \ - { \ - String token_str = string_make_length( g_allocator, curr_tok.Text, curr_tok.Length ); \ - log_failure( "gen::" txt(context) ": expected %s, got %s", txt(Type_), curr_tok.Type ); \ - return Code::Invalid; \ - } \ - tokIDX++; \ - left-- + # define context parse_type + using namespace Parser; - while ( left && tok_is_specifier( curr_tok ) ) + SpecifierT specs_found[16] { ESpecifier::Num_Specifiers }; + s32 num_specifiers = 0; + + Token name = { nullptr, 0, TokType::Invalid }; 
+ Code array_expr = { nullptr }; + + while ( left && tok_is_specifier( currtok ) ) { - SpecifierT spec = ESpecifier::to_type( curr_tok.Text, curr_tok.Length ); + SpecifierT spec = ESpecifier::to_type( currtok.Text, currtok.Length ); if ( spec != ESpecifier::Const && spec < ESpecifier::Type_Signed ) { - log_failure( "%s: Error, invalid specifier used in type definition: %s", func_name, curr_tok.Text ); - return false; + log_failure( "gen::parse_type: Error, invalid specifier used in type definition: %s", currtok.Text ); + return Code::Invalid; } specs_found[num_specifiers] = spec; num_specifiers++; - eat( curr_tok.Type ); + eat( currtok.Type ); } if ( left == 0 ) { log_failure( "%s: Error, unexpected end of type definition", func_name ); - return false; + return Code::Invalid; } - if ( curr_tok.Type == TokType::Decl_Class - || curr_tok.Type == TokType::Decl_Struct ) + if ( currtok.Type == TokType::Decl_Class + || currtok.Type == TokType::Decl_Struct ) { - name = curr_tok; - eat( curr_tok.Type ); + name = currtok; + eat( currtok.Type ); - name.Length += curr_tok.Length; + name.Length = ( (sptr)currtok.Text + currtok.Length ) - (sptr)name.Text; eat( TokType::Identifier ); } else { - name = curr_tok; + name = currtok; eat( TokType::Identifier ); } - while ( left && tok_is_specifier( curr_tok ) ) + while ( left && tok_is_specifier( currtok ) ) { - SpecifierT spec = ESpecifier::to_type( curr_tok.Text, curr_tok.Length ); + SpecifierT spec = ESpecifier::to_type( currtok.Text, currtok.Length ); if ( spec != ESpecifier::Const && spec != ESpecifier::Ref && spec != ESpecifier::RValue && spec < ESpecifier::Type_Signed ) { - log_failure( "%s: Error, invalid specifier used in type definition: %s", func_name, curr_tok.Text ); - return false; + log_failure( "%s: Error, invalid specifier used in type definition: %s", func_name, currtok.Text ); + return Code::Invalid; } specs_found[num_specifiers] = spec; num_specifiers++; - eat( curr_tok.Type ); + eat( currtok.Type ); } - if ( left 
&& curr_tok.Type == TokType::BraceSquare_Open ) + if ( left && currtok.Type == TokType::BraceSquare_Open ) { eat( TokType::BraceSquare_Open ); if ( left == 0 ) { log_failure( "%s: Error, unexpected end of type definition", func_name ); - return false; + return Code::Invalid; } - if ( curr_tok.Type == TokType::BraceSquare_Close ) + if ( currtok.Type == TokType::BraceSquare_Close ) { eat( TokType::BraceSquare_Close ); - return true; + return Code::Invalid; } Token - untyped_tok = curr_tok; + untyped_tok = currtok; - while ( left && curr_tok.Type != TokType::BraceSquare_Close ) + while ( left && currtok.Type != TokType::BraceSquare_Close ) { - untyped_tok.Length += curr_tok.Length; + untyped_tok.Length += currtok.Length; } array_expr = untyped_str( untyped_tok.Length, untyped_tok.Text ); @@ -3438,59 +3439,24 @@ namespace gen if ( left == 0 ) { log_failure( "%s: Error, unexpected end of type definition", func_name ); - return false; + return Code::Invalid; } - if ( curr_tok.Type != TokType::BraceSquare_Close ) + if ( currtok.Type != TokType::BraceSquare_Close ) { log_failure( "%s: Error, expected ] in type definition", func_name ); - return false; + return Code::Invalid; } eat( TokType::BraceSquare_Close ); } - return true; -# pragma pop_macro( "eat" ) - } - - Code parse_type( s32 length, char const* def ) - { - check_parse_args( parse_type, length, def ); - - Array(Token) tokens = lex( length, def ); - - if ( tokens == nullptr ) - { - log_failure( "gen::parse_type: no tokens found for provided definition" ); - return Code::Invalid; - } - - s32 left = array_count( tokens ); - s32 tokIDX = 0; - - Token* name = nullptr; - Code array_expr = { nullptr }; - - SpecifierT specs_found[16] { ESpecifier::Num_Specifiers }; - s32 num_specifiers = 0; - - bool helper_result = parse_type_helper( txt(parse_type) - , * name - , left, tokIDX, tokens - , num_specifiers, specs_found - , array_expr - ); - - if ( ! 
helper_result ) - return Code::Invalid; - using namespace ECode; Code result = make_code(); result->Type = Typename; - result->Name = get_cached_string( name->Text, name->Length ); + result->Name = get_cached_string( name.Text, name.Length ); if (num_specifiers) { @@ -3504,52 +3470,58 @@ namespace gen result.lock(); return result; + # undef context + } + + Code parse_type( s32 length, char const* def ) + { + # define context parse_type + check_parse_args( parse_type, length, def ); + + using namespace Parser; + TokArray toks = lex( length, def ); + if ( array_count( toks.Arr ) == 0 ) + { + log_failure( "gen::" txt(context) ": failed to lex tokens" ); + return Code::Invalid; + } + + Code result = parse_type( toks, txt(parse_type) ); + return result; + # undef context } Code parse_typedef( s32 length, char const* def ) { -# define context parse_typedef + # define context parse_typedef check_parse_args( parse_typedef, length, def ); - Array(Token) tokens = lex( length, def ); - - if ( tokens == nullptr ) + using namespace Parser; + TokArray toks = lex( length, def ); + if ( array_count( toks.Arr ) == 0 ) { - log_failure( "gen::parse_typedef: no tokens found for provided definition" ); + log_failure( "gen::" txt(context) ": failed to lex tokens" ); return Code::Invalid; } - s32 left = array_count( tokens ); - s32 tokIDX = 0; - - Token* name = nullptr; - Code array_expr = { nullptr }; - Code type = { nullptr }; + Token name = { nullptr, 0, TokType::Invalid }; + Code array_expr = { nullptr }; + Code type = { nullptr }; SpecifierT specs_found[16] { ESpecifier::Num_Specifiers }; s32 num_specifiers = 0; eat( TokType::Decl_Typedef ); - bool helper_result = parse_type_helper( txt(parse_typedef) - , * name - , left, tokIDX, tokens - , num_specifiers, specs_found - , array_expr - ); + type = parse_type( toks, txt(parse_typedef) ); - if ( ! 
helper_result ) - return Code::Invalid; - - type = def_type( name->Length, name->Text, def_specifiers( num_specifiers, specs_found ) ); - - if ( curr_tok.Type != TokType::Identifier ) + if ( currtok.Type != TokType::Identifier ) { log_failure( "gen::parse_typedef: Error, expected identifier for typedef" ); return Code::Invalid; } - name = tokens; + name = currtok; eat( TokType::Identifier ); eat( TokType::Statement_End ); @@ -3559,31 +3531,28 @@ namespace gen Code result = make_code(); result->Type = Typedef; - result->Name = get_cached_string( name->Text, name->Length ); + result->Name = get_cached_string( name.Text, name.Length ); result->add_entry( type ); result.lock(); return result; -# undef context + # undef context } Code parse_using( s32 length, char const* def ) { -# define context parse_using + # define context parse_using check_parse_args( parse_using, length, def ); - Array(Token) tokens = lex( length, def ); - - if ( tokens == nullptr ) + using namespace Parser; + TokArray toks = lex( length, def ); + if ( array_count( toks.Arr ) == 0 ) { - log_failure( "gen::parse_using: no tokens found for provided definition" ); + log_failure( "gen::" txt(context) ": failed to lex tokens" ); return Code::Invalid; } - s32 left = array_count( tokens ); - s32 tokIDX = 0; - SpecifierT specs_found[16] { ESpecifier::Num_Specifiers }; s32 num_specifiers = 0; @@ -3595,7 +3564,7 @@ namespace gen eat( TokType::Decl_Using ); - if ( curr_tok.Type == TokType::Decl_Namespace ) + if ( currtok.Type == TokType::Decl_Namespace ) { is_namespace = true; eat( TokType::Decl_Namespace ); @@ -3603,7 +3572,7 @@ namespace gen eat( TokType::Identifier ); - if ( curr_tok.Type != TokType::Statement_End ) + if ( currtok.Type != TokType::Statement_End ) { if ( is_namespace ) { @@ -3611,17 +3580,7 @@ namespace gen return Code::Invalid; } - bool helper_result = parse_type_helper( txt(parse_using) - , * name - , left, tokIDX, tokens - , num_specifiers, specs_found - , array_expr - ); - - if ( ! 
helper_result ) - return Code::Invalid; - - type = def_type( name->Length, name->Text, def_specifiers( num_specifiers, specs_found ) ); + type = parse_type( toks, txt(parse_using) ); } eat( TokType::Statement_End ); @@ -3637,10 +3596,9 @@ namespace gen result.lock(); return result; -# undef context + # undef context } - s32 parse_classes( s32 length, char const* class_defs, Code* out_class_codes ) { not_implemented( parse_classes ); @@ -3690,11 +3648,105 @@ namespace gen { not_implemented( parse_usings ); } + + // Undef helper macros +# undef check_parse_args +# undef curr_tok +# undef eat +# undef left // End GEN_FEATURE_PARSING # endif -# pragma endregion Parsing Constructors +#pragma endregion Parsing Constructors + +#pragma region Untyped Constructors + struct TokEntry + { + char const* Str; + sw Length; + }; + + ZPL_TABLE( static, TokMap, tokmap_, TokEntry ) + + sw token_fmt_va( char* buf, uw buf_size, char const* fmt, s32 num_tokens, va_list va ) + { + char const* buf_begin = buf; + sw remaining = buf_size; + + TokMap tok_map; + { + tokmap_init( & tok_map, g_allocator ); + + s32 left = num_tokens; + + while ( left-- ) + { + char const* token = va_arg( va, char const* ); + char const* value = va_arg( va, char const* ); + + TokEntry entry + { + value, + str_len(value, (sw)128) + }; + + u32 key = crc32( token, str_len(token, 32) ); + + tokmap_set( & tok_map, key, entry ); + } + } + + sw result = 0; + char current = *fmt; + + while ( current ) + { + sw len = 0; + + while ( current && current != '{' && remaining ) + { + * buf = * fmt; + buf++; + fmt++; + + current = * fmt; + } + + if ( current == '{' ) + { + char const* scanner = fmt; + + s32 tok_len = 0; + + while ( *scanner != '}' ) + { + tok_len++; + scanner++; + } + + char const* token = fmt; + + u32 key = crc32( token, tok_len ); + TokEntry value = * tokmap_get( & tok_map, key ); + sw left = value.Length; + + while ( left-- ) + { + * buf = *value.Str; + buf++; + value.Str++; + } + + scanner++; + fmt = 
scanner; + current = * fmt; + } + } + + tokmap_clear( & tok_map ); + + return result; + } -# pragma region Untyped Constructors Code untyped_str( s32 length, char const* str ) { Code @@ -3719,7 +3771,7 @@ namespace gen Code result = make_code(); - result->Name = get_cached_string( fmt, strnlen(fmt, MaxNameLength) ); + result->Name = get_cached_string( fmt, str_len(fmt, MaxNameLength) ); result->Type = ECode::Untyped; result->Content = get_cached_string( buf, length ); @@ -3746,7 +3798,7 @@ namespace gen result.lock(); return result; } -# pragma endregion Untyped Constructors +#pragma endregion Untyped Constructors #pragma endregion Gen Interface #pragma region Builder @@ -3789,6 +3841,10 @@ namespace gen This is a more robust lexer than the ones used for the lexer in the parse constructors interface. Its needed to scan a C++ file and have awareness to skip content unsupported by the library. */ +struct FileLexer +{ + +}; #endif #pragma endregion File Lexer diff --git a/project/gen.hpp b/project/gen.hpp index fe1dc3d..35e32ba 100644 --- a/project/gen.hpp +++ b/project/gen.hpp @@ -1,384 +1,16 @@ /* - gencpp: A simple staged metaprogramming library for C++. + gencpp: An attempt at simple staged metaprogramming for c/c++. - The library is mostly a compositon of code element constructors. - These build up a code AST to then serialize with a file builder. + See Readme.md for more information from the project repository. - This library is intended for small-to midsize projects. - - AST type checking supports only a small subset of c++. - See the 'ECode' namespace and 'gen API' region to see what is supported. - - ### *WHAT IS NOT PROVIDED* - - This library aims to be used in a "orthodox" or minimal C++ workspace. - - * Macro or template generation : This library is to avoid those, adding support for them adds unnecessary complexity. 
- * Vendor provided dynamic dispatch (virtuals) : Roll your own, this library might roll its own vtable/interface generation helpers in the future. - * RTTI - * Exceptions - * Execution statement validation : Execution expressions are defined using the untyped string API. - - Keywords in from "Modern C++": - Specifiers: - * constexpr : Great to store compile-time constants, (easier to garanteed when emitted from gentime) - * consteval : Technically fine so long as templates are not used. Need to make sure to execute in moderation. - * constinit : Better than constexpr at doing its job, however, its only c++ 20. - * export : Useful if c++ modules ever come around to actually being usable. - * import : ^^ - * module : ^^ - - These features are in as they either are not horrible when used conservatively or are a performance benefit (modules). - - #### When it comes to expressions: - - There is no support for validating expressions. - The reason: thats where the can of worms open for parsing validation. This library would most likey more than double in size with that addition alone. - For most metaprogramming (espcially for C/C++), expression validation is not necessary for metaprogramming, it can be done by the compiler for the runtime program. - Most of the time, the critical complex metaprogramming conundrums are actaully producing the frame of abstractions around the expressions. - Thus its not very much a priority to add such a level of complexity to the library when there isn't a high reward or need for it. - - To further this point, lets say you do have an error with an expressions composition. - It will either be caught by the c++ compiler when compiling the target program, or at runtime for the program. - - * If its not caught by the compiler, the only downside is the error appers on the generated function. - Those with knowledge of how that definition was generated know where to find the code that inlined that expression in that file for that definition. 
- * If its caught at runtime. The expression will be shown in a stack trace if debug symbols are enabled in the generated function body. - Yet again those with knowledge of how that definition was generated know where to find the code that inlined that expression. - - In both these cases the user will get objectively better debug information than you would normally get on most c++ compilers/editors using complex macros or templates. - - ### The Data & Interface: - - The AST is managed by the library and provided the user via its interface prodedures. - - Notes: - - * The allocator definitions used are exposed to the user incase they want to dictate memory usage - * You'll find the memory handling in `init`, `gen_string_allocator`, `get_cached_string`, `make_code`, and `make_code_entries`. - * ASTs are wrapped for the user in a Code struct which essentially a warpper for a AST* type. - * Both AST and Code have member symbols but their data layout is enforced to be POD types. - * This library treats memory failures as fatal. - * The AST's data union is can either hold a static array of AST*'s, a dynmaic array if the the static capacity is not enough, or a cached string content. - * The dynamic array is allocated to arenas currently and are pretty wasteful if they go over their reserve size (its never recycled). - - Data layout of AST struct: - - union { - AST* ArrStatic[ArrS_Cap]; - Array(AST*) Entries; - StringCached Content; - - }; - AST* Parent; - StringCached Name; - StringCached Comment; - CodeT Type; - OperatorT Op; - bool Readonly; - bool DynamicEntries; - u8 StaticIndex; - u8 _Align_Pad[6]; - - *`CodeT` is a typedef for `ECode::Type` which is the type of the enum.* - - AST widths are setup to be AST_POD_Size. 
- The width dictates how much the static array can hold before it must give way to using an allocated array: - constexpr static - u32 ArrS_Cap = - ( AST_POD_Size - - sizeof(AST*) - - sizeof(StringCached) * 2 - - sizeof(CodeT) - - sizeof(OperatorT) - - sizeof(bool) * 2 - - sizeof(u8) * 7 ) - / sizeof(AST*); - - Ex: If the AST_POD_Size is 256 the capacity of the static array is 26. - - ASTs can be set to readonly by calling Code's lock() member function. - Adding comments is always available even if the AST is set to readonly. - - #### Misc - - * The allocator definitions used are exposed to the user incase they want to dictate memory usage - * You'll find the memory handling in `init`, `gen_string_allocator`, `get_cached_string`, `make_code`, and `make_code_entries`. - * ASTs are wrapped for the user in a Code struct which essentially a warpper for a AST* type. - * Both AST and Code have member symbols but their data layout is enforced to be POD types. - * This library treats memory failures as fatal. - * The default setup assumes large definition sets may be provided to bodies so AST::Entires are dynamic arrays. - * They're allocated to arenas currently and are pretty wasteful if they go over their reserve size (its never recycled). - * Most likely will need to implement a dynamic-sized bucket allocation strategy for the entry arrays if memory is getting stressed. - * Otherwise if you are using fixed size entries and your definitions are under 128~512 entries for the body, you may be better of with a fixed-sized array. - * Strings are stored in their own set of arenas. AST constructors use cached strings for names, and content. - - ### There are four sets of interfaces for Code AST generation the library provides - - * Upfront - * Incremental - * Parsing - * Untyped - - ### Upfront Construction - - All component ASTs must be previously constructed, and provided on creation of the code AST. - The construction will fail and return InvalidCode otherwise. 
- - Interface : - - * def_class - * def_enum - * def_execution NOTE: This is equivalent to untyped_str, except that its intended for use only in execution scopes. - * def_friend - * def_function - * def_namespace - * def_operator - * def_param - * def_params - * def_specifier - * def_specifiers - * def_struct - * def_variable - * def_type - * def_typedef - * def_using - - * def_class_body - * def_enum_body - * def_function_body NOTE: Use this for operator bodies as well. - * def_global_body - * def_namespace_body - * def_struct_body - - Usage: - Code = def_( ... ); - - Code - { - ... - = def_( ... ); - } - - ### Incremental construction - - A Code ast is provided but only completed upfront if all components are provided. - Components are then added using the AST API for adding ASTs: - - * code.add( AST* ) // Adds AST with validation. - * code.add_entry( AST* ) // Adds AST entry without validation. - - Code ASTs may be explictly validated at anytime using Code's check() member function. - - Interface : - - * make_class - * make_enum - * make_enum_class - * make_function - * make_global_body - * make_namespace - * make_operator - * make_params - * make_specifiers - * make_struct - - Usage: - Code = make_( ... ) - { - ->add( ... ); - ... - } - - ### Parse construction - - A string provided to the API is parsed for the intended language construct. - - Interface : - - * parse_class - * parse_enum - * parse_friend - * parse_function - * parse_global_body - * parse_namespace - * parse_operator - * parse_struct - * parse_strucs - * parse_variable - * parse_type - * parse_typedef - * parse_using - - * parse_classes - * parse_enums - * parse_functions - * parse_namespaces - * parse_operators - * parse_variables - * parse_typedefs - * parse_usings - - The parse API treats any execution scope definitions with no validation and are turned into untyped Code ASTs. - This includes the assignmetn of variables; due to the library not yet supporting c/c++ expression parsing. 
- - The pluralvariants provide an array of codes, its up to the user to add them to a body AST - (they are not auto-added to a body) - - Usage: - Code = parse_( string with code ); - - Code = def_( ..., parse_( - - )); - - Code = make_( ... ) - { - ->add( parse_( - - )); - } - - ### Untyped constructions - - Code ASTs are constructed using unvalidated strings. - - Interface : - - * token_fmt - * untyped_str - * untyped_fmt - * untyped_token_fmt - - During serialization any untyped Code AST is has its string value directly injected inline of - whatever context the content existed as an entry within. - Even though thesee are not validated from somewhat correct c/c++ syntax or components, it doesn't mean that - Untyped code can be added as any component of a Code AST: - - * Untyped code cannot have children, thus there cannot be recursive injection this way. - * Untyped code can only be a child of a parent of body AST, or for values of an assignment (ex: variable assignment). - - These restrictions help prevent abuse of untyped code to some extent. - - Usage Conventions: - ``` - Code = def_varaible( , , untyped_( - - )); - ``` - - Template metaprogramming in the traditional sense becomes possible with the use of `token_fmt` and parse constructors: - - ``` - char const* token_key, token_value; - char const* template = txt( - Code with {key value} to replace with token_values - ... - ); - char const* gen_code_str = token_fmt( template, num_tokens, token, ... 
); - Code = parse_( gen_code_str ); - ``` - - ## Extent of operator overload validation: - - The AST and constructors will be able to validate that the arguments provided for the operator type match the expected form: - * If return type must match a parameter - * If number of parameters is correct - * If added as a member symbol to a class or struct, that operator matches the requirements for the class (types match up) - - The user is responsible for making sure the code types provided are correct - and have the desired specifiers assigned to them beforehand. - - ## Code generation and modification - - There are three provided interfaces: - * Builder - * Editor - * Scanner - - Editor and Scanner are disabled by default, use GEN_FEATURE_EDITOR and GEN_FEATURE_SCANNER to enable them. - - ### Builder is a similar object to the jai language's string_builder. - * The purpose of it is to generate a file. - * A file is specified and opened for writting using the open( file_path) ) fucntion. - * The code is provided via print( code ) function will be seralized to its buffer. - * When all seralization is finished, use the write() comamnd to write the buffer to the file. - - ### Editor is for editing a series of files based on a set of requests provided to it. - * The purpose is to overrite a specific file, it places its contents in a buffer to scan. - * Requests are populated using the following interface: - * add : Add code. - * remove : Remove code. - * replace: Replace code. - - All three have the same parameters with exception to remove which only has SymbolInfo and Policy: - * SymbolInfo: - * File : The file the symbol resides in. - Leave null to indicate to search all files. - * Marker : #define symbol that indicates a location or following signature is valid to manipulate. - Leave null to indicate that the signature should only be used. - * Signature : Use a Code symbol to find a valid location to manipulate, can be further filtered with the marker. 
- Leave null to indicate that the marker should only be used. - - * Policy : Additional policy info for completing the request (empty for now) - * Code : Code to inject if adding, or replace existing code with. - - Additionally if GEN_FEATURE_EDITOR_REFACTOR is defined, refactor( file_path, specification_path ) wil be made available. - Refactor is based of the refactor library and uses its interface. - It will on call add a request to the queue to run the refactor script on the file. - - ### Scanner allows the user to generate Code ASTs by reading files. - * The purpose is to grab definitions to generate metadata or generate new code from these definitions. - * Requests are populated using the add( SymbolInfo, Policy ) function. - The symbol info is the same as the one used for the editor. So is the case with Policy. - - The file will only be read from, no writting supported. - - One great use case is for example: generating the single-header library for gencpp! - - ### Additional Info (Editor and Scanner) - - When all requests have been populated, call process_requests(). - It will provide an output of receipt data of the results when it completes. - - Files may be added to the Editor and Scanner additionally with add_files( num, files ). - This is intended for when you have requests that are for multiple files. - - Request queue in both Editor and Scanner are cleared once process_requests completes. - - ### On multi-threading: - - Currently supported but want to. The following changes would have to be made: - * Setup static data accesss with fences if more than one thread will generate ASTs ( or keep a different set for each thread) - * Make sure local peristent data of functions are also thread local. - * The builder should be done on a per-thread basis. - * Due to the design of the editor and scanner, it will most likely - be best to make each file a job to process request entries on. - Receipts should have an an array to store per thread. 
- They can be combined to the final reciepts array when all files have been processed. - - For now single-threaded has a bunch of optimization that most likely have done to it and will be more than capable - for the majority of projects this thing is intended for. (IF you use this on Unreal... well your asking for it...) - - ### Notes on extending with whatever features you want - - This library is very small, and you can easily extend it. - - The untyped codes and builder/editor/scanner can be technically be used to circumvent - any sort of constrictions the library has with: modern c++, templates, macros, etc. - - Typical use case is for getting define constants an old C/C++ library with the scanner: - Code parse_defines() can emit a custom code AST with Macro_Constant type. - - Another would be getting preprocessor or template metaprogramming Codes from Unreal Engine definitions. + Public Address: + https://github.com/Ed94/gencpp */ - #pragma once #include "Bloat.hpp" // Temporarily here for debugging purposes. -#define GEN_BAN_CPP_TEMPLATES #define GEN_DEFINE_DSL #define GEN_DEFINE_LIBRARY_CODE_CONSTANTS // #define GEN_DONT_USE_FATAL @@ -395,21 +27,17 @@ namespace gen { using LogFailType = sw(*)(char const*, ...); -# ifdef GEN_BAN_CPP_TEMPLATES -# define template static_assert("Templates are banned within gen_time scope blocks") -# endif - // By default this library will either crash or exit if an error is detected while generating codes. // Even if set to not use fatal, fatal will still be used for memory failures as the library is unusable when they occur. 
# ifdef GEN_DONT_USE_FATAL - ct LogFailType log_failure = log_fmt; + constexpr LogFailType log_failure = log_fmt; # else - ct LogFailType log_failure = fatal; + constexpr LogFailType log_failure = fatal; # endif namespace ECode { -# define Define_Types \ + # define Define_Types \ Entry( Untyped ) \ Entry( Access_Public ) \ Entry( Access_Protected ) \ @@ -448,9 +76,9 @@ namespace gen enum Type : u32 { -# define Entry( Type ) Type, + # define Entry( Type ) Type, Define_Types -# undef Entry + # undef Entry Num_Types, Invalid @@ -461,15 +89,15 @@ namespace gen { static char const* lookup[Num_Types] = { -# define Entry( Type ) txt( Type ), + # define Entry( Type ) txt( Type ), Define_Types -# undef Entry + # undef Entry }; return lookup[ type ]; } -# undef Define_Types + #undef Define_Types } using CodeT = ECode::Type; @@ -480,8 +108,8 @@ namespace gen Class }; - ct EnumT EnumClass = EnumT::Class; - ct EnumT EnumRegular = EnumT::Regular; + constexpr EnumT EnumClass = EnumT::Class; + constexpr EnumT EnumRegular = EnumT::Regular; enum class UsingT : u8 { @@ -489,59 +117,59 @@ namespace gen Namespace }; - ct UsingT UsingRegular = UsingT::Regular; - ct UsingT UsingNamespace = UsingT::Namespace; + constexpr UsingT UsingRegular = UsingT::Regular; + constexpr UsingT UsingNamespace = UsingT::Namespace; namespace EOperator { -# define Define_Operators \ - Entry( Assign, = ) \ - Entry( Assign_Add, += ) \ - Entry( Assign_Subtract, -= ) \ - Entry( Assgin_Multiply, *= ) \ - Entry( Assgin_Divide, /= ) \ - Entry( Assgin_Modulo, %= ) \ - Entry( Assgin_BAnd, &= ) \ - Entry( Assgin_BOr, |= ) \ - Entry( Assign_BXOr, ^= ) \ - Entry( Assign_LShift, <<= ) \ - Entry( Assign_RShift, >>= ) \ - Entry( Increment, ++ ) \ - Entry( Decrement, -- ) \ - Entry( Unary_Plus, + ) \ - Entry( Unary_Minus, - ) \ - Entry( Add, + ) \ - Entry( Subtract, - ) \ - Entry( Multiply, * ) \ - Entry( Divide, / ) \ - Entry( Modulo, % ) \ - Entry( BNot, ~ ) \ - Entry( BAnd, & ) \ - Entry( BOr, | ) \ - Entry( BXOr, ^ ) \ 
- Entry( LShift, << ) \ - Entry( RShift, >> ) \ - Entry( LNot, ! ) \ - Entry( LAnd, && ) \ - Entry( LOr, || ) \ - Entry( Equals, == ) \ - Entry( NotEquals, != ) \ - Entry( Lesser, < ) \ - Entry( Greater, > ) \ - Entry( LesserEqual, <= ) \ - Entry( GreaterEqual, >= ) \ - Entry( Subscript, [] ) \ - Entry( Indirection, * ) \ - Entry( AddressOf, & ) \ - Entry( MemberOfPointer, -> ) \ - Entry( PtrToMemOfPtr, ->* ) \ - Entry( FunctionCall, () ) + # define Define_Operators \ + Entry( Assign, = ) \ + Entry( Assign_Add, += ) \ + Entry( Assign_Subtract, -= ) \ + Entry( Assgin_Multiply, *= ) \ + Entry( Assgin_Divide, /= ) \ + Entry( Assgin_Modulo, %= ) \ + Entry( Assgin_BAnd, &= ) \ + Entry( Assgin_BOr, |= ) \ + Entry( Assign_BXOr, ^= ) \ + Entry( Assign_LShift, <<= ) \ + Entry( Assign_RShift, >>= ) \ + Entry( Increment, ++ ) \ + Entry( Decrement, -- ) \ + Entry( Unary_Plus, + ) \ + Entry( Unary_Minus, - ) \ + Entry( Add, + ) \ + Entry( Subtract, - ) \ + Entry( Multiply, * ) \ + Entry( Divide, / ) \ + Entry( Modulo, % ) \ + Entry( BNot, ~ ) \ + Entry( BAnd, & ) \ + Entry( BOr, | ) \ + Entry( BXOr, ^ ) \ + Entry( LShift, << ) \ + Entry( RShift, >> ) \ + Entry( LNot, ! 
) \ + Entry( LAnd, && ) \ + Entry( LOr, || ) \ + Entry( Equals, == ) \ + Entry( NotEquals, != ) \ + Entry( Lesser, < ) \ + Entry( Greater, > ) \ + Entry( LesserEqual, <= ) \ + Entry( GreaterEqual, >= ) \ + Entry( Subscript, [] ) \ + Entry( Indirection, * ) \ + Entry( AddressOf, & ) \ + Entry( MemberOfPointer, -> ) \ + Entry( PtrToMemOfPtr, ->* ) \ + Entry( FunctionCall, () ) enum Type : u32 { -# define Entry( Type_, Token_ ) Type_, + # define Entry( Type_, Token_ ) Type_, Define_Operators -# undef Entry + # undef Entry Comma, Num_Ops, @@ -556,32 +184,32 @@ namespace gen local_persist char const* lookup[ Num_Ops ] = { -# define Entry( Type_, Token_ ) txt(Token_), + # define Entry( Type_, Token_ ) txt(Token_), Define_Operators -# undef Entry + # undef Entry "," }; return lookup[ op ]; } -# undef Define_Operators + #undef Define_Operators } using OperatorT = EOperator::Type; namespace ESpecifier { -# if defined(ZPL_SYSTEM_WINDOWS) -# define API_Export_Code __declspec(dllexport) -# define API_Import_Code __declspec(dllimport) -# define API_Keyword __declspec -# elif defined(ZPL_SYSTEM_MACOS) -# define API_Export_Code __attribute__ ((visibility ("default"))) -# define API_Import_Code __attribute__ ((visibility ("default"))) -# define API_Keyword __attribute__ -# endif + #if defined(ZPL_SYSTEM_WINDOWS) + # define API_Export_Code __declspec(dllexport) + # define API_Import_Code __declspec(dllimport) + # define API_Keyword __declspec + #elif defined(ZPL_SYSTEM_MACOS) + # define API_Export_Code __attribute__ ((visibility ("default"))) + # define API_Import_Code __attribute__ ((visibility ("default"))) + # define API_Keyword __attribute__ + #endif -# define Define_Specifiers \ + #define Define_Specifiers \ Entry( API_Import, API_Export_Code ) \ Entry( API_Export, API_Import_Code ) \ Entry( Attribute, "You cannot stringize an attribute this way" ) \ @@ -614,9 +242,9 @@ namespace gen enum Type : u32 { -# define Entry( Specifier, Code ) Specifier, + # define Entry( Specifier, 
Code ) Specifier, Define_Specifiers -# undef Entry + # undef Entry Num_Specifiers, Invalid, @@ -628,9 +256,9 @@ namespace gen { local_persist char const* lookup[ Num_Specifiers ] = { -# define Entry( Spec_, Code_ ) txt(Code_), + # define Entry( Spec_, Code_ ) txt(Code_), Define_Specifiers -# undef Entry + # undef Entry }; return lookup[ specifier ]; @@ -661,7 +289,7 @@ namespace gen return Invalid; } -# undef Define_Specifiers + #undef Define_Specifiers } using SpecifierT = ESpecifier::Type; @@ -674,7 +302,7 @@ namespace gen using StringCached = char const*; // Desired width of the AST data structure. - ct u32 AST_POD_Size = 256; + constexpr u32 AST_POD_Size = 256; // TODO: If perf needs it, convert layout an SOA format. /* @@ -688,7 +316,7 @@ namespace gen */ struct AST { -# pragma region Member Functions + #pragma region Member Functions // Used with incremental constructors // Adds and checks entries to see if they are valid additions the type of ast. @@ -702,7 +330,6 @@ namespace gen return Entries[0]; } - forceinline bool check(); AST* duplicate(); @@ -784,13 +411,13 @@ namespace gen forceinline bool typename_is_ptr() { - zpl::assert_crash("not implemented"); + assert_crash("not implemented"); } forceinline bool typename_is_ref() { - zpl::assert_crash("not implemented"); + assert_crash("not implemented"); } forceinline @@ -833,7 +460,8 @@ namespace gen } String to_string() const; -# pragma endregion Member Functions + #pragma endregion Member Functions + constexpr static uw ArrS_Cap = ( AST_POD_Size @@ -848,7 +476,7 @@ namespace gen constexpr static uw ArrSpecs_Cap = ArrS_Cap * (sizeof(AST*) / sizeof(SpecifierT)); -# define Using_Code_POD \ + # define Using_Code_POD \ union { \ AST* ArrStatic[AST::ArrS_Cap]; \ Array(AST*) Entries; \ @@ -871,11 +499,11 @@ namespace gen struct CodePOD { Using_Code_POD -# undef Using_CodePOD; + # undef Using_CodePOD; }; - ct sw size_AST = sizeof(AST); - ct sw size_POD = sizeof(CodePOD); + constexpr sw size_AST = sizeof(AST); + 
constexpr sw size_POD = sizeof(CodePOD); // Its intended for the AST to have equivalent size to its POD. // All extra functionality within the AST namespace should just be syntatic sugar. @@ -892,12 +520,13 @@ namespace gen */ struct Code { -# pragma region Statics + #pragma region Statics // Used internally for the most part to identify invaidly generated code. static Code Invalid; -# pragma endregion Statics + #pragma endregion Statics -# pragma region Member Functions + #pragma region Member Functions + inline Code body() { if ( ast == nullptr ) @@ -1006,14 +635,14 @@ namespace gen return ast; } -# pragma endregion Member Functions + #pragma endregion Member Functions AST* ast; }; static_assert( sizeof(Code) == sizeof(AST*), "ERROR: Code is not POD" ); // Used when the its desired when omission is allowed in a definition. - ct Code NoCode = { nullptr }; + constexpr Code NoCode = { nullptr }; // extern const Code InvalidCode; /* @@ -1117,8 +746,8 @@ namespace gen # endif # pragma endregion Incremental -# pragma region Parsing -# ifdef GEN_FEATURE_PARSING + #pragma region Parsing + #ifdef GEN_FEATURE_PARSING Code parse_class ( s32 length, char const* class_def ); Code parse_enum ( s32 length, char const* enum_def ); Code parse_execution ( s32 length, char const* exec_def ); @@ -1143,14 +772,30 @@ namespace gen s32 parse_variables ( s32 length, char const* vars_def, Code* out_var_codes ); s32 parse_typedefs ( s32 length, char const* typedef_def, Code* out_typedef_codes ); s32 parse_usings ( s32 length, char const* usings_def, Code* out_using_codes ); -# endif -# pragma endregion Parsing + #endif + #pragma endregion Parsing + + #pragma region Untyped text + sw token_fmt_va( char* buf, uw buf_size, char const* fmt, s32 num_tokens, va_list va ); + + inline + char const* token_fmt( char const* fmt, sw num_tokens, ... 
) + { + local_persist thread_local + char buf[ZPL_PRINTF_MAXLEN] = { 0 }; + + va_list va; + va_start(va, fmt); + token_fmt_va(buf, ZPL_PRINTF_MAXLEN, fmt, num_tokens, va); + va_end(va); + + return buf; + } -# pragma region Untyped text Code untyped_str ( s32 length, char const* str); Code untyped_fmt ( char const* fmt, ... ); Code untyped_token_fmt( char const* fmt, s32 num_tokens, ... ); -# pragma endregion Untyped text + #pragma endregion Untyped text struct Builder { @@ -1237,7 +882,7 @@ namespace gen }; #endif -#ifdef GEN_FEATURE_SCANNER +# ifdef GEN_FEATURE_SCANNER struct Scanner { struct RequestEntry @@ -1266,7 +911,7 @@ namespace gen bool process_requests( Array(Receipt) out_receipts ); }; -#endif +# endif #pragma endregion Gen Interface } @@ -1432,18 +1077,18 @@ namespace gen // or string handling while constructing or serializing. // Change them to suit your needs. - ct s32 InitSize_DataArrays = 16; - ct s32 InitSize_StringTable = megabytes(4); - ct s32 InitSize_TypeTable = megabytes(4); + constexpr s32 InitSize_DataArrays = 16; + constexpr s32 InitSize_StringTable = megabytes(4); + constexpr s32 InitSize_TypeTable = megabytes(4); - ct s32 CodePool_NumBlocks = 4096; - ct s32 InitSize_CodeEntiresArray = 512; - ct s32 SizePer_CodeEntriresArena = megabytes(16); - ct s32 SizePer_StringArena = megabytes(32); + constexpr s32 CodePool_NumBlocks = 4096; + constexpr s32 InitSize_CodeEntiresArray = 512; + constexpr s32 SizePer_CodeEntriresArena = megabytes(16); + constexpr s32 SizePer_StringArena = megabytes(32); - ct s32 MaxNameLength = 128; - ct s32 MaxUntypedStrLength = kilobytes(640); - ct s32 StringTable_MaxHashLength = kilobytes(1); + constexpr s32 MaxNameLength = 128; + constexpr s32 MaxUntypedStrLength = kilobytes(640); + constexpr s32 StringTable_MaxHashLength = kilobytes(1); // Predefined Codes. 
Are set to readonly and are setup during gen::init() @@ -1505,6 +1150,12 @@ namespace gen inline bool AST::add_param( AST* type, s32 length, char const* name ) { + if ( Type != ECode::Function ) + { + log_failure( "gen::AST::add_param: this AST is not a function - %s", debug_str() ); + return Code::Invalid; + } + if ( length <= 0 ) { log_failure( "gen::AST::add_param: Invalid name length provided - %d", length ); diff --git a/project/gen.singleheader.cpp b/project/gen.singleheader.cpp deleted file mode 100644 index e69de29..0000000