From d36c3fa84777d1de0be95235adb9ca668feae44e Mon Sep 17 00:00:00 2001 From: Ed_ Date: Thu, 3 Aug 2023 23:18:33 -0400 Subject: [PATCH] Single header generates again, some more cleanup. Looking into properly dealing with empty lines... I want to preserve the text's empty lines in the AST for serialization purposes (preserve formatting for gaps between definitions). Don't want to introduce the possibility of it breaking though, so will have to ignore empty_lines in a general way (if they are in a bad spot). Attempted to cover that by having TokArray::current() auto-skip empty lines and eat as well if the type doesn't match. --- .gitignore | 4 + project/components/ast.cpp | 5 - project/components/interface.cpp | 4 +- project/components/interface.parsing.cpp | 79 +- project/components/interface.upfront.cpp | 9 +- project/components/temp/ecode.hpp | 4 +- project/components/temp/etoktype.cpp | 33 +- project/dependencies/basic_types.hpp | 2 +- project/dependencies/debug.cpp | 2 +- project/dependencies/hashing.cpp | 2 +- project/dependencies/parsing.cpp | 1 - project/dependencies/printing.hpp | 1 + project/enums/ETokType.csv | 1 + project/file_processors/builder.hpp | 1 + project/file_processors/scanner.hpp | 8 +- project/gen.bootstrap.cpp | 81 +- project/gen/gen_builder.cpp | 1159 ---------------------- project/gen/gen_builder.hpp | 448 --------- scripts/bootstrap.ci.ps1 | 1 + singleheader/gen.singleheader.cpp | 100 +- 20 files changed, 241 insertions(+), 1704 deletions(-) delete mode 100644 project/gen/gen_builder.cpp delete mode 100644 project/gen/gen_builder.hpp diff --git a/.gitignore b/.gitignore index c095119..2ca13b9 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,10 @@ build/* **/gen/gen.cpp **/gen/gen_dep.hpp **/gen/gen_dep.cpp +**/gen/gen_builder.hpp +**/gen/gen_builder.cpp +**/gen/gen_scanner.hpp +**/gen/gen_scanner.cpp gencpp.hpp gencpp.cpp diff --git a/project/components/ast.cpp b/project/components/ast.cpp index 3112994..af45860 100644 --- 
a/project/components/ast.cpp +++ b/project/components/ast.cpp @@ -1,5 +1,3 @@ -#pragma region AST - Code Code::Global; Code Code::Invalid; @@ -916,6 +914,3 @@ bool AST::validate_body() #undef CheckEntries } - -#pragma endregion AST - diff --git a/project/components/interface.cpp b/project/components/interface.cpp index dd3d3ce..377ddab 100644 --- a/project/components/interface.cpp +++ b/project/components/interface.cpp @@ -383,9 +383,7 @@ StringCached get_cached_string( StrC str ) return result; } -/* - Used internally to retireve a Code object form the CodePool. - */ +// Used internally to retireve a Code object form the CodePool. Code make_code() { Pool* allocator = & CodePools.back(); diff --git a/project/components/interface.parsing.cpp b/project/components/interface.parsing.cpp index a6f2709..5caa857 100644 --- a/project/components/interface.parsing.cpp +++ b/project/components/interface.parsing.cpp @@ -65,17 +65,19 @@ namespace Parser Token& current() { + while ( Arr[Idx].Type == TokType::Empty_Line ) + Idx++; + return Arr[Idx]; } Token& previous() { - return Arr[Idx - 1]; - } + s32 idx = this->Idx; + while ( Arr[Idx].Type == TokType::Empty_Line ) + idx--; - Token* next() - { - return Idx + 1 < Arr.num() ? 
&Arr[Idx + 1] : nullptr; + return Arr[idx - 1]; } Token& operator []( s32 idx ) @@ -168,6 +170,12 @@ namespace Parser return false; } + if ( Arr[Idx].Type == TokType::Empty_Line && type != TokType::Empty_Line ) + { + Idx++; + return log_fmt( "Auto-skipping empty line (%d, %d)\n", current().Line, current().Column ); + } + if ( Arr[Idx].Type != type ) { String token_str = String::make( GlobalAllocator, { Arr[Idx].Length, Arr[Idx].Text } ); @@ -203,7 +211,7 @@ namespace Parser if ( current == '\n' ) \ { \ line++; \ - column = 0; \ + column = 1; \ } \ else \ { \ @@ -249,13 +257,25 @@ namespace Parser { Token token = { nullptr, 0, TokType::Invalid, line, column, false }; - if ( line == 4921 ) - { - log_fmt("here"); - } - bool is_define = false; + if ( column == 1 ) + { + token.Text = scanner; + + if ( current == '\r') + token.Length = 1; + + if ( current == '\n' ) + { + token.Type = TokType::Empty_Line; + token.Length ++; + + Tokens.append( token ); + continue; + } + } + SkipWhitespace(); if ( left <= 0 ) break; @@ -1022,13 +1042,16 @@ if ( def.Ptr == nullptr ) \ return CodeInvalid; \ } -# define nexttok Context.Tokens.next() # define currtok Context.Tokens.current() # define prevtok Context.Tokens.previous() # define eat( Type_ ) Context.Tokens.__eat( Type_ ) # define left ( Context.Tokens.Arr.num() - Context.Tokens.Idx ) -# define check( Type_ ) ( left && currtok.Type == Type_ ) +# define check( Type_ ) \ + ( left \ + && (currtok.Type == TokType::Empty_Line ? 
\ + eat( TokType::Empty_Line) : true) \ + && currtok.Type == Type_ ) # define push_scope() \ StackNode scope { nullptr, currtok, NullToken, txt_StrC( __func__ ) }; \ @@ -2402,6 +2425,12 @@ CodeBody parse_class_struct_body( Parser::TokType which ) switch ( currtok.Type ) { + case TokType::Empty_Line: + // Empty lines are auto skipped by Tokens.current() + member = untyped_str( Context.Tokens.Arr[ Context.Tokens.Idx] ); + eat( TokType::Empty_Line ); + break; + case TokType::Comment: member = def_comment( currtok ); eat( TokType::Comment ); @@ -2756,6 +2785,12 @@ CodeBody parse_global_nspace( CodeT which ) switch ( currtok.Type ) { + case TokType::Empty_Line: + // Empty lines are auto skipped by Tokens.current() + member = untyped_str( Context.Tokens.Arr[ Context.Tokens.Idx] ); + eat( TokType::Empty_Line ); + break; + case TokType::Comment: member = def_comment( currtok ); eat( TokType::Comment ); @@ -3072,6 +3107,12 @@ CodeEnum parse_enum( bool inplace_def ) { switch ( currtok.Type ) { + case TokType::Empty_Line: + // Empty lines are auto skipped by Tokens.current() + member = untyped_str( Context.Tokens.Arr[ Context.Tokens.Idx] ); + eat( TokType::Empty_Line ); + break; + case TokType::Comment: member = def_comment( currtok ); eat( TokType::Comment ); @@ -4027,11 +4068,6 @@ CodeTypedef parse_typedef() eat( TokType::Decl_Typedef ); - if ( currtok.Line == 2196 ) - { - log_fmt("here"); - } - constexpr bool from_typedef = true; if ( check( TokType::Preprocess_Macro )) @@ -4144,6 +4180,12 @@ CodeUnion parse_union( bool inplace_def ) Code member = { nullptr }; switch ( currtok.Type ) { + case TokType::Empty_Line: + // Empty lines are auto skipped by Tokens.current() + member = untyped_str( Context.Tokens.Arr[ Context.Tokens.Idx] ); + eat( TokType::Empty_Line ); + break; + case TokType::Comment: member = def_comment( currtok ); eat( TokType::Comment ); @@ -4422,7 +4464,6 @@ CodeVar parse_variable( StrC def ) // Undef helper macros # undef check_parse_args -# undef nexttok # 
undef currtok # undef prevtok # undef eat diff --git a/project/components/interface.upfront.cpp b/project/components/interface.upfront.cpp index 42b6024..5421fc4 100644 --- a/project/components/interface.upfront.cpp +++ b/project/components/interface.upfront.cpp @@ -1,3 +1,5 @@ +#pragma region Upfront + enum class OpValidateResult : u32 { Fail, @@ -375,7 +377,7 @@ OpValidateResult operator__validate( OperatorT op, CodeParam params_code, CodeTy /* -The implementaiton of the upfront constructors involves bascially doing three things: +The implementaiton of the upfront constructors involves doing three things: * Validate the arguments given to construct the intended type of AST is valid. * Construct said AST type. * Lock the AST (set to readonly) and return the valid object. @@ -2130,4 +2132,9 @@ CodeBody def_union_body( s32 num, CodeUnion* codes ) # undef name_check # undef null_check # undef null_or_invalid_check +# undef def_body_start +# undef def_body_code_array_start + + +#pragma endregion Upfront diff --git a/project/components/temp/ecode.hpp b/project/components/temp/ecode.hpp index 20828c7..7075761 100644 --- a/project/components/temp/ecode.hpp +++ b/project/components/temp/ecode.hpp @@ -53,8 +53,8 @@ namespace ECode Entry( Template ) \ Entry( Typedef ) \ Entry( Typename ) \ - Entry( Union ) \ - Entry( Union_Body) \ + Entry( Union ) \ + Entry( Union_Body) \ Entry( Using ) \ Entry( Using_Namespace ) \ Entry( Variable ) diff --git a/project/components/temp/etoktype.cpp b/project/components/temp/etoktype.cpp index f4b1d73..614cf33 100644 --- a/project/components/temp/etoktype.cpp +++ b/project/components/temp/etoktype.cpp @@ -26,7 +26,7 @@ namespace Parser Entry( Ampersand_DBL, "&&" ) \ Entry( Assign_Classifer, ":" ) \ Entry( Attribute_Open, "[[" ) \ - Entry( Attribute_Close, "]]" ) \ + Entry( Attribute_Close, "]]" ) \ Entry( BraceCurly_Open, "{" ) \ Entry( BraceCurly_Close, "}" ) \ Entry( BraceSquare_Open, "[" ) \ @@ -50,6 +50,7 @@ namespace Parser Entry( 
Decl_Typedef, "typedef" ) \ Entry( Decl_Using, "using" ) \ Entry( Decl_Union, "union" ) \ + Entry( Empty_Line, "__empty_line__" ) \ Entry( Identifier, "__identifier__" ) \ Entry( Module_Import, "import" ) \ Entry( Module_Export, "export" ) \ @@ -63,8 +64,8 @@ namespace Parser Entry( Preprocess_Else, "else") \ Entry( Preprocess_EndIf, "endif") \ Entry( Preprocess_Include, "include" ) \ - Entry( Preprocess_Pragma, "pragma") \ - Entry( Preprocess_Content, "__macro_content__") \ + Entry( Preprocess_Pragma, "pragma") \ + Entry( Preprocess_Content, "__macro_content__") \ Entry( Preprocess_Macro, "__macro__") \ Entry( Preprocess_Unsupported, "__unsupported__" ) \ Entry( Spec_Alignas, "alignas" ) \ @@ -72,10 +73,10 @@ namespace Parser Entry( Spec_Consteval, "consteval" ) \ Entry( Spec_Constexpr, "constexpr" ) \ Entry( Spec_Constinit, "constinit" ) \ - Entry( Spec_Explicit, "explicit" ) \ + Entry( Spec_Explicit, "explicit" ) \ Entry( Spec_Extern, "extern" ) \ - Entry( Spec_Final, "final" ) \ - Entry( Spec_Global, "global" ) \ + Entry( Spec_Final, "final" ) \ + Entry( Spec_Global, "global" ) \ Entry( Spec_Inline, "inline" ) \ Entry( Spec_Internal_Linkage, "internal" ) \ Entry( Spec_LocalPersist, "local_persist" ) \ @@ -87,20 +88,20 @@ namespace Parser Entry( Spec_Volatile, "volatile") \ Entry( Star, "*" ) \ Entry( Statement_End, ";" ) \ - Entry( StaticAssert, "static_assert" ) \ + Entry( StaticAssert, "static_assert" ) \ Entry( String, "__string__" ) \ - Entry( Type_Unsigned, "unsigned" ) \ + Entry( Type_Unsigned, "unsigned" ) \ Entry( Type_Signed, "signed" ) \ Entry( Type_Short, "short" ) \ Entry( Type_Long, "long" ) \ - Entry( Type_char, "char" ) \ - Entry( Type_int, "int" ) \ - Entry( Type_double, "double" ) \ - Entry( Type_MS_int8, "__int8" ) \ - Entry( Type_MS_int16, "__int16" ) \ - Entry( Type_MS_int32, "__int32" ) \ - Entry( Type_MS_int64, "__int64" ) \ - Entry( Type_MS_W64, "_W64" ) \ + Entry( Type_char, "char" ) \ + Entry( Type_int, "int" ) \ + Entry( Type_double, 
"double" ) \ + Entry( Type_MS_int8, "__int8" ) \ + Entry( Type_MS_int16, "__int16" ) \ + Entry( Type_MS_int32, "__int32" ) \ + Entry( Type_MS_int64, "__int64" ) \ + Entry( Type_MS_W64, "_W64" ) \ Entry( Varadic_Argument, "..." ) \ Entry( __Attributes_Start, "__attrib_start__" ) diff --git a/project/dependencies/basic_types.hpp b/project/dependencies/basic_types.hpp index 942bfcf..66d54e2 100644 --- a/project/dependencies/basic_types.hpp +++ b/project/dependencies/basic_types.hpp @@ -117,5 +117,5 @@ typedef s8 b8; typedef s16 b16; typedef s32 b32; -#pragma region Basic Types +#pragma endregion Basic Types diff --git a/project/dependencies/debug.cpp b/project/dependencies/debug.cpp index d73829c..5f7e28f 100644 --- a/project/dependencies/debug.cpp +++ b/project/dependencies/debug.cpp @@ -1,4 +1,4 @@ -#pragma endregion Debug +#pragma region Debug void assert_handler( char const* condition, char const* file, s32 line, char const* msg, ... ) { diff --git a/project/dependencies/hashing.cpp b/project/dependencies/hashing.cpp index 7989e70..c2a306e 100644 --- a/project/dependencies/hashing.cpp +++ b/project/dependencies/hashing.cpp @@ -82,5 +82,5 @@ u64 crc64( void const* data, sw len ) return result; } -#pragma region Hashing +#pragma endregion Hashing diff --git a/project/dependencies/parsing.cpp b/project/dependencies/parsing.cpp index d86609f..6d4d110 100644 --- a/project/dependencies/parsing.cpp +++ b/project/dependencies/parsing.cpp @@ -799,7 +799,6 @@ ADT_Error adt_str_to_number_strict( ADT_Node* node ) # define GEN_CSV_ASSERT( msg ) #endif - u8 csv_parse_delimiter( CSV_Object* root, char* text, AllocatorInfo allocator, b32 has_header, char delim ) { CSV_Error error = ECSV_Error__NONE; diff --git a/project/dependencies/printing.hpp b/project/dependencies/printing.hpp index 5509388..0608beb 100644 --- a/project/dependencies/printing.hpp +++ b/project/dependencies/printing.hpp @@ -58,3 +58,4 @@ sw fatal(char const* fmt, ...) 
} #pragma endregion Printing + diff --git a/project/enums/ETokType.csv b/project/enums/ETokType.csv index 4f6c717..1e175ba 100644 --- a/project/enums/ETokType.csv +++ b/project/enums/ETokType.csv @@ -32,6 +32,7 @@ Decl_Template, "template" Decl_Typedef, "typedef" Decl_Using, "using" Decl_Union, "union" +Empty_Line, "__empty_line__" Identifier, "__identifier__" Module_Import, "import" Module_Export, "export" diff --git a/project/file_processors/builder.hpp b/project/file_processors/builder.hpp index b457ebe..6571c40 100644 --- a/project/file_processors/builder.hpp +++ b/project/file_processors/builder.hpp @@ -12,3 +12,4 @@ struct Builder void write(); }; + diff --git a/project/file_processors/scanner.hpp b/project/file_processors/scanner.hpp index e454aa0..82524cf 100644 --- a/project/file_processors/scanner.hpp +++ b/project/file_processors/scanner.hpp @@ -1,8 +1,3 @@ -#pragma once -#include "gen.hpp" - -GEN_NS_BEGIN - Code scan_file( char const* path ) { FileInfo file; @@ -28,6 +23,7 @@ Code scan_file( char const* path ) return untyped_str( str ); } +#if 0 struct Policy { // Nothing for now. 
@@ -69,5 +65,5 @@ struct Scanner bool process_requests( Array out_receipts ); }; +#endif -GEN_NS_END diff --git a/project/gen.bootstrap.cpp b/project/gen.bootstrap.cpp index f052347..2fefdad 100644 --- a/project/gen.bootstrap.cpp +++ b/project/gen.bootstrap.cpp @@ -44,10 +44,10 @@ int gen_main() Builder deps_header = Builder::open("gen/gen_dep.hpp"); deps_header.print_fmt( generation_notice ); - deps_header.print_fmt("// This file is intended to be included within gen.hpp (There is no pragma diagnostic ignores)\n\n"); - deps_header.print_fmt("#pragma once\n\n"); + deps_header.print_fmt( "// This file is intended to be included within gen.hpp (There is no pragma diagnostic ignores)\n\n" ); + deps_header.print_fmt( "#pragma once\n\n" ); deps_header.print( header_start ); - deps_header.print_fmt( "GEN_NS_BEGIN\n\n"); + deps_header.print_fmt( "GEN_NS_BEGIN\n\n" ); deps_header.print( macros ); deps_header.print( basic_types ); @@ -61,13 +61,13 @@ int gen_main() deps_header.print( file_handling ); deps_header.print( timing ); - deps_header.print_fmt( "GEN_NS_END\n\n"); + deps_header.print_fmt( "GEN_NS_END\n\n" ); deps_header.write(); } // gen_dep.cpp { - Code src_start = scan_file( "dependencies/src_start.cpp" ); + Code src_start = scan_file( "dependencies/src_start.cpp" ); Code debug = scan_file( "dependencies/debug.cpp" ); Code string_ops = scan_file( "dependencies/string_ops.cpp" ); Code printing = scan_file( "dependencies/printing.cpp" ); @@ -78,11 +78,11 @@ int gen_main() Code timing = scan_file( "dependencies/timing.cpp" ); Builder - deps_impl = Builder::open("gen/gen_dep.cpp"); + deps_impl = Builder::open( "gen/gen_dep.cpp" ); deps_impl.print_fmt( generation_notice ); - deps_impl.print_fmt("// This file is intended to be included within gen.cpp (There is no pragma diagnostic ignores)\n\n"); + deps_impl.print_fmt( "// This file is intended to be included within gen.cpp (There is no pragma diagnostic ignores)\n\n" ); deps_impl.print( src_start ); - 
deps_impl.print_fmt( "GEN_NS_BEGIN\n\n"); + deps_impl.print_fmt( "GEN_NS_BEGIN\n\n" ); deps_impl.print( debug ); deps_impl.print( string_ops ); @@ -93,7 +93,7 @@ int gen_main() deps_impl.print( file_handling ); deps_impl.print( timing ); - deps_impl.print_fmt( "GEN_NS_END\n\n"); + deps_impl.print_fmt( "GEN_NS_END\n\n" ); deps_impl.write(); } @@ -115,30 +115,32 @@ int gen_main() Builder header = Builder::open( "gen/gen.hpp" ); header.print_fmt( generation_notice ); - header.print_fmt("#pragma once\n\n"); + header.print_fmt( "#pragma once\n\n" ); header.print( push_ignores ); header.print( header_start ); - header.print_fmt( "GEN_NS_BEGIN\n\n"); + header.print_fmt( "GEN_NS_BEGIN\n\n" ); - header.print_fmt("#pragma region Types\n\n"); + header.print_fmt( "#pragma region Types\n\n" ); header.print( types ); header.print( ecode ); header.print( eoperator ); header.print( especifier ); - header.print_fmt("#pragma endregion Types\n\n"); + header.print_fmt( "#pragma endregion Types\n\n" ); - header.print_fmt("#pragma region AST\n\n"); + header.print_fmt( "#pragma region AST\n\n" ); header.print( ast ); header.print( ast_types ); - header.print_fmt("#pragma endregion AST\n\n"); + header.print_fmt( "#pragma endregion AST\n\n" ); header.print( interface ); + header.print_fmt( "#pragma region Inlines\n\n" ); header.print( inlines ); header.print( ast_inlines ); + header.print_fmt( "#pragma endregion Inlines\n\n" ); header.print( header_end ); - header.print_fmt( "GEN_NS_END\n\n"); + header.print_fmt( "GEN_NS_END\n\n" ); header.print( pop_ignores ); header.write(); } @@ -147,7 +149,7 @@ int gen_main() { Code src_start = scan_file( "components/src_start.cpp" ); CodeInclude header = def_include( txt_StrC("gen.hpp") ); - Code data = scan_file( "components/static_data.cpp" ); + Code static_data = scan_file( "components/static_data.cpp" ); Code ast_case_macros = scan_file( "components/ast_case_macros.cpp" ); Code ast = scan_file( "components/ast.cpp" ); Code interface = scan_file( 
"components/interface.cpp" ); @@ -166,14 +168,22 @@ int gen_main() src.print( header ); src.print_fmt( "\nGEN_NS_BEGIN\n\n"); - src.print( data ); + src.print( static_data ); + + src.print_fmt( "#pragma region AST\n\n" ); src.print( ast_case_macros ); src.print( ast ); + src.print_fmt( "#pragma endregion AST\n\n" ); + + src.print_fmt( "#pragma region Interface\n\n" ); src.print( interface ); src.print( upfront ); + src.print_fmt( "#pragma region Parsing\n\n" ); src.print( parser_nspace ); src.print( parsing ); src.print( untyped ); + src.print_fmt( "#pragma endregion Parsing\n\n" ); + src.print_fmt( "#pragma endregion Interface\n\n" ); src.print_fmt( "GEN_NS_END\n\n"); src.print( pop_ignores ); @@ -182,7 +192,6 @@ int gen_main() // gen_builder.hpp { - Code parsing = scan_file( "dependencies/parsing.hpp" ); Code builder = scan_file( "file_processors/builder.hpp" ); Builder @@ -190,7 +199,6 @@ int gen_main() header.print_fmt( generation_notice ); header.print( def_include( txt_StrC("gen.hpp") )); header.print_fmt( "\nGEN_NS_BEGIN\n\n" ); - header.print( parsing ); header.print( builder ); header.print_fmt( "\nGEN_NS_END\n\n" ); header.write(); @@ -198,19 +206,48 @@ int gen_main() // gen_builder.cpp { - Code parsing = scan_file( "dependencies/parsing.cpp" ); Code builder = scan_file( "file_processors/builder.cpp" ); Builder src = Builder::open( "gen/gen_builder.cpp" ); + src.print_fmt( generation_notice ); src.print( def_include( txt_StrC("gen_builder.hpp") ) ); src.print_fmt( "\nGEN_NS_BEGIN\n\n" ); - src.print( parsing ); src.print( builder ); src.print_fmt( "\nGEN_NS_END\n\n" ); src.write(); } + // gen_scanner.hpp + { + Code parsing = scan_file( "dependencies/parsing.hpp" ); + Code scanner = scan_file( "file_processors/scanner.hpp" ); + + Builder + header = Builder::open( "gen/gen_scanner.hpp" ); + header.print( def_include( txt_StrC("gen.hpp") ) ); + header.print_fmt( "\nGEN_NS_BEGIN\n\n" ); + header.print( parsing ); + header.print( scanner ); + header.print_fmt( 
"\nGEN_NS_END\n\n" ); + header.write(); + } + + // gen_scanner.cpp + { + Code parsing = scan_file( "dependencies/parsing.cpp" ); + // Code scanner = scan_file( "file_processors/scanner.cpp" ); + + Builder + src = Builder::open( "gen/gen_scanner.cpp" ); + src.print( def_include( txt_StrC("gen_scanner.hpp") ) ); + src.print_fmt( "\nGEN_NS_BEGIN\n\n" ); + src.print( parsing ); + // src.print( scanner ); + src.print_fmt( "\nGEN_NS_END\n\n" ); + src.write(); + } + gen::deinit(); return 0; } diff --git a/project/gen/gen_builder.cpp b/project/gen/gen_builder.cpp deleted file mode 100644 index b727689..0000000 --- a/project/gen/gen_builder.cpp +++ /dev/null @@ -1,1159 +0,0 @@ -#include "gen_builder.hpp" - -GEN_NS_BEGIN - -#pragma region ADT - -#define _adt_fprintf( s_, fmt_, ... ) \ - do \ - { \ - if ( str_fmt_file( s_, fmt_, ##__VA_ARGS__ ) < 0 ) \ - return EADT_ERROR_OUT_OF_MEMORY; \ - } while ( 0 ) - -u8 adt_make_branch( ADT_Node* node, AllocatorInfo backing, char const* name, b32 is_array ) -{ - ADT_Type type = EADT_TYPE_OBJECT; - if ( is_array ) - type = EADT_TYPE_ARRAY; - - ADT_Node* parent = node->parent; - zero_item( node ); - - node->type = type; - node->name = name; - node->parent = parent; - node->nodes = Array< ADT_Node >::init( backing ); - - if ( ! 
node->nodes ) - return EADT_ERROR_OUT_OF_MEMORY; - - return 0; -} - -u8 adt_destroy_branch( ADT_Node* node ) -{ - GEN_ASSERT_NOT_NULL( node ); - if ( ( node->type == EADT_TYPE_OBJECT || node->type == EADT_TYPE_ARRAY ) && node->nodes ) - { - for ( sw i = 0; i < node->nodes.num(); ++i ) - { - adt_destroy_branch( node->nodes + i ); - } - - node->nodes.free(); - } - return 0; -} - -u8 adt_make_leaf( ADT_Node* node, char const* name, ADT_Type type ) -{ - GEN_ASSERT( type != EADT_TYPE_OBJECT && type != EADT_TYPE_ARRAY ); - - ADT_Node* parent = node->parent; - zero_item( node ); - - node->type = type; - node->name = name; - node->parent = parent; - return 0; -} - -ADT_Node* adt_find( ADT_Node* node, char const* name, b32 deep_search ) -{ - if ( node->type != EADT_TYPE_OBJECT ) - { - return NULL; - } - - for ( sw i = 0; i < node->nodes.num(); i++ ) - { - if ( ! str_compare( node->nodes[ i ].name, name ) ) - { - return ( node->nodes + i ); - } - } - - if ( deep_search ) - { - for ( sw i = 0; i < node->nodes.num(); i++ ) - { - ADT_Node* res = adt_find( node->nodes + i, name, deep_search ); - - if ( res != NULL ) - return res; - } - } - - return NULL; -} - -internal ADT_Node* _adt_get_value( ADT_Node* node, char const* value ) -{ - switch ( node->type ) - { - case EADT_TYPE_MULTISTRING : - case EADT_TYPE_STRING : - { - if ( node->string && ! str_compare( node->string, value ) ) - { - return node; - } - } - break; - case EADT_TYPE_INTEGER : - case EADT_TYPE_REAL : - { - char back[ 4096 ] = { 0 }; - FileInfo tmp; - - /* allocate a file descriptor for a memory-mapped number to string conversion, input source buffer is not cloned, however. */ - file_stream_open( &tmp, heap(), ( u8* )back, size_of( back ), EFileStream_WRITABLE ); - adt_print_number( &tmp, node ); - - sw fsize = 0; - u8* buf = file_stream_buf( &tmp, &fsize ); - - if ( ! 
str_compare( ( char const* )buf, value ) ) - { - file_close( &tmp ); - return node; - } - - file_close( &tmp ); - } - break; - default : - break; /* node doesn't support value based lookup */ - } - - return NULL; -} - -internal ADT_Node* _adt_get_field( ADT_Node* node, char* name, char* value ) -{ - for ( sw i = 0; i < node->nodes.num(); i++ ) - { - if ( ! str_compare( node->nodes[ i ].name, name ) ) - { - ADT_Node* child = &node->nodes[ i ]; - if ( _adt_get_value( child, value ) ) - { - return node; /* this object does contain a field of a specified value! */ - } - } - } - - return NULL; -} - -ADT_Node* adt_query( ADT_Node* node, char const* uri ) -{ - GEN_ASSERT_NOT_NULL( uri ); - - if ( *uri == '/' ) - { - uri++; - } - - if ( *uri == 0 ) - { - return node; - } - - if ( ! node || ( node->type != EADT_TYPE_OBJECT && node->type != EADT_TYPE_ARRAY ) ) - { - return NULL; - } - -#if defined EADT_URI_DEBUG || 0 - str_fmt_out( "uri: %s\n", uri ); -#endif - - char * p = ( char* )uri, *b = p, *e = p; - ADT_Node* found_node = NULL; - - b = p; - p = e = ( char* )str_skip( p, '/' ); - char* buf = str_fmt_buf( "%.*s", ( int )( e - b ), b ); - - /* handle field value lookup */ - if ( *b == '[' ) - { - char *l_p = buf + 1, *l_b = l_p, *l_e = l_p, *l_b2 = l_p, *l_e2 = l_p; - l_e = ( char* )str_skip( l_p, '=' ); - l_e2 = ( char* )str_skip( l_p, ']' ); - - if ( ( ! *l_e && node->type != EADT_TYPE_ARRAY ) || ! 
*l_e2 ) - { - GEN_ASSERT_MSG( 0, "Invalid field value lookup" ); - return NULL; - } - - *l_e2 = 0; - - /* [field=value] */ - if ( *l_e ) - { - *l_e = 0; - l_b2 = l_e + 1; - - /* run a value comparison against our own fields */ - if ( node->type == EADT_TYPE_OBJECT ) - { - found_node = _adt_get_field( node, l_b, l_b2 ); - } - - /* run a value comparison against any child that is an object node */ - else if ( node->type == EADT_TYPE_ARRAY ) - { - for ( sw i = 0; i < node->nodes.num(); i++ ) - { - ADT_Node* child = &node->nodes[ i ]; - if ( child->type != EADT_TYPE_OBJECT ) - { - continue; - } - - found_node = _adt_get_field( child, l_b, l_b2 ); - - if ( found_node ) - break; - } - } - } - /* [value] */ - else - { - for ( sw i = 0; i < node->nodes.num(); i++ ) - { - ADT_Node* child = &node->nodes[ i ]; - if ( _adt_get_value( child, l_b2 ) ) - { - found_node = child; - break; /* we found a matching value in array, ignore the rest of it */ - } - } - } - - /* go deeper if uri continues */ - if ( *e ) - { - return adt_query( found_node, e + 1 ); - } - } - /* handle field name lookup */ - else if ( node->type == EADT_TYPE_OBJECT ) - { - found_node = adt_find( node, buf, false ); - - /* go deeper if uri continues */ - if ( *e ) - { - return adt_query( found_node, e + 1 ); - } - } - /* handle array index lookup */ - else - { - sw idx = ( sw )str_to_i64( buf, NULL, 10 ); - if ( idx >= 0 && idx < node->nodes.num() ) - { - found_node = &node->nodes[ idx ]; - - /* go deeper if uri continues */ - if ( *e ) - { - return adt_query( found_node, e + 1 ); - } - } - } - - return found_node; -} - -ADT_Node* adt_alloc_at( ADT_Node* parent, sw index ) -{ - if ( ! parent || ( parent->type != EADT_TYPE_OBJECT && parent->type != EADT_TYPE_ARRAY ) ) - { - return NULL; - } - - if ( ! parent->nodes ) - return NULL; - - if ( index < 0 || index > parent->nodes.num() ) - return NULL; - - ADT_Node o = { 0 }; - o.parent = parent; - if ( ! 
parent->nodes.append_at( o, index ) ) - return NULL; - - return parent->nodes + index; -} - -ADT_Node* adt_alloc( ADT_Node* parent ) -{ - if ( ! parent || ( parent->type != EADT_TYPE_OBJECT && parent->type != EADT_TYPE_ARRAY ) ) - { - return NULL; - } - - if ( ! parent->nodes ) - return NULL; - - return adt_alloc_at( parent, parent->nodes.num() ); -} - -b8 adt_set_obj( ADT_Node* obj, char const* name, AllocatorInfo backing ) -{ - return adt_make_branch( obj, backing, name, 0 ); -} - -b8 adt_set_arr( ADT_Node* obj, char const* name, AllocatorInfo backing ) -{ - return adt_make_branch( obj, backing, name, 1 ); -} - -b8 adt_set_str( ADT_Node* obj, char const* name, char const* value ) -{ - adt_make_leaf( obj, name, EADT_TYPE_STRING ); - obj->string = value; - return true; -} - -b8 adt_set_flt( ADT_Node* obj, char const* name, f64 value ) -{ - adt_make_leaf( obj, name, EADT_TYPE_REAL ); - obj->real = value; - return true; -} - -b8 adt_set_int( ADT_Node* obj, char const* name, s64 value ) -{ - adt_make_leaf( obj, name, EADT_TYPE_INTEGER ); - obj->integer = value; - return true; -} - -ADT_Node* adt_move_node_at( ADT_Node* node, ADT_Node* new_parent, sw index ) -{ - GEN_ASSERT_NOT_NULL( node ); - GEN_ASSERT_NOT_NULL( new_parent ); - ADT_Node* old_parent = node->parent; - ADT_Node* new_node = adt_alloc_at( new_parent, index ); - *new_node = *node; - new_node->parent = new_parent; - if ( old_parent ) - { - adt_remove_node( node ); - } - return new_node; -} - -ADT_Node* adt_move_node( ADT_Node* node, ADT_Node* new_parent ) -{ - GEN_ASSERT_NOT_NULL( node ); - GEN_ASSERT_NOT_NULL( new_parent ); - GEN_ASSERT( new_parent->type == EADT_TYPE_ARRAY || new_parent->type == EADT_TYPE_OBJECT ); - return adt_move_node_at( node, new_parent, new_parent->nodes.num() ); -} - -void adt_swap_nodes( ADT_Node* node, ADT_Node* other_node ) -{ - GEN_ASSERT_NOT_NULL( node ); - GEN_ASSERT_NOT_NULL( other_node ); - ADT_Node* parent = node->parent; - ADT_Node* other_parent = other_node->parent; - sw 
index = ( pointer_diff( parent->nodes, node ) / size_of( ADT_Node ) ); - sw index2 = ( pointer_diff( other_parent->nodes, other_node ) / size_of( ADT_Node ) ); - ADT_Node temp = parent->nodes[ index ]; - temp.parent = other_parent; - other_parent->nodes[ index2 ].parent = parent; - parent->nodes[ index ] = other_parent->nodes[ index2 ]; - other_parent->nodes[ index2 ] = temp; -} - -void adt_remove_node( ADT_Node* node ) -{ - GEN_ASSERT_NOT_NULL( node ); - GEN_ASSERT_NOT_NULL( node->parent ); - ADT_Node* parent = node->parent; - sw index = ( pointer_diff( parent->nodes, node ) / size_of( ADT_Node ) ); - parent->nodes.remove_at( index ); -} - -ADT_Node* adt_append_obj( ADT_Node* parent, char const* name ) -{ - ADT_Node* o = adt_alloc( parent ); - if ( ! o ) - return NULL; - if ( adt_set_obj( o, name, parent->nodes.get_header()->Allocator ) ) - { - adt_remove_node( o ); - return NULL; - } - return o; -} - -ADT_Node* adt_append_arr( ADT_Node* parent, char const* name ) -{ - ADT_Node* o = adt_alloc( parent ); - if ( ! o ) - return NULL; - if ( adt_set_arr( o, name, parent->nodes.get_header()->Allocator ) ) - { - adt_remove_node( o ); - return NULL; - } - return o; -} - -ADT_Node* adt_append_str( ADT_Node* parent, char const* name, char const* value ) -{ - ADT_Node* o = adt_alloc( parent ); - if ( ! o ) - return NULL; - adt_set_str( o, name, value ); - return o; -} - -ADT_Node* adt_append_flt( ADT_Node* parent, char const* name, f64 value ) -{ - ADT_Node* o = adt_alloc( parent ); - if ( ! o ) - return NULL; - adt_set_flt( o, name, value ); - return o; -} - -ADT_Node* adt_append_int( ADT_Node* parent, char const* name, s64 value ) -{ - ADT_Node* o = adt_alloc( parent ); - if ( ! 
o ) - return NULL; - adt_set_int( o, name, value ); - return o; -} - -/* parser helpers */ -char* adt_parse_number_strict( ADT_Node* node, char* base_str ) -{ - GEN_ASSERT_NOT_NULL( node ); - GEN_ASSERT_NOT_NULL( base_str ); - char *p = base_str, *e = p; - - while ( *e ) - ++e; - - while ( *p && ( str_find( "eE.+-", *p ) || char_is_hex_digit( *p ) ) ) - { - ++p; - } - - if ( p >= e ) - { - return adt_parse_number( node, base_str ); - } - - return base_str; -} - -char* adt_parse_number( ADT_Node* node, char* base_str ) -{ - GEN_ASSERT_NOT_NULL( node ); - GEN_ASSERT_NOT_NULL( base_str ); - char *p = base_str, *e = p; - - s32 base = 0; - s32 base2 = 0; - u8 base2_offset = 0; - s8 exp = 0, orig_exp = 0; - u8 neg_zero = 0; - u8 lead_digit = 0; - ADT_Type node_type = EADT_TYPE_UNINITIALISED; - u8 node_props = 0; - - /* skip false positives and special cases */ - if ( ! ! str_find( "eE", *p ) || ( ! ! str_find( ".+-", *p ) && ! char_is_hex_digit( *( p + 1 ) ) && *( p + 1 ) != '.' ) ) - { - return ++base_str; - } - - node_type = EADT_TYPE_INTEGER; - neg_zero = false; - - sw ib = 0; - char buf[ 48 ] = { 0 }; - - if ( *e == '+' ) - ++e; - else if ( *e == '-' ) - { - buf[ ib++ ] = *e++; - } - - if ( *e == '.' ) - { - node_type = EADT_TYPE_REAL; - node_props = EADT_PROPS_IS_PARSED_REAL; - lead_digit = false; - buf[ ib++ ] = '0'; - do - { - buf[ ib++ ] = *e; - } while ( char_is_digit( *++e ) ); - } - else - { - if ( ! str_compare( e, "0x", 2 ) || ! str_compare( e, "0X", 2 ) ) - { - node_props = EADT_PROPS_IS_HEX; - } - - /* bail if ZPL_ADT_PROPS_IS_HEX is unset but we get 'x' on input */ - if ( char_to_lower( *e ) == 'x' && ( node_props != EADT_PROPS_IS_HEX ) ) - { - return ++base_str; - } - - while ( char_is_hex_digit( *e ) || char_to_lower( *e ) == 'x' ) - { - buf[ ib++ ] = *e++; - } - - if ( *e == '.' 
) - { - node_type = EADT_TYPE_REAL; - lead_digit = true; - u32 step = 0; - - do - { - buf[ ib++ ] = *e; - ++step; - } while ( char_is_digit( *++e ) ); - - if ( step < 2 ) - { - buf[ ib++ ] = '0'; - } - } - } - - /* check if we have a dot here, this is a false positive (IP address, ...) */ - if ( *e == '.' ) - { - return ++base_str; - } - - f32 eb = 10; - char expbuf[ 6 ] = { 0 }; - sw expi = 0; - - if ( *e && ! ! str_find( "eE", *e ) ) - { - ++e; - if ( *e == '+' || *e == '-' || char_is_digit( *e ) ) - { - if ( *e == '-' ) - { - eb = 0.1f; - } - if ( ! char_is_digit( *e ) ) - { - ++e; - } - while ( char_is_digit( *e ) ) - { - expbuf[ expi++ ] = *e++; - } - } - - orig_exp = exp = ( u8 )str_to_i64( expbuf, NULL, 10 ); - } - - if ( node_type == EADT_TYPE_INTEGER ) - { - node->integer = str_to_i64( buf, 0, 0 ); -#ifndef GEN_PARSER_DISABLE_ANALYSIS - /* special case: negative zero */ - if ( node->integer == 0 && buf[ 0 ] == '-' ) - { - neg_zero = true; - } -#endif - while ( orig_exp-- > 0 ) - { - node->integer *= ( s64 )eb; - } - } - else - { - node->real = str_to_f64( buf, 0 ); - -#ifndef GEN_PARSER_DISABLE_ANALYSIS - char *q = buf, *base_string = q, *base_string2 = q; - base_string = zpl_cast( char* ) str_skip( base_string, '.' ); - *base_string = '\0'; - base_string2 = base_string + 1; - char* base_string_off = base_string2; - while ( *base_string_off++ == '0' ) - base2_offset++; - - base = ( s32 )str_to_i64( q, 0, 0 ); - base2 = ( s32 )str_to_i64( base_string2, 0, 0 ); - if ( exp ) - { - exp = exp * ( ! ( eb == 10.0f ) ? 
-1 : 1 ); - node_props = EADT_PROPS_IS_EXP; - } - - /* special case: negative zero */ - if ( base == 0 && buf[ 0 ] == '-' ) - { - neg_zero = true; - } -#endif - while ( orig_exp-- > 0 ) - { - node->real *= eb; - } - } - - node->type = node_type; - node->props = node_props; - -#ifndef GEN_PARSER_DISABLE_ANALYSIS - node->base = base; - node->base2 = base2; - node->base2_offset = base2_offset; - node->exp = exp; - node->neg_zero = neg_zero; - node->lead_digit = lead_digit; -#else - unused( base ); - unused( base2 ); - unused( base2_offset ); - unused( exp ); - unused( neg_zero ); - unused( lead_digit ); -#endif - return e; -} - -ADT_Error adt_print_number( FileInfo* file, ADT_Node* node ) -{ - GEN_ASSERT_NOT_NULL( file ); - GEN_ASSERT_NOT_NULL( node ); - if ( node->type != EADT_TYPE_INTEGER && node->type != EADT_TYPE_REAL ) - { - return EADT_ERROR_INVALID_TYPE; - } - -#ifndef GEN_PARSER_DISABLE_ANALYSIS - if ( node->neg_zero ) - { - _adt_fprintf( file, "-" ); - } -#endif - - switch ( node->type ) - { - case EADT_TYPE_INTEGER : - { - if ( node->props == EADT_PROPS_IS_HEX ) - { - _adt_fprintf( file, "0x%llx", ( long long )node->integer ); - } - else - { - _adt_fprintf( file, "%lld", ( long long )node->integer ); - } - } - break; - - case EADT_TYPE_REAL : - { - if ( node->props == EADT_PROPS_NAN ) - { - _adt_fprintf( file, "NaN" ); - } - else if ( node->props == EADT_PROPS_NAN_NEG ) - { - _adt_fprintf( file, "-NaN" ); - } - else if ( node->props == EADT_PROPS_INFINITY ) - { - _adt_fprintf( file, "Infinity" ); - } - else if ( node->props == EADT_PROPS_INFINITY_NEG ) - { - _adt_fprintf( file, "-Infinity" ); - } - else if ( node->props == EADT_PROPS_TRUE ) - { - _adt_fprintf( file, "true" ); - } - else if ( node->props == EADT_PROPS_FALSE ) - { - _adt_fprintf( file, "false" ); - } - else if ( node->props == EADT_PROPS_NULL ) - { - _adt_fprintf( file, "null" ); -#ifndef GEN_PARSER_DISABLE_ANALYSIS - } - else if ( node->props == EADT_PROPS_IS_EXP ) - { - _adt_fprintf( file, 
"%lld.%0*d%llde%lld", ( long long )node->base, node->base2_offset, 0, ( long long )node->base2, ( long long )node->exp ); - } - else if ( node->props == EADT_PROPS_IS_PARSED_REAL ) - { - if ( ! node->lead_digit ) - _adt_fprintf( file, ".%0*d%lld", node->base2_offset, 0, ( long long )node->base2 ); - else - _adt_fprintf( file, "%lld.%0*d%lld", ( long long int )node->base2_offset, 0, ( int )node->base, ( long long )node->base2 ); -#endif - } - else - { - _adt_fprintf( file, "%f", node->real ); - } - } - break; - } - - return EADT_ERROR_NONE; -} - -ADT_Error adt_print_string( FileInfo* file, ADT_Node* node, char const* escaped_chars, char const* escape_symbol ) -{ - GEN_ASSERT_NOT_NULL( file ); - GEN_ASSERT_NOT_NULL( node ); - GEN_ASSERT_NOT_NULL( escaped_chars ); - if ( node->type != EADT_TYPE_STRING && node->type != EADT_TYPE_MULTISTRING ) - { - return EADT_ERROR_INVALID_TYPE; - } - - /* escape string */ - char const *p = node->string, *b = p; - - if ( ! p ) - return EADT_ERROR_NONE; - - do - { - p = str_skip_any( p, escaped_chars ); - _adt_fprintf( file, "%.*s", pointer_diff( b, p ), b ); - if ( *p && ! ! 
str_find( escaped_chars, *p ) ) - { - _adt_fprintf( file, "%s%c", escape_symbol, *p ); - p++; - } - b = p; - } while ( *p ); - - return EADT_ERROR_NONE; -} - -ADT_Error adt_str_to_number( ADT_Node* node ) -{ - GEN_ASSERT( node ); - - if ( node->type == EADT_TYPE_REAL || node->type == EADT_TYPE_INTEGER ) - return EADT_ERROR_ALREADY_CONVERTED; /* this is already converted/parsed */ - if ( node->type != EADT_TYPE_STRING && node->type != EADT_TYPE_MULTISTRING ) - { - return EADT_ERROR_INVALID_TYPE; - } - - adt_parse_number( node, ( char* )node->string ); - - return EADT_ERROR_NONE; -} - -ADT_Error adt_str_to_number_strict( ADT_Node* node ) -{ - GEN_ASSERT( node ); - - if ( node->type == EADT_TYPE_REAL || node->type == EADT_TYPE_INTEGER ) - return EADT_ERROR_ALREADY_CONVERTED; /* this is already converted/parsed */ - if ( node->type != EADT_TYPE_STRING && node->type != EADT_TYPE_MULTISTRING ) - { - return EADT_ERROR_INVALID_TYPE; - } - - adt_parse_number_strict( node, ( char* )node->string ); - - return EADT_ERROR_NONE; -} - -#undef _adt_fprintf - -#pragma endregion ADT - -#pragma region CSV - -#ifdef GEN_CSV_DEBUG -# define GEN_CSV_ASSERT( msg ) GEN_PANIC( msg ) -#else -# define GEN_CSV_ASSERT( msg ) -#endif - - -u8 csv_parse_delimiter( CSV_Object* root, char* text, AllocatorInfo allocator, b32 has_header, char delim ) -{ - CSV_Error error = ECSV_Error__NONE; - GEN_ASSERT_NOT_NULL( root ); - GEN_ASSERT_NOT_NULL( text ); - zero_item( root ); - - adt_make_branch( root, allocator, NULL, has_header ? 
false : true ); - - char* currentChar = text; - char* beginChar; - char* endChar; - - sw columnIndex = 0; - sw totalColumnIndex = 0; - - do - { - char delimiter = 0; - currentChar = zpl_cast( char* ) str_trim( currentChar, false ); - - if ( *currentChar == 0 ) - break; - - ADT_Node rowItem = { 0 }; - rowItem.type = EADT_TYPE_STRING; - -#ifndef GEN_PARSER_DISABLE_ANALYSIS - rowItem.name_style = EADT_NAME_STYLE_NO_QUOTES; -#endif - - /* handle string literals */ - if ( *currentChar == '"' ) - { - currentChar += 1; - beginChar = currentChar; - endChar = currentChar; - rowItem.string = beginChar; -#ifndef GEN_PARSER_DISABLE_ANALYSIS - rowItem.name_style = EADT_NAME_STYLE_DOUBLE_QUOTE; -#endif - do - { - endChar = zpl_cast( char* ) str_skip( endChar, '"' ); - - if ( *endChar && *( endChar + 1 ) == '"' ) - { - endChar += 2; - } - else - break; - } while ( *endChar ); - - if ( *endChar == 0 ) - { - GEN_CSV_ASSERT( "unmatched quoted string" ); - error = ECSV_Error__UNEXPECTED_END_OF_INPUT; - return error; - } - - *endChar = 0; - currentChar = zpl_cast( char* ) str_trim( endChar + 1, true ); - delimiter = *currentChar; - - /* unescape escaped quotes (so that unescaped text escapes :) */ - { - char* escapedChar = beginChar; - do - { - if ( *escapedChar == '"' && *( escapedChar + 1 ) == '"' ) - { - mem_move( escapedChar, escapedChar + 1, str_len( escapedChar ) ); - } - escapedChar++; - } while ( *escapedChar ); - } - } - else if ( *currentChar == delim ) - { - delimiter = *currentChar; - rowItem.string = ""; - } - else if ( *currentChar ) - { - /* regular data */ - beginChar = currentChar; - endChar = currentChar; - rowItem.string = beginChar; - - do - { - endChar++; - } while ( *endChar && *endChar != delim && *endChar != '\n' ); - - if ( *endChar ) - { - currentChar = zpl_cast( char* ) str_trim( endChar, true ); - - while ( char_is_space( *( endChar - 1 ) ) ) - { - endChar--; - } - - delimiter = *currentChar; - *endChar = 0; - } - else - { - delimiter = 0; - currentChar = 
endChar; - } - - /* check if number and process if so */ - b32 skip_number = false; - char* num_p = beginChar; - - // We only consider hexadecimal values if they start with 0x - if ( str_len( num_p ) > 2 && num_p[ 0 ] == '0' && ( num_p[ 1 ] == 'x' || num_p[ 1 ] == 'X' ) ) - { - num_p += 2; // skip '0x' prefix - do - { - if ( ! char_is_hex_digit( *num_p ) ) - { - skip_number = true; - break; - } - } while ( *num_p++ ); - } - else - { - skip_number = true; - } - - if ( ! skip_number ) - { - adt_str_to_number( &rowItem ); - } - } - - if ( columnIndex >= root->nodes.num() ) - { - adt_append_arr( root, NULL ); - } - - root->nodes[ columnIndex ].nodes.append( rowItem ); - - if ( delimiter == delim ) - { - columnIndex++; - currentChar++; - } - else if ( delimiter == '\n' || delimiter == 0 ) - { - /* check if number of rows is not mismatched */ - if ( totalColumnIndex < columnIndex ) - totalColumnIndex = columnIndex; - - else if ( totalColumnIndex != columnIndex ) - { - GEN_CSV_ASSERT( "mismatched rows" ); - error = ECSV_Error__MISMATCHED_ROWS; - return error; - } - - columnIndex = 0; - - if ( delimiter != 0 ) - currentChar++; - } - } while ( *currentChar ); - - if ( root->nodes.num() == 0 ) - { - GEN_CSV_ASSERT( "unexpected end of input. stream is empty." ); - error = ECSV_Error__UNEXPECTED_END_OF_INPUT; - return error; - } - - /* consider first row as a header. 
*/ - if ( has_header ) - { - for ( sw i = 0; i < root->nodes.num(); i++ ) - { - CSV_Object* col = root->nodes + i; - CSV_Object* hdr = col->nodes; - col->name = hdr->string; - col->nodes.remove_at( 0 ); - } - } - - return error; -} - -void csv_free( CSV_Object* obj ) -{ - adt_destroy_branch( obj ); -} - -void _csv_write_record( FileInfo* file, CSV_Object* node ) -{ - switch ( node->type ) - { - case EADT_TYPE_STRING : - { -#ifndef GEN_PARSER_DISABLE_ANALYSIS - switch ( node->name_style ) - { - case EADT_NAME_STYLE_DOUBLE_QUOTE : - { - str_fmt_file( file, "\"" ); - adt_print_string( file, node, "\"", "\"" ); - str_fmt_file( file, "\"" ); - } - break; - - case EADT_NAME_STYLE_NO_QUOTES : - { -#endif - str_fmt_file( file, "%s", node->string ); -#ifndef GEN_PARSER_DISABLE_ANALYSIS - } - break; - } -#endif - } - break; - - case EADT_TYPE_REAL : - case EADT_TYPE_INTEGER : - { - adt_print_number( file, node ); - } - break; - } -} - -void _csv_write_header( FileInfo* file, CSV_Object* header ) -{ - CSV_Object temp = *header; - temp.string = temp.name; - temp.type = EADT_TYPE_STRING; - _csv_write_record( file, &temp ); -} - -void csv_write_delimiter( FileInfo* file, CSV_Object* obj, char delimiter ) -{ - GEN_ASSERT_NOT_NULL( file ); - GEN_ASSERT_NOT_NULL( obj ); - GEN_ASSERT( obj->nodes ); - sw cols = obj->nodes.num(); - if ( cols == 0 ) - return; - - sw rows = obj->nodes[ 0 ].nodes.num(); - if ( rows == 0 ) - return; - - b32 has_headers = obj->nodes[ 0 ].name != NULL; - - if ( has_headers ) - { - for ( sw i = 0; i < cols; i++ ) - { - _csv_write_header( file, &obj->nodes[ i ] ); - if ( i + 1 != cols ) - { - str_fmt_file( file, "%c", delimiter ); - } - } - str_fmt_file( file, "\n" ); - } - - for ( sw r = 0; r < rows; r++ ) - { - for ( sw i = 0; i < cols; i++ ) - { - _csv_write_record( file, &obj->nodes[ i ].nodes[ r ] ); - if ( i + 1 != cols ) - { - str_fmt_file( file, "%c", delimiter ); - } - } - str_fmt_file( file, "\n" ); - } -} - -String csv_write_string_delimiter( 
AllocatorInfo a, CSV_Object* obj, char delimiter ) -{ - FileInfo tmp; - file_stream_new( &tmp, a ); - csv_write_delimiter( &tmp, obj, delimiter ); - sw fsize; - u8* buf = file_stream_buf( &tmp, &fsize ); - String output = String::make_length( a, ( char* )buf, fsize ); - file_close( &tmp ); - return output; -} - -#pragma endregion CSV - -Builder Builder::open( char const* path ) -{ - Builder result; - - FileError error = file_open_mode( &result.File, EFileMode_WRITE, path ); - - if ( error != EFileError_NONE ) - { - log_failure( "gen::File::open - Could not open file: %s", path ); - return result; - } - - result.Buffer = String::make_reserve( GlobalAllocator, Builder_StrBufferReserve ); - - return result; -} - -void Builder::pad_lines( s32 num ) -{ - Buffer.append( "\n" ); -} - -void Builder::print( Code code ) -{ - Buffer.append( code->to_string() ); -} - -void Builder::print_fmt( char const* fmt, ... ) -{ - sw res; - char buf[ GEN_PRINTF_MAXLEN ] = { 0 }; - - va_list va; - va_start( va, fmt ); - res = str_fmt_va( buf, count_of( buf ) - 1, fmt, va ) - 1; - va_end( va ); - - Buffer.append( buf, res ); -} - -void Builder::write() -{ - bool result = file_write( &File, Buffer, Buffer.length() ); - - if ( result == false ) - log_failure( "gen::File::write - Failed to write to file: %s", file_name( &File ) ); - - file_close( &File ); - Buffer.free(); -} - -GEN_NS_END diff --git a/project/gen/gen_builder.hpp b/project/gen/gen_builder.hpp deleted file mode 100644 index 8512d39..0000000 --- a/project/gen/gen_builder.hpp +++ /dev/null @@ -1,448 +0,0 @@ -// This file was generated automatially by gen.bootstrap.cpp (See: https://github.com/Ed94/gencpp) - -#include "gen.hpp" - -GEN_NS_BEGIN - -#pragma region ADT - -enum ADT_Type : u32 -{ - EADT_TYPE_UNINITIALISED, /* node was not initialised, this is a programming error! 
*/ - EADT_TYPE_ARRAY, - EADT_TYPE_OBJECT, - EADT_TYPE_STRING, - EADT_TYPE_MULTISTRING, - EADT_TYPE_INTEGER, - EADT_TYPE_REAL, -}; - -enum ADT_Props : u32 -{ - EADT_PROPS_NONE, - EADT_PROPS_NAN, - EADT_PROPS_NAN_NEG, - EADT_PROPS_INFINITY, - EADT_PROPS_INFINITY_NEG, - EADT_PROPS_FALSE, - EADT_PROPS_TRUE, - EADT_PROPS_NULL, - EADT_PROPS_IS_EXP, - EADT_PROPS_IS_HEX, - - // Used internally so that people can fill in real numbers they plan to write. - EADT_PROPS_IS_PARSED_REAL, -}; - -enum ADT_NamingStyle : u32 -{ - EADT_NAME_STYLE_DOUBLE_QUOTE, - EADT_NAME_STYLE_SINGLE_QUOTE, - EADT_NAME_STYLE_NO_QUOTES, -}; - -enum ADT_AssignStyle : u32 -{ - EADT_ASSIGN_STYLE_COLON, - EADT_ASSIGN_STYLE_EQUALS, - EADT_ASSIGN_STYLE_LINE, -}; - -enum ADT_DelimStyle : u32 -{ - EADT_DELIM_STYLE_COMMA, - EADT_DELIM_STYLE_LINE, - EADT_DELIM_STYLE_NEWLINE, -}; - -enum ADT_Error : u32 -{ - EADT_ERROR_NONE, - EADT_ERROR_INTERNAL, - EADT_ERROR_ALREADY_CONVERTED, - EADT_ERROR_INVALID_TYPE, - EADT_ERROR_OUT_OF_MEMORY, -}; - -struct ADT_Node -{ - char const* name; - struct ADT_Node* parent; - - /* properties */ - ADT_Type type : 4; - u8 props : 4; -#ifndef GEN_PARSER_DISABLE_ANALYSIS - u8 cfg_mode : 1; - u8 name_style : 2; - u8 assign_style : 2; - u8 delim_style : 2; - u8 delim_line_width : 4; - u8 assign_line_width : 4; -#endif - - /* adt data */ - union - { - char const* string; - Array< ADT_Node > nodes; ///< zpl_array - - struct - { - union - { - f64 real; - s64 integer; - }; - -#ifndef GEN_PARSER_DISABLE_ANALYSIS - /* number analysis */ - s32 base; - s32 base2; - u8 base2_offset : 4; - s8 exp : 4; - u8 neg_zero : 1; - u8 lead_digit : 1; -#endif - }; - }; -}; - -/* ADT NODE LIMITS - * delimiter and assignment segment width is limited to 128 whitespace symbols each. - * real number limits decimal position to 128 places. - * real number exponent is limited to 64 digits. 
- */ - -/** - * @brief Initialise an ADT object or array - * - * @param node - * @param backing Memory allocator used for descendants - * @param name Node's name - * @param is_array - * @return error code - */ -u8 adt_make_branch( ADT_Node* node, AllocatorInfo backing, char const* name, b32 is_array ); - -/** - * @brief Destroy an ADT branch and its descendants - * - * @param node - * @return error code - */ -u8 adt_destroy_branch( ADT_Node* node ); - -/** - * @brief Initialise an ADT leaf - * - * @param node - * @param name Node's name - * @param type Node's type (use zpl_adt_make_branch for container nodes) - * @return error code - */ -u8 adt_make_leaf( ADT_Node* node, char const* name, ADT_Type type ); - - -/** - * @brief Fetch a node using provided URI string. - * - * This method uses a basic syntax to fetch a node from the ADT. The following features are available - * to retrieve the data: - * - * - "a/b/c" navigates through objects "a" and "b" to get to "c" - * - "arr/[foo=123]/bar" iterates over "arr" to find any object with param "foo" that matches the value "123", then gets its field called "bar" - * - "arr/3" retrieves the 4th element in "arr" - * - "arr/[apple]" retrieves the first element of value "apple" in "arr" - * - * @param node ADT node - * @param uri Locator string as described above - * @return zpl_adt_node* - * - * @see code/apps/examples/json_get.c - */ -ADT_Node* adt_query( ADT_Node* node, char const* uri ); - -/** - * @brief Find a field node within an object by the given name. - * - * @param node - * @param name - * @param deep_search Perform search recursively - * @return zpl_adt_node * node - */ -ADT_Node* adt_find( ADT_Node* node, char const* name, b32 deep_search ); - -/** - * @brief Allocate an unitialised node within a container at a specified index. 
- * - * @param parent - * @param index - * @return zpl_adt_node * node - */ -ADT_Node* adt_alloc_at( ADT_Node* parent, sw index ); - -/** - * @brief Allocate an unitialised node within a container. - * - * @param parent - * @return zpl_adt_node * node - */ -ADT_Node* adt_alloc( ADT_Node* parent ); - -/** - * @brief Move an existing node to a new container at a specified index. - * - * @param node - * @param new_parent - * @param index - * @return zpl_adt_node * node - */ -ADT_Node* adt_move_node_at( ADT_Node* node, ADT_Node* new_parent, sw index ); - -/** - * @brief Move an existing node to a new container. - * - * @param node - * @param new_parent - * @return zpl_adt_node * node - */ -ADT_Node* adt_move_node( ADT_Node* node, ADT_Node* new_parent ); - -/** - * @brief Swap two nodes. - * - * @param node - * @param other_node - * @return - */ -void adt_swap_nodes( ADT_Node* node, ADT_Node* other_node ); - -/** - * @brief Remove node from container. - * - * @param node - * @return - */ -void adt_remove_node( ADT_Node* node ); - -/** - * @brief Initialise a node as an object - * - * @param obj - * @param name - * @param backing - * @return - */ -b8 adt_set_obj( ADT_Node* obj, char const* name, AllocatorInfo backing ); - -/** - * @brief Initialise a node as an array - * - * @param obj - * @param name - * @param backing - * @return - */ -b8 adt_set_arr( ADT_Node* obj, char const* name, AllocatorInfo backing ); - -/** - * @brief Initialise a node as a string - * - * @param obj - * @param name - * @param value - * @return - */ -b8 adt_set_str( ADT_Node* obj, char const* name, char const* value ); - -/** - * @brief Initialise a node as a float - * - * @param obj - * @param name - * @param value - * @return - */ -b8 adt_set_flt( ADT_Node* obj, char const* name, f64 value ); - -/** - * @brief Initialise a node as a signed integer - * - * @param obj - * @param name - * @param value - * @return - */ -b8 adt_set_int( ADT_Node* obj, char const* name, s64 value ); - -/** - * 
@brief Append a new node to a container as an object - * - * @param parent - * @param name - * @return* - */ -ADT_Node* adt_append_obj( ADT_Node* parent, char const* name ); - -/** - * @brief Append a new node to a container as an array - * - * @param parent - * @param name - * @return* - */ -ADT_Node* adt_append_arr( ADT_Node* parent, char const* name ); - -/** - * @brief Append a new node to a container as a string - * - * @param parent - * @param name - * @param value - * @return* - */ -ADT_Node* adt_append_str( ADT_Node* parent, char const* name, char const* value ); - -/** - * @brief Append a new node to a container as a float - * - * @param parent - * @param name - * @param value - * @return* - */ -ADT_Node* adt_append_flt( ADT_Node* parent, char const* name, f64 value ); - -/** - * @brief Append a new node to a container as a signed integer - * - * @param parent - * @param name - * @param value - * @return* - */ -ADT_Node* adt_append_int( ADT_Node* parent, char const* name, s64 value ); - -/* parser helpers */ - -/** - * @brief Parses a text and stores the result into an unitialised node. - * - * @param node - * @param base - * @return* - */ -char* adt_parse_number( ADT_Node* node, char* base ); - -/** - * @brief Parses a text and stores the result into an unitialised node. - * This function expects the entire input to be a number. - * - * @param node - * @param base - * @return* - */ -char* adt_parse_number_strict( ADT_Node* node, char* base_str ); - -/** - * @brief Parses and converts an existing string node into a number. - * - * @param node - * @return - */ -ADT_Error adt_str_to_number( ADT_Node* node ); - -/** - * @brief Parses and converts an existing string node into a number. - * This function expects the entire input to be a number. - * - * @param node - * @return - */ -ADT_Error adt_str_to_number_strict( ADT_Node* node ); - -/** - * @brief Prints a number into a file stream. - * - * The provided file handle can also be a memory mapped stream. 
- * - * @see zpl_file_stream_new - * @param file - * @param node - * @return - */ -ADT_Error adt_print_number( FileInfo* file, ADT_Node* node ); - -/** - * @brief Prints a string into a file stream. - * - * The provided file handle can also be a memory mapped stream. - * - * @see zpl_file_stream_new - * @param file - * @param node - * @param escaped_chars - * @param escape_symbol - * @return - */ -ADT_Error adt_print_string( FileInfo* file, ADT_Node* node, char const* escaped_chars, char const* escape_symbol ); - -#pragma endregion ADT - -#pragma region CSV - -enum CSV_Error : u32 -{ - ECSV_Error__NONE, - ECSV_Error__INTERNAL, - ECSV_Error__UNEXPECTED_END_OF_INPUT, - ECSV_Error__MISMATCHED_ROWS, -}; - -typedef ADT_Node CSV_Object; - -GEN_DEF_INLINE u8 csv_parse( CSV_Object* root, char* text, AllocatorInfo allocator, b32 has_header ); -u8 csv_parse_delimiter( CSV_Object* root, char* text, AllocatorInfo allocator, b32 has_header, char delim ); -void csv_free( CSV_Object* obj ); - -GEN_DEF_INLINE void csv_write( FileInfo* file, CSV_Object* obj ); -GEN_DEF_INLINE String csv_write_string( AllocatorInfo a, CSV_Object* obj ); -void csv_write_delimiter( FileInfo* file, CSV_Object* obj, char delim ); -String csv_write_string_delimiter( AllocatorInfo a, CSV_Object* obj, char delim ); - -/* inline */ - -GEN_IMPL_INLINE u8 csv_parse( CSV_Object* root, char* text, AllocatorInfo allocator, b32 has_header ) -{ - return csv_parse_delimiter( root, text, allocator, has_header, ',' ); -} - -GEN_IMPL_INLINE void csv_write( FileInfo* file, CSV_Object* obj ) -{ - csv_write_delimiter( file, obj, ',' ); -} - -GEN_IMPL_INLINE String csv_write_string( AllocatorInfo a, CSV_Object* obj ) -{ - return csv_write_string_delimiter( a, obj, ',' ); -} - -#pragma endregion CSV - -struct Builder -{ - FileInfo File; - String Buffer; - - static Builder open( char const* path ); - - void pad_lines( s32 num ); - - void print( Code ); - void print_fmt( char const* fmt, ... 
); - - void write(); -}; - -GEN_NS_END diff --git a/scripts/bootstrap.ci.ps1 b/scripts/bootstrap.ci.ps1 index b895073..0b85269 100644 --- a/scripts/bootstrap.ci.ps1 +++ b/scripts/bootstrap.ci.ps1 @@ -64,6 +64,7 @@ Push-location $path_project 'gen.hpp', 'gen.cpp', 'gen_dep.hpp', 'gen_dep.cpp', 'gen_builder.hpp', 'gen_builder.cpp' + 'gen_scanner.hpp', 'gen_scanner.cpp' ) $exclude = $null diff --git a/singleheader/gen.singleheader.cpp b/singleheader/gen.singleheader.cpp index 81240ed..6db6989 100644 --- a/singleheader/gen.singleheader.cpp +++ b/singleheader/gen.singleheader.cpp @@ -2,9 +2,17 @@ #define GEN_ENFORCE_STRONG_CODE_TYPES #define GEN_EXPOSE_BACKEND #include "gen.cpp" -#include "file_processors/scanner.hpp" + #include "helpers/helper.hpp" +GEN_NS_BEGIN +#include "dependencies/parsing.cpp" +GEN_NS_END + +#include "file_processors/builder.hpp" +#include "file_processors/builder.cpp" +#include "file_processors/scanner.hpp" + using namespace gen; constexpr char const* generation_notice = @@ -41,30 +49,25 @@ global bool generate_scanner = true; int gen_main() { +#define project_dir "../project/" gen::init(); -#define project_dir "../project/" - - Code push_ignores = scan_file( project_dir "helpers/push_ignores.inline.hpp" ); - Code pop_ignores = scan_file( project_dir "helpers/pop_ignores.inline.hpp" ); - - Code header_start = scan_file( "components/header_start.hpp" ); + Code push_ignores = scan_file( project_dir "helpers/push_ignores.inline.hpp" ); + Code pop_ignores = scan_file( project_dir "helpers/pop_ignores.inline.hpp" ); + Code single_header_start = scan_file( "components/header_start.hpp" ); Builder header = Builder::open( "gen/gen.hpp" ); - header.print( generation_notice ); - header.print( push_ignores ); - + header.print_fmt( generation_notice ); header.print_fmt("#pragma once\n\n"); + header.print( push_ignores ); // Headers { - header.print( header_start ); + header.print( single_header_start ); if ( generate_gen_dep ) { - header.print_fmt( 
roll_own_dependencies_guard_start ); - Code header_start = scan_file( project_dir "dependencies/header_start.hpp" ); Code macros = scan_file( project_dir "dependencies/macros.hpp" ); Code basic_types = scan_file( project_dir "dependencies/basic_types.hpp" ); @@ -78,8 +81,10 @@ int gen_main() Code file_handling = scan_file( project_dir "dependencies/file_handling.hpp" ); Code timing = scan_file( project_dir "dependencies/timing.hpp" ); + header.print_fmt( roll_own_dependencies_guard_start ); header.print( header_start ); header.print_fmt( "GEN_NS_BEGIN\n\n" ); + header.print( macros ); header.print( basic_types ); header.print( debug ); @@ -91,8 +96,15 @@ int gen_main() header.print( string ); header.print( file_handling ); header.print( timing ); - header.print_fmt( "GEN_NS_END\n" ); + if ( generate_scanner ) + { + header.print_fmt( "pragma region Parsing\n\n" ); + header.print( scan_file( project_dir "dependencies/parsing.hpp" ) ); + header.print_fmt( "pragma endregion Parsing\n\n" ); + } + + header.print_fmt( "GEN_NS_END\n" ); header.print_fmt( roll_own_dependencies_guard_end ); } @@ -124,10 +136,27 @@ int gen_main() header.print( interface ); - header.print_fmt( inlines ); - header.print_fmt( ast_inlines ); + header.print_fmt( "#pragma region Inlines\n\n" ); + header.print( inlines ); + header.print( ast_inlines ); + header.print_fmt( "#pragma endregion Inlines\n\n" ); header.print( header_end ); + + if ( generate_builder ) + { + header.print_fmt( "#pragma region Builder\n\n" ); + header.print( scan_file( project_dir "file_processors/builder.hpp" ) ); + header.print_fmt( "#pragma endregion Builder\n\n" ); + } + + if ( generate_scanner ) + { + header.print_fmt( "#pragma region Scanner\n\n" ); + header.print( scan_file( project_dir "file_processors/scanner.hpp" ) ); + header.print_fmt( "#pragma endregion Scanner\n\n" ); + } + header.print_fmt( "GEN_NS_END\n" ); } @@ -137,7 +166,7 @@ int gen_main() if ( generate_gen_dep ) { - Code impl_start = scan_file( 
project_dir "dependencies/impl_start.cpp" ); + Code impl_start = scan_file( project_dir "dependencies/src_start.cpp" ); Code debug = scan_file( project_dir "dependencies/debug.cpp" ); Code string_ops = scan_file( project_dir "dependencies/string_ops.cpp" ); Code printing = scan_file( project_dir "dependencies/printing.cpp" ); @@ -162,11 +191,18 @@ int gen_main() header.print( file_handling ); header.print( timing ); + if ( generate_scanner ) + { + header.print_fmt( "#pragma region Parsing\n\n" ); + header.print( scan_file( project_dir "dependencies/parsing.cpp" ) ); + header.print_fmt( "#pragma endregion Parsing\n\n" ); + } + header.print_fmt( "GEN_NS_END\n"); header.print_fmt( roll_own_dependencies_guard_end ); } - Code data = scan_file( project_dir "components/static_data.cpp" ); + Code static_data = scan_file( project_dir "components/static_data.cpp" ); Code ast_case_macros = scan_file( project_dir "components/ast_case_macros.cpp" ); Code ast = scan_file( project_dir "components/ast.cpp" ); Code interface = scan_file( project_dir "components/interface.cpp" ); @@ -178,14 +214,39 @@ int gen_main() CodeNamespace parser_nspace = def_namespace( name(Parser), def_namespace_body( args(etoktype)) ); header.print_fmt( "GEN_NS_BEGIN\n\n"); - header.print( data ); + header.print( static_data ); + + header.print_fmt( "#pragma region AST\n\n" ); header.print( ast_case_macros ); header.print( ast ); + header.print_fmt( "#pragma endregion AST\n\n" ); + + header.print_fmt( "#pragma region Interface\n\n" ); header.print( interface ); header.print( upfront ); + header.print_fmt( "#pragma region Parsing\n\n" ); header.print( parser_nspace ); header.print( parsing ); + header.print_fmt( "#pragma endregion Parsing\n\n" ); header.print( untyped ); + header.print_fmt( "#pragma endregion Interface\n\n"); + + if ( generate_builder ) + { + header.print_fmt( "#pragma region Builder\n\n" ); + header.print( scan_file( project_dir "file_processors/builder.cpp" ) ); + header.print_fmt( 
"#pragma endregion Builder\n\n" ); + } + +#if 0 + if ( generate_scanner ) + { + header.print_fmt( "#pragma region Scanner\n\n" ); + header.print( scan_file( project_dir "file_processors/scanner.cpp" ) ); + header.print_fmt( "#pragma endregion Scanner\n\n" ); + } +#endif + header.print_fmt( "GEN_NS_END\n"); header.print_fmt( "%s\n", (char const*) implementation_guard_end ); @@ -196,4 +257,5 @@ int gen_main() gen::deinit(); return 0; +#undef project_dir }