From ba1dd1894a91c3a2de26e67e65c1874b9df08e94 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Tue, 3 Dec 2024 18:47:12 -0500 Subject: [PATCH] WIP (Broken): Major changes to handling Strings in ast (StringCached defined as StrC) --- project/auxillary/builder.cpp | 2 +- project/auxillary/scanner.hpp | 7 +- project/components/ast.cpp | 51 +++-- project/components/code_serialization.cpp | 16 +- project/components/code_types.hpp | 6 +- project/components/gen/ecode.hpp | 3 +- project/components/gen/eoperator.hpp | 3 +- project/components/gen/especifier.hpp | 3 +- project/components/gen/etoktype.cpp | 3 +- project/components/interface.cpp | 10 +- project/components/interface.untyped.cpp | 4 +- project/components/interface.upfront.cpp | 2 +- project/components/lexer.cpp | 12 +- project/components/parser.cpp | 18 +- project/dependencies/containers.hpp | 8 +- project/dependencies/macros.hpp | 6 +- project/dependencies/strings.hpp | 218 +++++++++++++++++----- project/helpers/helper.hpp | 32 ++-- 18 files changed, 255 insertions(+), 149 deletions(-) diff --git a/project/auxillary/builder.cpp b/project/auxillary/builder.cpp index 1960306..f85cef0 100644 --- a/project/auxillary/builder.cpp +++ b/project/auxillary/builder.cpp @@ -43,7 +43,7 @@ void Builder::print_fmt( char const* fmt, ... ) va_end( va ); // log_fmt( "$%s - print_fmt: %.*s\n", File.filename, res > 80 ? 80 : res, buf ); - append( & Buffer, buf, res ); + append( (String*) & Buffer, (char const*)buf, res ); } void Builder::write() diff --git a/project/auxillary/scanner.hpp b/project/auxillary/scanner.hpp index 0cac91c..8ae44fa 100644 --- a/project/auxillary/scanner.hpp +++ b/project/auxillary/scanner.hpp @@ -39,7 +39,7 @@ Code scan_file( char const* path ) const StrC def_intellisense = txt("GEN_INTELLISENSE_DIRECTIVES" ); bool found_directive = false; - char const* scanner = str.Data; + char const* scanner = (char const*)str; s32 left = fsize; while ( left ) { @@ -94,7 +94,7 @@ Code scan_file( char const* path ) move_fwd(); // sptr skip_size = fsize - left; - if ( (scanner + 2) >= ( str.Data + fsize ) ) + if ( (scanner + 2) >= ( (char const*) str + fsize ) ) { mem_move( str, scanner, left ); get_header(str)->Length = left; @@ -106,7 +106,6 @@ Code scan_file( char const* path ) break; } - } move_fwd(); @@ -117,7 +116,7 @@ Code scan_file( char const* path ) } file_close( & file ); - return untyped_str( str ); + return untyped_str( to_strc(str) ); } #if 0 diff --git a/project/components/ast.cpp b/project/components/ast.cpp index b1d5cd2..681c997 100644 --- a/project/components/ast.cpp +++ b/project/components/ast.cpp @@ -621,9 +621,9 @@ bool is_equal( Code self, Code other ) "AST : %S\n" \ "Other: %S\n" \ , debug_str(self) \ - ,debug_str(other) \ + ,debug_str(other) \ ); \ - \ + \ return false; \ } @@ -634,7 +634,7 @@ bool is_equal( Code self, Code other ) "AST : %S\n" \ "Other: %S\n" \ , debug_str(self) \ - ,debug_str(other) \ + ,debug_str(other) \ ); \ \ return false; \ @@ -647,15 +647,15 @@ bool is_equal( Code self, Code other ) "AST : %S\n" \ "Other: %S\n" \ , debug_str(self) \ - ,debug_str(other) \ + ,debug_str(other) \ ); \ \ log_fmt("Content cannot be trusted to be unique with this check " \ "so it must be verified by eye for now\n" \ "AST Content:\n%S\n" \ "Other Content:\n%S\n" \ - , visualize_whitespace(self->content) \ - , visualize_whitespace(other->content) \ + , visualize_whitespace(self->content, GlobalAllocator) \ + , visualize_whitespace(other->content, GlobalAllocator) \ ); \ } @@ -669,14 +669,14 @@ bool is_equal( Code self, Code other ) "Other: %s\n" \ "For ast member: %s\n" \ , debug_str(self) \ - , debug_str(other) \ - , debug_str(self->ast) \ + , debug_str(other) \ + , debug_str(self->ast) \ ); \ \ return false; \ } \ \ - if ( ! is_equal(self->ast, other->ast ) ) \ + if ( ! is_equal(self->ast, other->ast ) ) \ { \ log_fmt( "\nAST::is_equal: Failed for " #ast"\n" \ "AST : %S\n" \ @@ -684,9 +684,9 @@ bool is_equal( Code self, Code other ) "For ast member: %S\n" \ "other's ast member: %S\n" \ , debug_str(self) \ - , debug_str(other) \ - , debug_str(self->ast) \ - , debug_str(other->ast) \ + , debug_str(other) \ + , debug_str(self->ast) \ + , debug_str(other->ast) \ ); \ \ return false; \ @@ -711,7 +711,6 @@ bool is_equal( Code self, Code other ) case CT_Untyped: { check_member_content( Content ); - return true; } @@ -1147,19 +1146,19 @@ bool is_equal( Code self, Code other ) bool validate_body(Code self) { -#define CheckEntries( Unallowed_Types ) \ - do \ - { \ - for ( Code entry : cast(CodeBody, self) ) \ - { \ - switch ( entry->Type ) \ - { \ - Unallowed_Types \ - log_failure( "AST::validate_body: Invalid entry in body %s", GEN_NS debug_str(entry) ); \ - return false; \ - } \ - } \ - } \ +#define CheckEntries( Unallowed_Types ) \ + do \ + { \ + for ( Code entry : cast(CodeBody, self) ) \ + { \ + switch ( entry->Type ) \ + { \ + Unallowed_Types \ + log_failure( "AST::validate_body: Invalid entry in body %s", GEN_NS debug_str(entry) ); \ + return false; \ + } \ + } \ + } \ while (0); switch ( self->Type ) diff --git a/project/components/code_serialization.cpp b/project/components/code_serialization.cpp index 6a5ef38..bb9b43a 100644 --- a/project/components/code_serialization.cpp +++ b/project/components/code_serialization.cpp @@ -4,7 +4,7 @@ #endif String to_string(CodeAttributes attributes) { - return GEN_NS duplicate( attributes->Content, GlobalAllocator ); + return {(char*) duplicate( attributes->Content, GlobalAllocator ).Ptr}; } String to_string(CodeBody body) @@ -68,7 +68,7 @@ void to_string_export( CodeBody body, String* result ) String to_string(CodeComment comment) { - return GEN_NS duplicate( comment->Content, GlobalAllocator ); + return {(char*) duplicate( comment->Content, GlobalAllocator ).Ptr}; } String to_string(CodeConstructor self) @@ -105,7 +105,7 @@ void to_string_def(CodeConstructor self, String* result ) append_fmt( result, " : %S", GEN_NS to_string(self->InitializerList) ); if ( self->InlineCmt ) - append_fmt( result, " // %S", self->InlineCmt->Content ); + append_fmt( result, " // %s", self->InlineCmt->Content.Ptr ); append_fmt( result, "\n{\n%S\n}\n", GEN_NS to_string(self->Body) ); } @@ -129,7 +129,7 @@ void to_string_fwd(CodeConstructor self, String* result ) append_fmt( result, " = %S", GEN_NS to_string(self->Body) ); if ( self->InlineCmt ) - append_fmt( result, "; // %S\n", self->InlineCmt->Content ); + append_fmt( result, "; // %s\n", self->InlineCmt->Content.ptr ); else append( result, ";\n" ); } @@ -187,7 +187,7 @@ void to_string_def( CodeClass self, String* result ) if ( ast->InlineCmt ) { - append_fmt( result, " // %S", ast->InlineCmt->Content ); + append_fmt( result, " // %s", ast->InlineCmt->Content.Ptr ); } append_fmt( result, "\n{\n%S\n}", GEN_NS to_string(ast->Body) ); @@ -417,7 +417,7 @@ void to_string_class_fwd(CodeEnum self, String* result ) String to_string(CodeExec exec) { - return GEN_NS duplicate( exec->Content, GlobalAllocator ); + return {(char*) duplicate( exec->Content, GlobalAllocator ).Ptr}; } void to_string(CodeExtern self, String* result ) @@ -774,7 +774,7 @@ void to_string_def(CodeOpCast self, String* result ) } } - if ( self->Name && length(self->Name) ) + if ( self->Name && self->Name.Len ) append_fmt( result, "%Soperator %S()", self->Name, to_string(self->ValueType) ); else append_fmt( result, "operator %S()", to_string(self->ValueType) ); @@ -792,7 +792,7 @@ void to_string_def(CodeOpCast self, String* result ) return; } - if ( self->Name && length(self->Name) ) + if ( self->Name && self->Name.Len ) append_fmt( result, "%Soperator %S()\n{\n%S\n}\n", self->Name, to_string(self->ValueType), GEN_NS to_string(self->Body) ); else append_fmt( result, "operator %S()\n{\n%S\n}\n", to_string(self->ValueType), GEN_NS to_string(self->Body) ); diff --git a/project/components/code_types.hpp b/project/components/code_types.hpp index 6c5a380..3bdd601 100644 --- a/project/components/code_types.hpp +++ b/project/components/code_types.hpp @@ -211,9 +211,9 @@ struct CodeSpecifiers #if GEN_SUPPORT_CPP_MEMBER_FEATURES Using_Code( CodeSpecifiers ); - bool append( SpecifierT spec ) { return GEN_NS append(* this, spec); } - s32 has( SpecifierT spec ) { return GEN_NS has(* this, spec); } - s32 remove( SpecifierT to_remove ) { return GEN_NS remove(* this, to_remove); } + bool append( Specifier spec ) { return GEN_NS append(* this, spec); } + s32 has( Specifier spec ) { return GEN_NS has(* this, spec); } + s32 remove( Specifier to_remove ) { return GEN_NS remove(* this, to_remove); } String to_string() { return GEN_NS to_string(* this ); } void to_string( String& result ) { return GEN_NS to_string(* this, & result); } #endif diff --git a/project/components/gen/ecode.hpp b/project/components/gen/ecode.hpp index a024735..20dda85 100644 --- a/project/components/gen/ecode.hpp +++ b/project/components/gen/ecode.hpp @@ -5,8 +5,6 @@ // This file was generated automatially by gencpp's bootstrap.cpp (See: https://github.com/Ed94/gencpp) -typedef enum CodeType_Def CodeType; - enum CodeType_Def : u32 { CT_Invalid, @@ -71,6 +69,7 @@ enum CodeType_Def : u32 CT_Variable, CT_NumTypes }; +typedef enum CodeType_Def CodeType; inline StrC to_str( CodeType type ) { diff --git a/project/components/gen/eoperator.hpp b/project/components/gen/eoperator.hpp index a2f63b3..742e54f 100644 --- a/project/components/gen/eoperator.hpp +++ b/project/components/gen/eoperator.hpp @@ -5,8 +5,6 @@ // This file was generated automatially by gencpp's bootstrap.cpp (See: https://github.com/Ed94/gencpp) -typedef enum Operator_Def Operator; - enum Operator_Def : u32 { Op_Invalid, @@ -58,6 +56,7 @@ enum Operator_Def : u32 Op_DeleteArray, NumOps }; +typedef enum Operator_Def Operator; inline StrC to_str( Operator op ) { diff --git a/project/components/gen/especifier.hpp b/project/components/gen/especifier.hpp index c00b99f..04e6f24 100644 --- a/project/components/gen/especifier.hpp +++ b/project/components/gen/especifier.hpp @@ -5,8 +5,6 @@ // This file was generated automatially by gencpp's bootstrap.cpp (See: https://github.com/Ed94/gencpp) -typedef enum Specifier_Def Specifier; - enum Specifier_Def : u32 { Spec_Invalid, @@ -37,6 +35,7 @@ enum Specifier_Def : u32 Spec_Volatile, Spec_NumSpecifiers }; +typedef enum Specifier_Def Specifier; inline bool is_trailing( Specifier specifier ) { diff --git a/project/components/gen/etoktype.cpp b/project/components/gen/etoktype.cpp index 99da505..7116eaf 100644 --- a/project/components/gen/etoktype.cpp +++ b/project/components/gen/etoktype.cpp @@ -8,8 +8,6 @@ GEN_NS_PARSER_BEGIN #define GEN_DEFINE_ATTRIBUTE_TOKENS Entry( Tok_Attribute_API_Export, "GEN_API_Export_Code" ) Entry( Tok_Attribute_API_Import, "GEN_API_Import_Code" ) -typedef enum TokType_Def TokType; - enum TokType_Def : u32 { Tok_Invalid, @@ -111,6 +109,7 @@ enum TokType_Def : u32 Tok_Attribute_API_Import, Tok_NumTokens }; +typedef enum TokType_Def TokType; inline StrC to_str( TokType type ) { diff --git a/project/components/interface.cpp b/project/components/interface.cpp index b0ece77..e0d6c69 100644 --- a/project/components/interface.cpp +++ b/project/components/interface.cpp @@ -74,9 +74,9 @@ void* Global_Allocator_Proc( void* allocator_data, AllocType type, ssize size, s internal void define_constants() { - Code_Global = make_code(); - scast(String, Code_Global->Name) = get_cached_string( txt("Global Code") ); - scast(String, Code_Global->Content) = Code_Global->Name; + Code_Global = make_code(); + Code_Global->Name = get_cached_string( txt("Global Code") ); + Code_Global->Content = Code_Global->Name; Code_Invalid = make_code(); set_global(Code_Invalid); @@ -403,9 +403,9 @@ StringCached get_cached_string( StrC str ) } String result = string_make( get_string_allocator( str.Len ), str ); - set(& StringCache, key, result ); + set(& StringCache, key, { length(result), result } ); - return result; + return { length(result), result }; } // Used internally to retireve a Code object form the CodePool. diff --git a/project/components/interface.untyped.cpp b/project/components/interface.untyped.cpp index 6e6e9d6..2c1a976 100644 --- a/project/components/interface.untyped.cpp +++ b/project/components/interface.untyped.cpp @@ -11,13 +11,13 @@ ssize token_fmt_va( char* buf, usize buf_size, s32 num_tokens, va_list va ) local_persist Arena tok_map_arena; - HashTable tok_map; + HashTable(StrC) tok_map; { local_persist char tok_map_mem[ TokenFmt_TokenMap_MemSize ]; tok_map_arena = arena_init_from_memory( tok_map_mem, sizeof(tok_map_mem) ); - tok_map = hashtable_init( allocator_info(& tok_map_arena) ); + tok_map = hashtable_init(StrC, allocator_info(& tok_map_arena) ); s32 left = num_tokens - 1; diff --git a/project/components/interface.upfront.cpp b/project/components/interface.upfront.cpp index 55bfc75..ae386b0 100644 --- a/project/components/interface.upfront.cpp +++ b/project/components/interface.upfront.cpp @@ -485,7 +485,7 @@ CodeComment def_comment( StrC content ) Code result = make_code(); result->Type = CT_Comment; - result->Name = get_cached_string( cmt_formatted ); + result->Name = get_cached_string( { length(cmt_formatted), cmt_formatted } ); result->Content = result->Name; free(& cmt_formatted); diff --git a/project/components/lexer.cpp b/project/components/lexer.cpp index 5eb32d1..92bb0a9 100644 --- a/project/components/lexer.cpp +++ b/project/components/lexer.cpp @@ -357,7 +357,7 @@ s32 lex_preprocessor_directive( LexContext* ctx ) , current , preprocess_content.Line , preprocess_content.Column - , directive_str.Data + , (char*) directive_str ); return Lex_ReturnNull; } @@ -578,11 +578,11 @@ TokArray lex( StrC content ) return { {}, 0 }; } - foreach( StringCached, entry, PreprocessorDefines ) + foreach( StringCached*, entry, PreprocessorDefines ) { s32 length = 0; - char const* scanner = entry.Data; - while ( GEN_NS length(entry) > length && (char_is_alphanumeric( *scanner ) || *scanner == '_') ) + char const* scanner = * entry; + while ( entry->Len > length && (char_is_alphanumeric( *scanner ) || *scanner == '_') ) { c.scanner++; length ++; @@ -592,8 +592,8 @@ TokArray lex( StrC content ) length++; } - u64 key = crc32( entry.Data, length ); - set(& c.defines, key, (StrC)entry ); + u64 key = crc32( * entry, length ); + set(& c.defines, key, (StrC) * entry ); } clear(Tokens); diff --git a/project/components/parser.cpp b/project/components/parser.cpp index 39ec443..348886d 100644 --- a/project/components/parser.cpp +++ b/project/components/parser.cpp @@ -669,7 +669,7 @@ CodeAttributes parse_attributes() Code result = make_code(); result->Type = CT_PlatformAttributes; - result->Name = get_cached_string( name_stripped ); + result->Name = get_cached_string( { length(name_stripped), name_stripped } ); result->Content = result->Name; // result->Token = @@ -1047,10 +1047,10 @@ CodeBody parse_class_struct_body( TokType which, Token name ) if ( attributes ) { - String fused = string_make_reserve( GlobalAllocator, length(attributes->Content) + length(more_attributes->Content) ); + String fused = string_make_reserve( GlobalAllocator, attributes->Content.Len + more_attributes->Content.Len ); append_fmt( & fused, "%S %S", attributes->Content, more_attributes->Content ); - attributes->Name = get_cached_string(fused); + attributes->Name = get_cached_string({ length(fused), fused }); attributes->Content = attributes->Name; // } @@ -1344,7 +1344,7 @@ CodeDefine parse_define() return define; } - define->Content = get_cached_string( strip_formatting( to_str(currtok), strip_formatting_dont_preserve_newlines ) ); + define->Content = get_cached_string( to_strc( strip_formatting( to_str(currtok), strip_formatting_dont_preserve_newlines )) ); eat( Tok_Preprocess_Content ); // #define @@ -1494,7 +1494,7 @@ CodeFn parse_function_after_name( CodeFn result = (CodeFn) make_code(); - result->Name = get_cached_string( name_stripped ); + result->Name = get_cached_string( to_strc(name_stripped) ); result->ModuleFlags = mflags; if ( body ) @@ -2730,7 +2730,7 @@ CodeParam parse_params( bool use_template_capture ) eat( currtok.Type ); } - value = untyped_str( strip_formatting( to_str(value_tok), strip_formatting_dont_preserve_newlines ) ); + value = untyped_str( to_strc(strip_formatting( to_str(value_tok), strip_formatting_dont_preserve_newlines )) ); // ( = } } @@ -2845,7 +2845,7 @@ CodeParam parse_params( bool use_template_capture ) eat( currtok.Type ); } - value = untyped_str( strip_formatting( to_str(value_tok), strip_formatting_dont_preserve_newlines ) ); + value = untyped_str( to_strc(strip_formatting( to_str(value_tok), strip_formatting_dont_preserve_newlines )) ); // ( = , = } // ( = , = , .. @@ -4132,7 +4132,7 @@ CodeOpCast parse_operator_cast( CodeSpecifiers specifiers ) Code type = parse_type(); // :: ... operator - Context.Scope->Name = { type->Name.Data, length(type->Name) }; + Context.Scope->Name = { type->Name.Ptr, type->Name.Len }; eat( Tok_Capture_Start ); eat( Tok_Capture_End ); @@ -4820,7 +4820,7 @@ else if ( currtok.Type == Tok_DeclType ) } #endif - result->Name = get_cached_string( name_stripped ); + result->Name = get_cached_string( to_strc(name_stripped) ); if ( attributes ) result->Attributes = attributes; diff --git a/project/dependencies/containers.hpp b/project/dependencies/containers.hpp index dcafca4..19c2e9b 100644 --- a/project/dependencies/containers.hpp +++ b/project/dependencies/containers.hpp @@ -152,7 +152,7 @@ bool append(Array* array, Type value) if (header->Num == header->Capacity) { - if (!grow(array, header->Capacity)) + if ( ! grow(array, header->Capacity)) return false; header = get_header(* array); } @@ -170,12 +170,12 @@ bool append(Array* array, Type* items, usize item_num) if (header->Num + item_num > header->Capacity) { - if (!grow(array, header->Capacity + item_num)) + if ( ! grow(array, header->Capacity + item_num)) return false; header = get_header(array); } - mem_copy(array.Data + header->Num, items, item_num * sizeof(Type)); + mem_copy((Type*)array + header->Num, items, item_num * sizeof(Type)); header->Num += item_num; return true; @@ -296,7 +296,7 @@ bool grow(Array* array, usize min_capacity) usize new_capacity = array_grow_formula(header->Capacity); if (new_capacity < min_capacity) - new_capacity = min_capacity; + new_capacity = min_capacity; return set_capacity(array, new_capacity); } diff --git a/project/dependencies/macros.hpp b/project/dependencies/macros.hpp index f9da4d1..ef686da 100644 --- a/project/dependencies/macros.hpp +++ b/project/dependencies/macros.hpp @@ -212,11 +212,7 @@ // This is intended to only really be used internally or with the C-library variant // C++ users can just use the for-range directly. -#if GEN_COMPILER_C -# define foreach(Type, entry_id, iterable) for ( Type entry_id = begin(iterable); entry_id != end(iterable); entry_id = next(iterable, entry_id) ) -#else -# define foreach(Type, entry_id, iterable) for ( Type entry_id : iterable ) -#endif +#define foreach(Type, entry_id, iterable) for ( Type entry_id = begin(iterable); entry_id != end(iterable); entry_id = next(iterable, entry_id) ) #if GEN_COMPILER_C # if __STDC_VERSION__ >= 202311L diff --git a/project/dependencies/strings.hpp b/project/dependencies/strings.hpp index 7a51d4b..5d74e81 100644 --- a/project/dependencies/strings.hpp +++ b/project/dependencies/strings.hpp @@ -5,6 +5,16 @@ #pragma region Strings +struct StrC; + +bool are_equal (StrC lhs, StrC rhs); +char const* back (StrC str); +bool contains (StrC str, StrC substring); +StrC duplicate (StrC str, AllocatorInfo allocator); +b32 starts_with (StrC str, StrC substring); +StrC to_str (char const* bad_string); +StrC visualize_whitespace(StrC str, AllocatorInfo allocator); + // Constant string with length. struct StrC { @@ -14,15 +24,71 @@ struct StrC #if ! GEN_COMPILER_C operator char const* () const { return Ptr; } char const& operator[]( ssize index ) const { return Ptr[index]; } -#endif + +#if GEN_SUPPORT_CPP_MEMBER_FUNCTIONS + bool is_equal (StrC rhs) { return are_equal(* this, rhs); } + char* back () { return back(* this); } + bool contains (StrC substring) { return contains(* this, substring); } + String duplicate (AllocatorInfo allocator) { return duplicate(* this, allocator); } + b32 starts_with (StrC substring) { return starts_with(* this, substring); } + String visualize_whitespace() { return visualize_whitespace(* this); } +#endif // GEN_SUPPORT_CPP_MEMBER_FUNCTIONS +#endif // GEN_COMPILERC }; #define cast_to_strc( str ) * rcast( StrC*, (str) - sizeof(ssize) ) #define txt( text ) StrC { sizeof( text ) - 1, ( text ) } +inline char const* begin(StrC str) { return str.Ptr; } +inline char const* end (StrC str) { return str.Ptr + str.Len; } +inline char const* next (StrC str, char const* iter) { return iter + 1; } + inline -StrC to_str( char const* str ) { - return { str_len( str ), str }; +bool are_equal(StrC lhs, StrC rhs) +{ + if (lhs.Len != rhs.Len) + return false; + + for (ssize idx = 0; idx < lhs.Len; ++idx) + if (lhs[idx] != rhs[idx]) + return false; + + return true; +} + +inline +char const* back(StrC str) { + return & str.Ptr[str.Len - 1]; +} + +inline +bool contains(StrC str, StrC substring) +{ + if (substring.Len > str.Len) + return false; + + ssize main_len = str.Len; + ssize sub_len = substring.Len; + for (ssize idx = 0; idx <= main_len - sub_len; ++idx) + { + if (str_compare(str.Ptr + idx, substring.Ptr, sub_len) == 0) + return true; + } + return false; +} + +inline +b32 starts_with(StrC str, StrC substring) { + if (substring.Len > str.Len) + return false; + + b32 result = str_compare(str.Ptr, substring.Ptr, substring.Len) == 0; + return result; +} + +inline +StrC to_str( char const* bad_str ) { + return { str_len( bad_str ), bad_str }; } // Dynamic String @@ -31,7 +97,12 @@ StrC to_str( char const* str ) { // I kept it for simplicty of porting but its not necessary to keep it that way. #pragma region String struct StringHeader; + +#if GEN_COMPILER_C || ! GEN_SUPPORT_CPP_MEMBER_FEATURES +typedef char* String; +#else struct String; +#endif usize string_grow_formula(usize value); @@ -65,6 +136,7 @@ b32 starts_with (String const str, StrC substring); b32 starts_with (String const str, String substring); void skip_line (String str); void strip_space (String str); +StrC to_strc (String str); void trim (String str, char const* cut_set); void trim_space (String str); String visualize_whitespace(String const str); @@ -75,15 +147,14 @@ struct StringHeader { ssize Length; }; -#if ! GEN_COMPILER_C +#if ! GEN_COMPILER_C && GEN_SUPPORT_CPP_MEMBER_FEATURES struct String { char* Data; - forceinline operator bool() { return Data != nullptr; } forceinline operator char*() { return Data; } forceinline operator char const*() const { return Data; } - forceinline operator StrC() const { return { GEN_NS length(* this), Data }; } + forceinline operator StrC() const { return { GEN_NS length(* this), Data }; } String const& operator=(String const& other) const { if (this == &other) @@ -95,13 +166,17 @@ struct String return *this; } - forceinline char& operator[](ssize index) { return Data[index]; } + forceinline char& operator[](ssize index) { return Data[index]; } forceinline char const& operator[](ssize index) const { return Data[index]; } - forceinline char* begin() const { return Data; } - forceinline char* end() const { return Data + GEN_NS length(* this); } + bool operator==(std::nullptr_t) const { return Data == nullptr; } + bool operator!=(std::nullptr_t) const { return Data != nullptr; } + friend bool operator==(std::nullptr_t, const String str) { return str.Data == nullptr; } + friend bool operator!=(std::nullptr_t, const String str) { return str.Data != nullptr; } + + forceinline char* begin() const { return Data; } + forceinline char* end() const { return Data + GEN_NS length(* this); } -#if GEN_SUPPORT_CPP_MEMBER_FEATURES #pragma region Member Mapping forceinline static String make(AllocatorInfo allocator, char const* str) { return GEN_NS string_make(allocator, str); } forceinline static String make(AllocatorInfo allocator, StrC str) { return GEN_NS string_make(allocator, str); } @@ -151,6 +226,7 @@ struct String forceinline b32 starts_with(String substring) const { return GEN_NS starts_with(* this, substring); } forceinline void skip_line() { GEN_NS skip_line(* this); } forceinline void strip_space() { GEN_NS strip_space(* this); } + forceinline StrC to_strc() { return { length(), Data}; } forceinline void trim(char const* cut_set) { GEN_NS trim(* this, cut_set); } forceinline void trim_space() { GEN_NS trim_space(* this); } forceinline String visualize_whitespace() const { return GEN_NS visualize_whitespace(* this); } @@ -168,26 +244,25 @@ struct String return GEN_NS append(this, buf, res); } #pragma endregion Member Mapping -#endif }; #endif -#if GEN_SUPPORT_CPP_REFERENCES -bool make_space_for(String& str, char const* to_append, ssize add_len); -bool append(String& str, char c); -bool append(String& str, char const* str_to_append); -bool append(String& str, char const* str_to_append, ssize length); -bool append(String& str, StrC str_to_append); -bool append(String& str, const String other); -bool append_fmt(String& str, char const* fmt, ...); -char& back(String& str); -void clear(String& str); -void free(String& str); -#endif +inline char* begin(String str) { return ((char*) str); } +inline char* end (String str) { return ((char*) str + length(str)); } +inline char* next (String str, char* iter) { return ((char*) iter + 1); } -inline char* begin(String str) { return str; } -inline char* end(String str) { return scast(char*, str) + length(str); } -inline char* next(String str) { return scast(char*, str) + 1; } +#if GEN_SUPPORT_CPP_REFERENCES +inline bool make_space_for(String& str, char const* to_append, ssize add_len); +inline bool append(String& str, char c); +inline bool append(String& str, char const* str_to_append); +inline bool append(String& str, char const* str_to_append, ssize length); +inline bool append(String& str, StrC str_to_append); +inline bool append(String& str, const String other); +inline bool append_fmt(String& str, char const* fmt, ...); +inline char& back(String& str); +inline void clear(String& str); +inline void free(String& str); +#endif inline usize string_grow_formula(usize value) { @@ -249,7 +324,7 @@ String string_join(AllocatorInfo allocator, char const** parts, ssize num_parts, inline bool append(String* str, char c) { GEN_ASSERT(str != nullptr); - return append(str, &c, 1); + return append( str, (char const*)& c, (ssize)1); } inline @@ -288,9 +363,9 @@ bool append(String* str, StrC str_to_append) { } inline -bool append(String* str, const String other) { +bool append(String* str, String const other) { GEN_ASSERT(str != nullptr); - return append(str, other, length(other)); + return append(str, (char const*)other, length(other)); } bool append_fmt(String* str, char const* fmt, ...) { @@ -303,7 +378,7 @@ bool append_fmt(String* str, char const* fmt, ...) { res = str_fmt_va(buf, count_of(buf) - 1, fmt, va) - 1; va_end(va); - return append(str, buf, res); + return append(str, (char const*)buf, res); } inline @@ -464,7 +539,7 @@ b32 starts_with(String const str, StrC substring) { if (substring.Len > length(str)) return false; - b32 result = str_compare(str.Data, substring.Ptr, substring.Len) == 0; + b32 result = str_compare(str, substring.Ptr, substring.Len) == 0; return result; } @@ -473,7 +548,7 @@ b32 starts_with(String const str, String substring) { if (length(substring) > length(str)) return false; - b32 result = str_compare(str.Data, substring.Data, length(substring) - 1) == 0; + b32 result = str_compare(str, substring, length(substring) - 1) == 0; return result; } @@ -481,18 +556,18 @@ inline void skip_line(String str) { #define current (*scanner) - char* scanner = str.Data; + char* scanner = str; while (current != '\r' && current != '\n') { ++scanner; } - s32 new_length = scanner - str.Data; + s32 new_length = scanner - str; if (current == '\r') { new_length += 1; } - mem_move(str.Data, scanner, new_length); + mem_move((char*)str, scanner, new_length); StringHeader* header = get_header(str); header->Length = new_length; @@ -518,29 +593,34 @@ void strip_space(String str) write_pos[0] = '\0'; // Null-terminate the modified string // Update the length if needed - get_header(str)->Length = write_pos - str.Data; + get_header(str)->Length = write_pos - str; +} + +inline +StrC to_strc(String str) { + return { length(str), (char const*)str }; } inline void trim(String str, char const* cut_set) { - ssize len = 0; + ssize len = 0; - char* start_pos = str.Data; - char* end_pos = str.Data + length(str) - 1; + char* start_pos = str; + char* end_pos = scast(char*, str) + length(str) - 1; - while (start_pos <= end_pos && char_first_occurence(cut_set, *start_pos)) - start_pos++; + while (start_pos <= end_pos && char_first_occurence(cut_set, *start_pos)) + start_pos++; - while (end_pos > start_pos && char_first_occurence(cut_set, *end_pos)) - end_pos--; + while (end_pos > start_pos && char_first_occurence(cut_set, *end_pos)) + end_pos--; - len = scast(ssize, (start_pos > end_pos) ? 0 : ((end_pos - start_pos) + 1)); + len = scast(ssize, (start_pos > end_pos) ? 0 : ((end_pos - start_pos) + 1)); - if (str.Data != start_pos) - mem_move(str.Data, start_pos, len); + if (str != start_pos) + mem_move(str, start_pos, len); - str.Data[len] = '\0'; + str[len] = '\0'; get_header(str)->Length = len; } @@ -556,7 +636,7 @@ String visualize_whitespace(String const str) StringHeader* header = (StringHeader*)(scast(char const*, str) - sizeof(StringHeader)); String result = string_make_reserve(header->Allocator, length(str) * 2); // Assume worst case for space requirements. - for (auto c : str) switch (c) + foreach (char*, c, str) switch ( * c ) { case ' ': append(& result, txt("·")); @@ -590,11 +670,47 @@ struct String_POD { }; static_assert( sizeof( String_POD ) == sizeof( String ), "String is not a POD" ); -// Implements basic string interning. Data structure is based off the ZPL Hashtable. -typedef HashTable StringTable; +inline +StrC duplicate(StrC str, AllocatorInfo allocator) { + String result = string_make_length(allocator, str.Ptr, str.Len); + return { get_header(result)->Length, result }; +} + +inline +StrC visualize_whitespace(StrC str, AllocatorInfo allocator) +{ + String result = string_make_reserve(allocator, str.Len * 2); // Assume worst case for space requirements. + foreach (char const*, c, str) switch ( * c ) + { + case ' ': + append(& result, txt("·")); + break; + case '\t': + append(& result, txt("→")); + break; + case '\n': + append(& result, txt("↵")); + break; + case '\r': + append(& result, txt("⏎")); + break; + case '\v': + append(& result, txt("⇕")); + break; + case '\f': + append(& result, txt("⌂")); + break; + default: + append(& result, c); + break; + } + return to_strc(result); +} // Represents strings cached with the string table. // Should never be modified, if changed string is desired, cache_string( str ) another. -typedef String const StringCached; +typedef StrC StringCached; +// Implements basic string interning. Data structure is based off the ZPL Hashtable. +typedef HashTable StringTable; #pragma endregion Strings diff --git a/project/helpers/helper.hpp b/project/helpers/helper.hpp index 9060585..e27cc14 100644 --- a/project/helpers/helper.hpp +++ b/project/helpers/helper.hpp @@ -31,11 +31,11 @@ CodeBody gen_ecode( char const* path ) append_fmt( & to_str_entries, "{ sizeof(\"%s\"), \"%s\" },\n", code, code ); } - CodeEnum enum_code = parse_enum(gen::token_fmt_impl((3 + 1) / 2, "entries", (StrC)enum_entries, "enum CodeType_Def : u32 { CT_NumTypes };")); + CodeEnum enum_code = parse_enum(gen::token_fmt_impl((3 + 1) / 2, "entries", to_strc(enum_entries), "enum CodeType_Def : u32 { CT_NumTypes };")); #pragma push_macro("local_persist") #undef local_persist - CodeFn to_str = parse_function( token_fmt( "entries", (StrC)to_str_entries, stringize( + CodeFn to_str = parse_function( token_fmt( "entries", to_strc(to_str_entries), stringize( inline StrC to_str( CodeType type ) { @@ -53,7 +53,7 @@ CodeBody gen_ecode( char const* path ) //CodeUsing code_t = def_using( name(CodeT), def_type( name(ECode::Type) ) ); CodeTypedef code_t = parse_typedef(code(typedef enum CodeType_Def CodeType; )); - return def_global_body( args( code_t, enum_code, to_str, fmt_newline ) ); + return def_global_body( args( enum_code, code_t, to_str, fmt_newline ) ); } CodeBody gen_eoperator( char const* path ) @@ -81,7 +81,7 @@ CodeBody gen_eoperator( char const* path ) append_fmt( & to_str_entries, "{ sizeof(\"%s\"), \"%s\" },\n", entry_to_str, entry_to_str); } - CodeEnum enum_code = parse_enum(token_fmt("entries", (StrC)enum_entries, stringize( + CodeEnum enum_code = parse_enum(token_fmt("entries", to_strc(enum_entries), stringize( enum Operator_Def : u32 { @@ -91,7 +91,7 @@ CodeBody gen_eoperator( char const* path ) #pragma push_macro("local_persist") #undef local_persist - CodeFn to_str = parse_function(token_fmt("entries", (StrC)to_str_entries, stringize( + CodeFn to_str = parse_function(token_fmt("entries", to_strc(to_str_entries), stringize( inline StrC to_str( Operator op ) { @@ -109,7 +109,7 @@ CodeBody gen_eoperator( char const* path ) //CodeUsing operator_t = def_using( name(OperatorT), def_type( name(EOperator::Type) ) ); CodeTypedef operator_t = parse_typedef(code( typedef enum Operator_Def Operator; )); - return def_global_body( args( operator_t, enum_code, to_str, fmt_newline ) ); + return def_global_body( args( enum_code, operator_t, to_str, fmt_newline ) ); } CodeBody gen_especifier( char const* path ) @@ -137,7 +137,7 @@ CodeBody gen_especifier( char const* path ) append_fmt( & to_str_entries, "{ sizeof(\"%s\"), \"%s\" },\n", entry_to_str, entry_to_str); } - CodeEnum enum_code = parse_enum(token_fmt("entries", (StrC)enum_entries, stringize( + CodeEnum enum_code = parse_enum(token_fmt("entries", to_strc(enum_entries), stringize( enum Specifier_Def : u32 { @@ -145,7 +145,7 @@ CodeBody gen_especifier( char const* path ) }; ))); - CodeFn is_trailing = parse_function(token_fmt("specifier", (StrC)to_str_entries, stringize( + CodeFn is_trailing = parse_function(token_fmt("specifier", to_strc(to_str_entries), stringize( inline bool is_trailing( Specifier specifier ) { @@ -163,7 +163,7 @@ CodeBody gen_especifier( char const* path ) #undef do_once_end #undef forceinline #undef neverinline - CodeFn to_str = parse_function(token_fmt("entries", (StrC)to_str_entries, stringize( + CodeFn to_str = parse_function(token_fmt("entries", to_strc(to_str_entries), stringize( inline StrC to_str( Specifier type ) { @@ -176,7 +176,7 @@ CodeBody gen_especifier( char const* path ) } ))); - CodeFn to_type = parse_function( token_fmt( "entries", (StrC)to_str_entries, stringize( + CodeFn to_type = parse_function( token_fmt( "entries", to_strc(to_str_entries), stringize( inline Specifier to_specifier( StrC str ) { @@ -214,7 +214,7 @@ CodeBody gen_especifier( char const* path ) //CodeUsing specifier_t = def_using( name(SpecifierT), def_type( name(ESpecifier::Type) ) ); CodeTypedef specifier_t = parse_typedef( code(typedef enum Specifier_Def Specifier; )); - return def_global_body( args( specifier_t, enum_code, is_trailing, to_str, to_type, fmt_newline ) ); + return def_global_body( args( enum_code, specifier_t, is_trailing, to_str, to_type, fmt_newline ) ); } CodeBody gen_etoktype( char const* etok_path, char const* attr_path ) @@ -271,11 +271,11 @@ CodeBody gen_etoktype( char const* etok_path, char const* attr_path ) #pragma push_macro("GEN_DEFINE_ATTRIBUTE_TOKENS") #undef GEN_DEFINE_ATTRIBUTE_TOKENS - CodeDefine attribute_entires_def = def_define( name(GEN_DEFINE_ATTRIBUTE_TOKENS), attribute_define_entries ); + CodeDefine attribute_entires_def = def_define( name(GEN_DEFINE_ATTRIBUTE_TOKENS), to_strc(attribute_define_entries) ); #pragma pop_macro("GEN_DEFINE_ATTRIBUTE_TOKENS") // We cannot parse this enum, it has Attribute names as enums - CodeEnum enum_code = parse_enum(token_fmt("entries", (StrC)enum_entries, "attribute_toks", (StrC)attribute_entries, stringize( + CodeEnum enum_code = parse_enum(token_fmt("entries", to_str(enum_entries), "attribute_toks", to_str(attribute_entries), stringize( enum TokType_Def : u32 { @@ -290,7 +290,7 @@ CodeBody gen_etoktype( char const* etok_path, char const* attr_path ) #undef local_persist #undef do_once_start #undef do_once_end - CodeFn to_str = parse_function(token_fmt("entries", (StrC)to_str_entries, "attribute_toks", (StrC)to_str_attributes, stringize( + CodeFn to_str = parse_function(token_fmt("entries", to_strc(to_str_entries), "attribute_toks", to_strc(to_str_attributes), stringize( inline StrC to_str( TokType type ) { @@ -304,7 +304,7 @@ CodeBody gen_etoktype( char const* etok_path, char const* attr_path ) } ))); - CodeFn to_type = parse_function( token_fmt( "entries", (StrC)to_str_entries, stringize( + CodeFn to_type = parse_function( token_fmt( "entries", to_strc(to_str_entries), stringize( inline TokType to_toktype( StrC str ) { @@ -341,8 +341,8 @@ CodeBody gen_etoktype( char const* etok_path, char const* attr_path ) return def_global_body( args( attribute_entires_def, - td_toktype, enum_code, + td_toktype, to_str, to_type ));