From b00c1ae5224fd62412ec601c57c29f1126f50e3f Mon Sep 17 00:00:00 2001 From: Ed_ Date: Thu, 27 Jul 2023 02:51:36 -0400 Subject: [PATCH] ECode, ESpecifier, and ETokType are now all generated. There is a redundant pattern for generating all three (as expected). I'll use it to define a general way of doing this sort of behavior. --- project/components/gen.ast_case_macros.cpp | 19 +- project/components/gen.etoktype.cpp | 181 +++++++++++++++++++ project/components/gen.interface.parsing.cpp | 179 ------------------ project/gen.bootstrap.cpp | 4 + project/gen.cpp | 1 + project/helpers/gen.helper.hpp | 126 ++++++++++++- scripts/.clang-format | 4 +- singleheader/gen.singleheader.cpp | 4 + 8 files changed, 334 insertions(+), 184 deletions(-) create mode 100644 project/components/gen.etoktype.cpp diff --git a/project/components/gen.ast_case_macros.cpp b/project/components/gen.ast_case_macros.cpp index 256edb7..83e18e1 100644 --- a/project/components/gen.ast_case_macros.cpp +++ b/project/components/gen.ast_case_macros.cpp @@ -58,7 +58,24 @@ case Typename: # define AST_BODY_EXPORT_UNALLOWED_TYPES AST_BODY_GLOBAL_UNALLOWED_TYPES -# define AST_BODY_NAMESPACE_UNALLOWED_TYPES AST_BODY_GLOBAL_UNALLOWED_TYPES +# define AST_BODY_NAMESPACE_UNALLOWED_TYPES \ + case Access_Public: \ + case Access_Protected: \ + case Access_Private: \ + case PlatformAttributes: \ + case Class_Body: \ + case Enum_Body: \ + case Execution: \ + case Friend: \ + case Function_Body: \ + case Namespace_Body: \ + case Operator_Member: \ + case Operator_Member_Fwd: \ + case Parameters: \ + case Specifiers: \ + case Struct_Body: \ + case Typename: + # define AST_BODY_EXTERN_LINKAGE_UNALLOWED_TYPES AST_BODY_GLOBAL_UNALLOWED_TYPES # define AST_BODY_STRUCT_UNALLOWED_TYPES AST_BODY_CLASS_UNALLOWED_TYPES diff --git a/project/components/gen.etoktype.cpp b/project/components/gen.etoktype.cpp new file mode 100644 index 0000000..f7a25eb --- /dev/null +++ b/project/components/gen.etoktype.cpp @@ -0,0 +1,181 @@ +namespace Parser +{ +/* + This is a simple lexer that focuses on tokenizing only tokens relevant to the library. + It will not be capable of lexing C++ code with unsupported features. + + For the sake of scanning files, it can scan preprocessor directives + + Attributes_Start is only used to indicate the start of the user_defined attribute list. +*/ + +# define Define_TokType \ + Entry( Access_Private, "private" ) \ + Entry( Access_Protected, "protected" ) \ + Entry( Access_Public, "public" ) \ + Entry( Access_MemberSymbol, "." ) \ + Entry( Access_StaticSymbol, "::") \ + Entry( Ampersand, "&" ) \ + Entry( Ampersand_DBL, "&&" ) \ + Entry( Assign_Classifer, ":" ) \ + Entry( Attribute_Open, "[[" ) \ + Entry( Attribute_Close, "]]" ) \ + Entry( BraceCurly_Open, "{" ) \ + Entry( BraceCurly_Close, "}" ) \ + Entry( BraceSquare_Open, "[" ) \ + Entry( BraceSquare_Close, "]" ) \ + Entry( Capture_Start, "(" ) \ + Entry( Capture_End, ")" ) \ + Entry( Comment, "__comment__" ) \ + Entry( Char, "__char__" ) \ + Entry( Comma, "," ) \ + Entry( Decl_Class, "class" ) \ + Entry( Decl_GNU_Attribute, "__attribute__" ) \ + Entry( Decl_MSVC_Attribute, "__declspec" ) \ + Entry( Decl_Enum, "enum" ) \ + Entry( Decl_Extern_Linkage, "extern" ) \ + Entry( Decl_Friend, "friend" ) \ + Entry( Decl_Module, "module" ) \ + Entry( Decl_Namespace, "namespace" ) \ + Entry( Decl_Operator, "operator" ) \ + Entry( Decl_Struct, "struct" ) \ + Entry( Decl_Template, "template" ) \ + Entry( Decl_Typedef, "typedef" ) \ + Entry( Decl_Using, "using" ) \ + Entry( Decl_Union, "union" ) \ + Entry( Identifier, "__identifier__" ) \ + Entry( Module_Import, "import" ) \ + Entry( Module_Export, "export" ) \ + Entry( Number, "number" ) \ + Entry( Operator, "operator" ) \ + Entry( Preprocessor_Directive, "#") \ + Entry( Preprocessor_Include, "include" ) \ + Entry( Spec_Alignas, "alignas" ) \ + Entry( Spec_Const, "const" ) \ + Entry( Spec_Consteval, "consteval" ) \ + Entry( Spec_Constexpr, "constexpr" ) \ + Entry( Spec_Constinit, "constinit" ) \ + Entry( Spec_Explicit, "explicit" ) \ + Entry( Spec_Extern, "extern" ) \ + Entry( Spec_Final, "final" ) \ + Entry( Spec_Global, "global" ) \ + Entry( Spec_Inline, "inline" ) \ + Entry( Spec_Internal_Linkage, "internal" ) \ + Entry( Spec_LocalPersist, "local_persist" ) \ + Entry( Spec_Mutable, "mutable" ) \ + Entry( Spec_Override, "override" ) \ + Entry( Spec_Static, "static" ) \ + Entry( Spec_ThreadLocal, "thread_local" ) \ + Entry( Spec_Volatile, "volatile") \ + Entry( Star, "*" ) \ + Entry( Statement_End, ";" ) \ + Entry( String, "__string__" ) \ + Entry( Type_Unsigned, "unsigned" ) \ + Entry( Type_Signed, "signed" ) \ + Entry( Type_Short, "short" ) \ + Entry( Type_Long, "long" ) \ + Entry( Type_char, "char" ) \ + Entry( Type_int, "int" ) \ + Entry( Type_double, "double" ) \ + Entry( Varadic_Argument, "..." ) \ + Entry( Attributes_Start, "__attrib_start__" ) + + enum class TokType : u32 + { + # define Entry( Name_, Str_ ) Name_, + Define_TokType + GEN_Define_Attribute_Tokens + # undef Entry + Num, + Invalid + }; + + struct Token + { + char const* Text; + sptr Length; + TokType Type; + bool IsAssign; + + operator bool() + { + return Text && Length && Type != TokType::Invalid; + } + + operator StrC() + { + return { Length, Text }; + } + }; + + internal inline + TokType get_tok_type( char const* word, s32 length ) + { + local_persist + StrC lookup[(u32)TokType::Num] = + { + # define Entry( Name_, Str_ ) { sizeof(Str_), Str_ }, + Define_TokType + GEN_Define_Attribute_Tokens + # undef Entry + }; + + for ( u32 index = 0; index < (u32)TokType::Num; index++ ) + { + s32 lookup_len = lookup[index].Len - 1; + char const* lookup_str = lookup[index].Ptr; + + if ( lookup_len != length ) + continue; + + if ( str_compare( word, lookup_str, lookup_len ) == 0 ) + return scast(TokType, index); + } + + return TokType::Invalid; + } + + internal inline + char const* str_tok_type( TokType type ) + { + local_persist + char const* lookup[(u32)TokType::Num] = + { + # define Entry( Name_, Str_ ) Str_, + Define_TokType + GEN_Define_Attribute_Tokens + # undef Entry + }; + + return lookup[(u32)type]; + } + +# undef Define_TokType + + internal inline + bool tok_is_specifier( Token const& tok ) + { + return (tok.Type <= TokType::Star && tok.Type >= TokType::Spec_Alignas) + || tok.Type == TokType::Ampersand + || tok.Type == TokType::Ampersand_DBL + ; + } + + internal inline + bool tok_is_access_specifier( Token const& tok ) + { + return tok.Type >= TokType::Access_Private && tok.Type <= TokType::Access_Public; + } + + internal inline + AccessSpec tok_to_access_specifier( Token const& tok ) + { + return scast(AccessSpec, tok.Type); + } + + internal inline + bool tok_is_attribute( Token const& tok ) + { + return tok.Type > TokType::Attributes_Start; + } +} // Parser \ No newline at end of file diff --git a/project/components/gen.interface.parsing.cpp b/project/components/gen.interface.parsing.cpp index b3b518e..52f8ea8 100644 --- a/project/components/gen.interface.parsing.cpp +++ b/project/components/gen.interface.parsing.cpp @@ -4,185 +4,6 @@ These constructors are the most implementation intensive other than the editor o namespace Parser { -/* - This is a simple lexer that focuses on tokenizing only tokens relevant to the library. - It will not be capable of lexing C++ code with unsupported features. - - For the sake of scanning files, it can scan preprocessor directives - - Attributes_Start is only used to indicate the start of the user_defined attribute list. -*/ - -# define Define_TokType \ - Entry( Access_Private, "private" ) \ - Entry( Access_Protected, "protected" ) \ - Entry( Access_Public, "public" ) \ - Entry( Access_MemberSymbol, "." ) \ - Entry( Access_StaticSymbol, "::") \ - Entry( Ampersand, "&" ) \ - Entry( Ampersand_DBL, "&&" ) \ - Entry( Assign_Classifer, ":" ) \ - Entry( Attribute_Open, "[[" ) \ - Entry( Attribute_Close, "]]" ) \ - Entry( BraceCurly_Open, "{" ) \ - Entry( BraceCurly_Close, "}" ) \ - Entry( BraceSquare_Open, "[" ) \ - Entry( BraceSquare_Close, "]" ) \ - Entry( Capture_Start, "(" ) \ - Entry( Capture_End, ")" ) \ - Entry( Comment, "__comment__" ) \ - Entry( Char, "__char__" ) \ - Entry( Comma, "," ) \ - Entry( Decl_Class, "class" ) \ - Entry( Decl_GNU_Attribute, "__attribute__" ) \ - Entry( Decl_MSVC_Attribute, "__declspec" ) \ - Entry( Decl_Enum, "enum" ) \ - Entry( Decl_Extern_Linkage, "extern" ) \ - Entry( Decl_Friend, "friend" ) \ - Entry( Decl_Module, "module" ) \ - Entry( Decl_Namespace, "namespace" ) \ - Entry( Decl_Operator, "operator" ) \ - Entry( Decl_Struct, "struct" ) \ - Entry( Decl_Template, "template" ) \ - Entry( Decl_Typedef, "typedef" ) \ - Entry( Decl_Using, "using" ) \ - Entry( Decl_Union, "union" ) \ - Entry( Identifier, "__identifier__" ) \ - Entry( Module_Import, "import" ) \ - Entry( Module_Export, "export" ) \ - Entry( Number, "number" ) \ - Entry( Operator, "operator" ) \ - Entry( Preprocessor_Directive, "#") \ - Entry( Preprocessor_Include, "include" ) \ - Entry( Spec_Alignas, "alignas" ) \ - Entry( Spec_Const, "const" ) \ - Entry( Spec_Consteval, "consteval" ) \ - Entry( Spec_Constexpr, "constexpr" ) \ - Entry( Spec_Constinit, "constinit" ) \ - Entry( Spec_Explicit, "explicit" ) \ - Entry( Spec_Extern, "extern" ) \ - Entry( Spec_Final, "final" ) \ - Entry( Spec_Global, "global" ) \ - Entry( Spec_Inline, "inline" ) \ - Entry( Spec_Internal_Linkage, "internal" ) \ - Entry( Spec_LocalPersist, "local_persist" ) \ - Entry( Spec_Mutable, "mutable" ) \ - Entry( Spec_Override, "override" ) \ - Entry( Spec_Static, "static" ) \ - Entry( Spec_ThreadLocal, "thread_local" ) \ - Entry( Spec_Volatile, "volatile") \ - Entry( Star, "*" ) \ - Entry( Statement_End, ";" ) \ - Entry( String, "__string__" ) \ - Entry( Type_Unsigned, "unsigned" ) \ - Entry( Type_Signed, "signed" ) \ - Entry( Type_Short, "short" ) \ - Entry( Type_Long, "long" ) \ - Entry( Type_char, "char" ) \ - Entry( Type_int, "int" ) \ - Entry( Type_double, "double" ) \ - Entry( Varadic_Argument, "..." ) \ - Entry( Attributes_Start, "__attrib_start__" ) - - enum class TokType : u32 - { - # define Entry( Name_, Str_ ) Name_, - Define_TokType - GEN_Define_Attribute_Tokens - # undef Entry - Num, - Invalid - }; - - struct Token - { - char const* Text; - sptr Length; - TokType Type; - bool IsAssign; - - operator bool() - { - return Text && Length && Type != TokType::Invalid; - } - - operator StrC() - { - return { Length, Text }; - } - }; - - internal inline - TokType get_tok_type( char const* word, s32 length ) - { - local_persist - StrC lookup[(u32)TokType::Num] = - { - # define Entry( Name_, Str_ ) { sizeof(Str_), Str_ }, - Define_TokType - GEN_Define_Attribute_Tokens - # undef Entry - }; - - for ( u32 index = 0; index < (u32)TokType::Num; index++ ) - { - s32 lookup_len = lookup[index].Len - 1; - char const* lookup_str = lookup[index].Ptr; - - if ( lookup_len != length ) - continue; - - if ( str_compare( word, lookup_str, lookup_len ) == 0 ) - return scast(TokType, index); - } - - return TokType::Invalid; - } - - internal inline - char const* str_tok_type( TokType type ) - { - local_persist - char const* lookup[(u32)TokType::Num] = - { - # define Entry( Name_, Str_ ) Str_, - Define_TokType - GEN_Define_Attribute_Tokens - # undef Entry - }; - - return lookup[(u32)type]; - } - -# undef Define_TokType - - internal inline - bool tok_is_specifier( Token const& tok ) - { - return (tok.Type <= TokType::Star && tok.Type >= TokType::Spec_Alignas) - || tok.Type == TokType::Ampersand - || tok.Type == TokType::Ampersand_DBL - ; - } - - internal inline - bool tok_is_access_specifier( Token const& tok ) - { - return tok.Type >= TokType::Access_Private && tok.Type <= TokType::Access_Public; - } - - internal inline - AccessSpec tok_to_access_specifier( Token const& tok ) - { - return scast(AccessSpec, tok.Type); - } - - internal inline - bool tok_is_attribute( Token const& tok ) - { - return tok.Type > TokType::Attributes_Start; - } - struct TokArray { Array Arr; diff --git a/project/gen.bootstrap.cpp b/project/gen.bootstrap.cpp index 4f0131d..cbfbfad 100644 --- a/project/gen.bootstrap.cpp +++ b/project/gen.bootstrap.cpp @@ -167,6 +167,9 @@ int gen_main() Code parsing = scan_file( "components/gen.interface.parsing.cpp" ); Code untyped = scan_file( "components/gen.untyped.cpp" ); + CodeBody etoktype = gen_etoktype( "./components/ETokType.csv" ); + CodeNamespace parser_nspace = def_namespace( name(Parser), def_namespace_body( args(etoktype)) ); + Code builder = scan_file( "filesystem/gen.builder.cpp" ); Builder @@ -182,6 +185,7 @@ int gen_main() impl.print( ast ); impl.print( interface ); impl.print( upfront ); + impl.print( parser_nspace ); impl.print( parsing ); impl.print( untyped ); diff --git a/project/gen.cpp b/project/gen.cpp index 75f9608..a90f524 100644 --- a/project/gen.cpp +++ b/project/gen.cpp @@ -23,6 +23,7 @@ GEN_NS_BEGIN #include "components/gen.interface.cpp" #include "components/gen.interface.upfront.cpp" +#include "components/gen.etoktype.cpp" #include "components/gen.interface.parsing.cpp" #include "components/gen.untyped.cpp" diff --git a/project/helpers/gen.helper.hpp b/project/helpers/gen.helper.hpp index 6634939..ff2d3dd 100644 --- a/project/helpers/gen.helper.hpp +++ b/project/helpers/gen.helper.hpp @@ -209,7 +209,129 @@ CodeBody gen_especifier( char const* path ) return def_global_body( args( nspace, specifier_t ) ); } -CodeBody gen_etoktype() +CodeBody gen_etoktype( char const* path ) { - return CodeInvalid; + char scratch_mem[kilobytes(4)]; + Arena scratch = Arena::init_from_memory( scratch_mem, sizeof(scratch_mem) ); + + file_read_contents( scratch, zero_terminate, path ); + + CSV_Object csv_nodes; + csv_parse( &csv_nodes, scratch_mem, GlobalAllocator, false ); + + Array enum_strs = csv_nodes.nodes[0].nodes; + Array str_strs = csv_nodes.nodes[1].nodes; + + String enum_entries = String::make_reserve( GlobalAllocator, kilobytes(1) ); + String to_str_entries = String::make_reserve( GlobalAllocator, kilobytes(1) ); + + for (uw idx = 0; idx < enum_strs.num(); idx++) + { + char const* enum_str = enum_strs[idx].string; + char const* entry_to_str = str_strs [idx].string; + + enum_entries.append_fmt( "%s,\n", enum_str ); + to_str_entries.append_fmt( "{ sizeof(\"%s\"), \"%s\" },\n", entry_to_str, entry_to_str); + } + + CodeEnum enum_code = parse_enum(token_fmt("entries", (StrC)enum_entries, stringize( + enum Type : u32 + { + + NumTokenTypes + }; + ))); + +#pragma push_macro( "local_persist" ) +#pragma push_macro( "do_once_start" ) +#pragma push_macro( "do_once_end" ) +#undef local_persist +#undef do_once_start +#undef do_once_end + + CodeFn to_str = parse_function(token_fmt("entries", (StrC)to_str_entries, stringize( + StrC to_str( Type type ) + { + local_persist + StrC lookup[] { + + NumTokens + }; + + return lookup[ type ]; + } + ))); + + CodeFn to_type = parse_function( token_fmt( "entries", (StrC)to_str_entries, stringize( + Type to_type( StrC str ) + { + local_persist + u32 keymap[ NumTokens ]; + do_once_start + for ( u32 index = 0; index < NumTokens; index++ ) + { + StrC enum_str = to_str( (Type)index ); + + // We subtract 1 to remove the null terminator + // This is because the tokens lexed are not null terminated. + keymap[index] = crc32( enum_str.Ptr, enum_str.Len - 1); + } + do_once_end + + u32 hash = crc32( str.Ptr, str.Len ); + + for ( u32 index = 0; index < NumTokens; index++ ) + { + if ( keymap[index] == hash ) + return (Type)index; + } + + return Invalid; + } + ))); + +#pragma pop_macro( "local_persist" ) +#pragma pop_macro( "do_once_start" ) +#pragma pop_macro( "do_once_end" ) + + CodeFn is_specifier = parse_function( token_fmt( "entries", (StrC)to_str_entries, stringize( + bool tok_is_specifier( Type type ) + { + return (tok.Type <= TokType::Star && tok.Type >= TokType::Spec_Alignas) + || tok.Type == TokType::Ampersand + || tok.Type == TokType::Ampersand_DBL + ; + } + ))); + + CodeFn is_access_specifier = parse_function( token_fmt( "entries", (StrC)to_str_entries, stringize( + bool tok_is_access_specifier( Type type ) + { + return tok.Type >= TokType::Access_Private && tok.Type <= TokType::Access_Public; + } + ))); + +#pragma push_macro( "internal" ) +#pragma push_macro( "scast" ) + CodeFn to_access_specifier = parse_function( token_fmt( "entries", (StrC)to_str_entries, stringize( + internal inline + AccessSpec tok_to_access_specifier( Type type ) + { + return scast(AccessSpec, tok.Type); + } + ))); +#pragma pop_macro( "internal" ) +#pragma pop_macro( "scast" ) + + CodeFn is_attribute = parse_function( token_fmt( "entries", (StrC)to_str_entries, stringize( + bool tok_is_attribute( const Token type ) + { + return tok.Type >= TokType::Attr_Alignas && tok.Type <= TokType::Attr_Visibility; + } + ))); + + CodeNamespace nspace = def_namespace( name(ETokType), def_namespace_body( args( enum_code, to_str, to_type, is_specifier, is_access_specifier, to_access_specifier, is_attribute ) ) ); + CodeUsing td_toktype = def_using( name(TokTypeT), def_type( name(ETokType::Type) ) ); + + return def_global_body( args( nspace, td_toktype ) ); } diff --git a/scripts/.clang-format b/scripts/.clang-format index 923a600..b092188 100644 --- a/scripts/.clang-format +++ b/scripts/.clang-format @@ -15,7 +15,7 @@ AlignConsecutiveDeclarations: AcrossComments AlignConsecutiveMacros: Enabled: true AcrossEmptyLines: true - AcrossComments: false + AcrossComments: true AlignEscapedNewlines: Left AlignOperands: DontAlign @@ -80,7 +80,7 @@ CompactNamespaces: true ConstructorInitializerAllOnOneLineOrOnePerLine: true ConstructorInitializerIndentWidth : 4 -ContinuationIndentWidth: 4 +ContinuationIndentWidth: 0 Cpp11BracedListStyle: false diff --git a/singleheader/gen.singleheader.cpp b/singleheader/gen.singleheader.cpp index 85085bb..398c747 100644 --- a/singleheader/gen.singleheader.cpp +++ b/singleheader/gen.singleheader.cpp @@ -184,6 +184,9 @@ int gen_main() Code parsing = scan_file( project_dir "components/gen.interface.parsing.cpp" ); Code untyped = scan_file( project_dir "components/gen.untyped.cpp" ); + CodeBody etoktype = gen_etoktype( project_dir "components/ETokType.csv" ); + CodeNamespace parser_nspace = def_namespace( name(Parser), def_namespace_body( args(etoktype)) ); + Code builder = scan_file( project_dir "filesystem/gen.builder.cpp" ); header.print_fmt( "GEN_NS_BEGIN\n\n"); @@ -192,6 +195,7 @@ int gen_main() header.print( ast ); header.print( interface ); header.print( upfront ); + header.print( parser_nspace ); header.print( parsing ); header.print( untyped ); header.print( builder );