From bc45aa80b69202c7ed19cb77f0dca07d03ff0867 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Sun, 5 May 2024 00:09:38 -0400 Subject: [PATCH] gen_src supports swapping AST_KINDS macros in parser.hpp - Minor fixes to gencpp - Adjust .clang-format to be similar to the codebase's conventions --- codegen/ast_kinds.hpp | 656 ++++++++++++++++++++++++++++++++++++++++ codegen/gen_src.cpp | 234 +++++++++++++- codegen/gencpp/gen.cpp | 3 +- codegen/token_kinds.hpp | 0 codegen/type_kinds.hpp | 0 scripts/.clang-format | 8 +- 6 files changed, 894 insertions(+), 7 deletions(-) create mode 100644 codegen/ast_kinds.hpp create mode 100644 codegen/token_kinds.hpp create mode 100644 codegen/type_kinds.hpp diff --git a/codegen/ast_kinds.hpp b/codegen/ast_kinds.hpp new file mode 100644 index 000000000..70c234acf --- /dev/null +++ b/codegen/ast_kinds.hpp @@ -0,0 +1,656 @@ +/* +AST_KINDS swap content +These are not to be used directly. They are parsed by gen_src.cpp. + +Everything within the AST_KINDS region is a pair with a specific format: + 1. A char const* description string + 2. A struct or typedef definition. + +The description strings are put into the ast_strings lookup table. +The struct definitions that get generated will have the Ast prefix prepended to their names. 
+*/ + +char const* desc_Ident = "identifier"; +struct Ident { + Token token; + Entity *entity; +}; + +char const* desc_Implicit = "implicit"; +typedef Token Implicit; + +char const* desc_Uninit = "uninitialized value"; +typedef Token Uninit; + +char const* desc_BasicLit = "basic literal"; +struct BasicLit { + Token token; +}; + +char const* desc_BasicDirective = "basic directive"; +struct BasicDirective { + Token token; + Token name; +}; + +char const* desc_Ellipsis = "ellipsis"; +struct Ellipsis { + Token token; + Ast *expr; +}; + +char const* desc_ProcGroup = "procedure group"; +struct ProcGroup { + Token token; + Token open; + Token close; + Slice args; +}; + +char const* desc_ProcLit = "procedure literal"; +struct ProcLit { + Ast *type; + Ast *body; + u64 tags; + ProcInlining inlining; + Token where_token; + Slice where_clauses; + DeclInfo *decl; +}; + +char const* desc_CompountLit = "compound literal"; +struct CompoundLit { + Ast *type; + Slice elems; + Token open, close; + i64 max_count; + Ast *tag; +}; + +#pragma region Expressions +char const* desc_ExprBegin = ""; +typedef bool _ExprBegin; + +char const* desc_BadExpr = "bad expression"; +struct BadExpr { + Token begin, end; +}; + +char const* desc_TagExpr = "tag expression"; +struct TagExpr { + Token token, name; + Ast *expr; +}; + +char const* desc_UnaryExpr = "unary expression"; +struct UnaryExpr { + Token op; + Ast *expr; +}; + +char const* desc_BinaryExpr = "binary expression"; +struct BinaryExpr { + Token op; + Ast *left, *right; +}; + +char const* desc_ParenExpr = "parentheses expression"; +struct ParenExpr { + Ast *expr; + Token open, close; +}; + +char const* desc_SelectorExpr = "selector expression"; +struct SelectorExpr { + Token token; + Ast *expr, *selector; + u8 swizzle_count; /*maximum of 4 components, if set, count >= 2*/ + u8 swizzle_indices; /*2 bits per component*/ + bool is_bit_field; +}; + +char const* desc_ImplicitSelectorExpr = "implicit selector expression"; +struct 
ImplicitSelectorExpr { + Token token; + Ast *selector; +}; + +char const* desc_SelectorCallExpr = "selector call expression"; +struct SelectorCallExpr { + Token token; + Ast *expr, *call; + bool modified_call; +}; + +char const* desc_IndentExpr = "index expression"; +struct IndexExpr { + Ast *expr, *index; + Token open, close; +}; + +char const* desc_DerefExpr = "dereference expression"; +struct DerefExpr { + Ast *expr; + Token op; +}; + +char const* desc_SliceExpr = "slice expression"; +struct SliceExpr { + Ast *expr; + Token open, close; + Token interval; + Ast *low, *high; +}; + +char const* desc_CallExpr = "call expression"; +struct CallExpr { + Ast *proc; + Slice args; + Token open; + Token close; + Token ellipsis; + ProcInlining inlining; + bool optional_ok_one; + bool was_selector; + AstSplitArgs *split_args; +}; + +char const* desc_FieldValue = "field value"; +struct FieldValue { + Token eq; + Ast *field, *value; +}; + +char const* desc_EnumFieldValue = "enum field value"; +struct EnumFieldValue { + Ast *name; + Ast *value; + CommentGroup *docs; + CommentGroup *comment; +}; + +char const* desc_TernaryIfExpr = "ternary if expression"; +struct TernaryIfExpr { + Ast *x, *cond, *y; +}; + +char const* desc_TernaryWhenExpr = "ternary when expression"; +struct TernaryWhenExpr { + Ast *x, *cond, *y; +}; + +char const* desc_OrElseExpr = "or_else expression"; +struct OrElseExpr { + Ast *x; + Token token; + Ast *y; +}; + +char const* desc_OrReturnExpr = "or_return expression"; +struct OrReturnExpr { + Ast *expr; + Token token; +}; + +char const* desc_OrBranchExpr = "or branch expression"; +struct OrBranchExpr { + Ast *expr; + Token token; + Ast *label; +}; + +char const* desc_TypeAssertion = "type assertion"; +struct TypeAssertion { + Ast *expr; + Token dot; + Ast *type; + Type *type_hint; + bool ignores[2]; +}; + +char const* desc_TypeCAst = "type cast"; +struct TypeCast { + Token token; + Ast *type, *expr; +}; + +char const* desc_AutoCAst = "auto_cast"; +struct 
AutoCast { + Token token; + Ast *expr; +}; + +char const* desc_InlineAsmExpr = "inline asm expression"; +struct InlineAsmExpr { + Token token; + Token open, close; + Slice param_types; + Ast *return_type; + Ast *asm_string; + Ast *constraints_string; + bool has_side_effects; + bool is_align_stack; + InlineAsmDialectKind dialect; +}; + +char const* desc_MatrixIndexExpr = "matrix index expression"; +struct MatrixIndexExpr { + Ast *expr, *row_index, *column_index; + Token open, close; +}; + +char const* desc__ExprEnd = ""; +typedef bool _ExprEnd; +#pragma endregion Expressions + +#pragma region Statements +char const* desc__StmtBegin = ""; +typedef bool _StmtBegin; + +char const* desc_BadStmt = "bad statement"; +struct BadStmt { + Token begin, end; +}; + +char const* desc_EmptyStmt = "empty statement"; +struct EmptyStmt { + Token token; +}; + +char const* desc_ExprStmt = "expression statement"; +struct ExprStmt { + Ast* expr; +}; + +char const* desc_AssignStmt = "assign statement"; +struct AssignStmt { + Token op; + Slice lhs, rhs; +}; + +#pragma region Complex Statements +char const* desc__ComplexStmtBegin = ""; +typedef bool _ComplexStmtBegin; + +char const* desc_BlockStmt = "block statement"; +struct BlockStmt { + Scope *scope; + Slice stmts; + Ast *label; + Token open, close; +}; + +char const* desc_IfStmt = "if statement"; +struct IfStmt { + Scope *scope; + Token token; + Ast *label; + Ast *init; + Ast *cond; + Ast *body; + Ast *else_stmt; +}; + +char const* desc_WhenStmt = "when statement"; +struct WhenStmt { + Token token; + Ast *cond; + Ast *body; + Ast *else_stmt; + bool is_cond_determined; + bool determined_cond; +}; + +char const* desc_ReturnStmt = "return statement"; +struct ReturnStmt { + Token token; + Slice results; +}; + +char const* desc_ForStmt = "for statement"; +struct ForStmt { + Scope *scope; + Token token; + Ast *label; + Ast *init; + Ast *cond; + Ast *post; + Ast *body; +}; + +char const* desc_RangeStmt = "range statement"; +struct RangeStmt { 
+ Scope *scope; + Token token; + Ast *label; + Slice vals; + Token in_token; + Ast *expr; + Ast *body; + bool reverse; +}; + +char const* desc_UnrollRangeStmt = "unroll range statement"; +struct UnrollRangeStmt { + Scope *scope; + Token unroll_token; + Token for_token; + Ast *val0; + Ast *val1; + Token in_token; + Ast *expr; + Ast *body; +}; + +char const* desc_CaseClause = "case clause"; +struct CaseClause { + Scope *scope; + Token token; + Slice list; + Slice stmts; + Entity *implicit_entity; +}; + +char const* desc_SwitchStmt = "switch statement"; +struct SwitchStmt { + Scope *scope; + Token token; + Ast *label; + Ast *init; + Ast *tag; + Ast *body; + bool partial; +}; + +char const* desc_TypeSwitchStmt = "type switch statement"; +struct TypeSwitchStmt { + Scope *scope; + Token token; + Ast *label; + Ast *tag; + Ast *body; + bool partial; +}; + +char const* desc_DeferStmt = "defer statement"; +struct DeferStmt { + Token token; + Ast* stmt; +}; + +char const* desc_BranchStmt = "branch statement"; +struct BranchStmt { + Token token; + Ast* label; +}; + +char const* desc_UsingStmt = "using statement"; +struct UsingStmt { + Token token; + Slice list; +}; + +char const* desc__ComplexStmtEnd = ""; +typedef bool _ComplexStmtEnd; +#pragma endregion Complex Statements + +char const* desc__StmtEnd = ""; +typedef bool _StmtEnd; +#pragma endregion Statements + +#pragma region Declarations +char const* desc__DeclBegin = ""; +typedef bool _DeclBegin; + +char const* desc_BadDecl = "bad declaration"; +struct BadDecl { + Token begin, end; +}; + +char const* desc_ForeignBlockDecl = "foreign block declaration"; +struct ForeignBlockDecl { + Token token; + Ast *foreign_library; + Ast *body; + Array attributes; + CommentGroup *docs; +}; + +char const* desc_Label = "label"; +struct Label { + Token token; + Ast *name; +}; + +char const* desc_ValueDecl = "value declaration"; +struct ValueDecl { + Slice names; + Ast *type; + Slice values; + Array attributes; + CommentGroup *docs; + 
CommentGroup *comment; + bool is_using; + bool is_mutable; +}; + +char const* desc_PackageDecl = "package declaration"; +struct PackageDecl { + Token token; + Token name; + CommentGroup *docs; + CommentGroup *comment; +}; + +char const* desc_ImportDecl = "import declaration"; +struct ImportDecl { + AstPackage *package; + Token token; + Token relpath; + String fullpath; + Token import_name; + Array attributes; + CommentGroup *docs; + CommentGroup *comment; +}; + +char const* desc_ForeignImportDecl = "foreign import declaration"; +struct ForeignImportDecl { + Token token; + Slice filepaths; + Token library_name; + String collection_name; + Slice fullpaths; + Array attributes; + CommentGroup *docs; + CommentGroup *comment; +}; + +char const* desc__DeclEnd = ""; +typedef bool _DeclEnd; +#pragma endregion Declarations + +char const* desc_Attribute = "attribute"; +struct Attribute { + Token token; + Slice elems; + Token open, close; +}; + +char const* desc_Field = "field"; +struct Field { + Slice names; + Ast *type; + Ast *default_value; + Token tag; + u32 flags; + CommentGroup *docs; + CommentGroup *comment; +}; + +char const* desc_BitFieldField = "bit field field"; +struct BitFieldField { + Ast *name; + Ast *type; + Ast *bit_size; + Token tag; + CommentGroup *docs; + CommentGroup *comment; +}; + +char const* desc_FieldList = "field list"; +struct FieldList { + Token token; + Slice list; +}; + +#pragma region Types +char const* desc__TypeBegin = ""; +typedef bool _TypeBegin; + +char const* desc_TypeidType = "typeid"; +struct TypeidType +{ + Token token; + Ast *specialization; +}; + +char const* desc_HelperType = "helper type"; +struct HelperType { + Token token; + Ast *type; +}; + +char const* desc_DistinctType = "distinct type"; +struct DistinctType { + Token token; + Ast *type; +}; + +char const* desc_PolyType = "polymorphic type"; +struct PolyType { + Token token; + Ast *type; + Ast *specialization; +}; + +char const* desc_ProcType = "procedure type"; +struct 
ProcType { + Scope *scope; + Token token; + Ast *params; + Ast *results; + u64 tags; + ProcCallingConvention calling_convention; + bool generic; + bool diverging; +}; + +char const* desc_PointerType = "pointer type"; +struct PointerType { + Token token; + Ast *type; + Ast *tag; +}; + +char const* desc_RelativeType = "relative type"; +struct RelativeType { + Ast *tag; + Ast *type; +}; + +char const* desc_MultiPointerType = "multi pointer type"; +struct MultiPointerType { + Token token; + Ast *type; +}; + +char const* desc_ArrayType = "array type"; +struct ArrayType { + Token token; + Ast *count; + Ast *elem; + Ast *tag; +}; + +char const* desc_DynamicType = "dynamic array type"; +struct DynamicArrayType { + Token token; + Ast *elem; + Ast *tag; +}; + +char const* desc_StructType = "struct type"; +struct StructType { + Scope *scope; + Token token; + Slice fields; + isize field_count; + Ast *polymorphic_params; + Ast *align; + Ast *field_align; + Token where_token; + Slice where_clauses; + bool is_packed; + bool is_raw_union; + bool is_no_copy; +}; + +char const* desc_UnionType = "union type"; +struct UnionType { + Scope *scope; + Token token; + Slice variants; + Ast *polymorphic_params; + Ast *align; + UnionTypeKind kind; + Token where_token; + Slice where_clauses; +}; + +char const* desc_EnumType = "enum type"; +struct EnumType { + Scope *scope; + Token token; + Ast *base_type; + Slice fields; /* FieldValue */ + bool is_using; +}; + +char const* dec_BitSetType = "bitset type"; +struct BitSetType { + Token token; + Ast *elem; + Ast *underlying; +}; + +char const* desc_BitfieldType = "bitfield type"; +struct BitFieldType { + Scope *scope; + Token token; + Ast *backing_type; + Token open; + Slice fields; /* BitFieldField */ + Token close; +}; + +char const* desc_MapType = "map type"; +struct MapType { + Token token; + Ast *count; + Ast *key; + Ast *value; +}; + +char const* desc_MatrixType = "matrix type"; +struct MatrixType { + Token token; + Ast *row_count; + Ast 
*column_count; + Ast *elem; + bool is_row_major; +}; + +char const* desc__TypeEnd = ""; +typedef bool _TypeEnd; +#pragma endregion Types diff --git a/codegen/gen_src.cpp b/codegen/gen_src.cpp index 290261d39..e2095c688 100644 --- a/codegen/gen_src.cpp +++ b/codegen/gen_src.cpp @@ -10,8 +10,12 @@ using namespace gen; #endif #pragma region Directories + // Program assumes its working directory is the src directory -#define path_src "" +#define path_root "" +#define path_codegen path_root "codegen/" +#define path_src path_root "src/" + #pragma endregion Directories inline @@ -43,12 +47,240 @@ void format_file( char const* path ) #undef cf_verbse } +struct Odin_AstKind { + StringCached desc; + Code def; +}; + +Array get_odin_ast_kinds() +{ + local_persist Array kinds = Array::init_reserve(GlobalAllocator, kilobytes(64)); + { + local_persist s32 done_once = 0; + if (done_once) + return kinds; + ++ done_once; + } + + CodeType t_char_const_ptr = parse_type(code(char const*)); + + CodeBody ast_kinds_header = parse_file( path_codegen "ast_kinds.hpp" ); + for ( Code code = ast_kinds_header.begin(); code != ast_kinds_header.end(); ++ code ) + { + switch (code->Type) + { + using namespace ECode; + case Comment: + case Preprocess_Pragma: + // Ignore + continue; + + case Variable: + { + Odin_AstKind entry { {nullptr}, {} }; + + CodeVar var = code.cast(); + if ( ! var->ValueType.is_equal( t_char_const_ptr ) ) + { + __debugbreak(); + log_failure("Expected all globally defined variables to be char const* type"); + return kinds; + } + if ( ! var->Value || ! 
var->Value->Content ) + { + __debugbreak(); + log_failure("Expected all globally defined variable to have a string assigned to it"); + return kinds; + } + + // Grab the description + entry.desc = var->Value->Content; + ++ code; + + // Grab the definition + if ( code->Type != Struct && code->Type != Typedef ) + { + __debugbreak(); + log_failure("Expected a struct or typedef for the entry definition"); + return kinds; + } + + entry.def = code; + kinds.append(entry); + } + continue; + + case Struct: + __debugbreak(); + log_failure("Expected a description definition as char const* first"); + return kinds; + break; + } + } + return kinds; +} + int gen_main() { gen::init(); log_fmt("Generating code for Odin's src\n"); + // Remove TOKEN_KINDS usage in tokenizer.cpp + if (0) + { + } + + // Remove AST_KINDS macro usage in parser.hpp + if (1) + { + CodeBody src_parser_header = parse_file( path_src "parser.hpp" ); + CodeBody body = def_body( ECode::Global_Body ); + + body.append( def_comment(txt("NOTICE(github: Ed94): This is a generated variant of parser.hpp using /codegen/gen_src.cpp"))); + body.append(fmt_newline); + + Array ast_kinds = get_odin_ast_kinds(); + + for (Code code = src_parser_header.begin(); code != src_parser_header.end(); ++ code) + { + switch (code->Type) + { + case ECode::Preprocess_Define: + if ( code->Name.starts_with( txt("AST_KINDS"))) { + // Skip, we don't want it. 
+ continue; + } + if ( code->Name.starts_with( txt("AST_KIND"))) { + // Skip the next 3 definitions + ++ code; + ++ code; + continue; + } + body.append(code); + continue; + + case ECode::Untyped: + if (code->Content.starts_with(txt("AST_KINDS"))) + break; + + case ECode::Enum: + { + if (code->Name.starts_with( txt("AstKind"))) + { + // Swap with generated variant + CodeBody swap_body = def_body( ECode::Enum_Body ); + { + swap_body.append( code_str(Ast_Invalid,)); + for (Odin_AstKind& kind : ast_kinds) + swap_body.append( untyped_str( String::fmt_buf(GlobalAllocator, "Ast_%S,", kind.def->Name ))); + swap_body.append( code_str(Ast_COUNT)); + } + CodeEnum swapped_enum = code.cast().duplicate(); + swapped_enum->Body = swap_body; + body.append(swapped_enum); + } + else + body.append(code); + } + continue; + + case ECode::Variable: + { + if (code->Name.starts_with(txt("ast_strings"))) + { + // Swap with generated table + String generated_table = String::make_reserve(GlobalAllocator, kilobytes(32)); + { + for (Odin_AstKind& kind : ast_kinds) + generated_table.append(token_fmt("desc", (StrC)kind.desc, stringize( + { cast(u8 *) , gb_size_of() -1 }, + ))); + } + CodeVar swapped_table = code.cast().duplicate(); + swapped_table->Value = code_fmt( "kinds", (StrC)generated_table, stringize( + { + {cast(u8 *)"invalid node", gb_size_of("invalid node")},\n + + })); + body.append(swapped_table); + body.append(fmt_newline); + + // Right after is where the struct definitions were defined, we'll insert them here + body.append(def_pragma(txt("region AST_KINDS"))); + body.append(fmt_newline); + for (Odin_AstKind& kind : ast_kinds) + { + Code def = kind.def.duplicate(); + def->Name = get_cached_string( String::fmt_buf(GlobalAllocator, "Ast%S", kind.def->Name)); + body.append( def ); + body.append(fmt_newline); + } + body.append(def_pragma(txt("endregion AST_KINDS"))); + continue; + } + if (code->Name.starts_with(txt("ast_variant_sizes"))) + { + // Swap with generated table + String 
generated_table = String::make_reserve(GlobalAllocator, kilobytes(32)); + { + for (Odin_AstKind& kind : ast_kinds) + generated_table.append(token_fmt( "name", (StrC)kind.def->Name, stringize( + gb_size_of(Ast),\n + ))); + } + CodeVar swapped_table = code.cast().duplicate(); + swapped_table->Value = code_fmt( "kinds", (StrC)generated_table, stringize( + { + 0,\n + + })); + body.append(swapped_table); + continue; + } + body.append(code); + } + continue; + + case ECode::Struct: + { + CodeStruct code_struct = code.cast(); + if (code->Name.starts_with(txt("Ast"))) + for (Code ast_code : code_struct->Body) switch (ast_code->Type) + { + case ECode::Union: + // Swap out the union's contents with the generated member definitions + CodeBody body_swap = def_body(ECode::Union_Body); + for (Odin_AstKind kind : ast_kinds) + body_swap.append( parse_variable( token_fmt( "name", (StrC)kind.def->Name, stringize( + Ast ; + )))); + ast_code->Body = rcast(AST*, body_swap.ast); + break; + default: + continue; + } + body.append(code); + } + continue; + + default: + body.append(code); + continue; + } + } + + Builder header = Builder::open( path_src "parser.hpp" ); + header.print(body); + header.write(); + format_file( path_src "parser.hpp" ); + } + + // Remove TYPE_KINDS usage in types.cpp + if (0) + { + + } // gen::deinit(); return 0; diff --git a/codegen/gencpp/gen.cpp b/codegen/gencpp/gen.cpp index 55a86f32a..0994c9502 100644 --- a/codegen/gencpp/gen.cpp +++ b/codegen/gencpp/gen.cpp @@ -1526,7 +1526,6 @@ void CodeBody::to_string( String& result ) if (left > 1) switch (ast->Type) { case ECode::Enum_Body: - case ECode::Union_Body: result.append("\n"); } ++curr; @@ -7605,7 +7604,7 @@ namespace parser if ( check( TokType::Operator ) && currtok.Text[0] == '[' && currtok.Text[1] == ']' ) { - Code array_expr = untyped_str( currtok ); + Code array_expr = untyped_str( get_cached_string(txt(" ")) ); eat( TokType::Operator ); // [] diff --git a/codegen/token_kinds.hpp b/codegen/token_kinds.hpp 
new file mode 100644 index 000000000..e69de29bb diff --git a/codegen/type_kinds.hpp b/codegen/type_kinds.hpp new file mode 100644 index 000000000..e69de29bb diff --git a/scripts/.clang-format b/scripts/.clang-format index 5c39f525a..d22b9631e 100644 --- a/scripts/.clang-format +++ b/scripts/.clang-format @@ -71,7 +71,7 @@ BreakAfterAttributes: Always BreakArrays: true BreakBeforeInlineASMColon: OnlyMultiline BreakBeforeBinaryOperators: NonAssignment -BreakBeforeBraces: Allman +BreakBeforeBraces: Attach BreakBeforeInheritanceComma: true BreakInheritanceList: BeforeComma BreakBeforeConceptDeclarations: true @@ -119,11 +119,11 @@ MaxEmptyLinesToKeep: 4 NamespaceIndentation: All -PointerAlignment: Left +PointerAlignment: Right -QualifierAlignment: Leave +QualifierAlignment: Right -ReferenceAlignment: Left +ReferenceAlignment: Right ReflowComments: true