From 3b81eea6884b6dd9bd1103c870e4eedd8681fe6e Mon Sep 17 00:00:00 2001 From: Ed_ Date: Wed, 19 Feb 2025 10:50:55 -0500 Subject: [PATCH] Doing some initial prep for parser overhaul --- base/components/ast.hpp | 5 +-- base/components/interface.hpp | 44 +++++++++++++++------------ base/components/interface.parsing.cpp | 23 ++++++++++++++ base/components/parser.cpp | 11 ++++--- base/components/parser_types.hpp | 21 ++++++++----- gen_c_library/c_library.refactor | 2 +- 6 files changed, 70 insertions(+), 36 deletions(-) diff --git a/base/components/ast.hpp b/base/components/ast.hpp index 2c9f9af..0041456 100644 --- a/base/components/ast.hpp +++ b/base/components/ast.hpp @@ -406,7 +406,8 @@ struct AST Code PostNameMacro; // Only used with parameters for specifically UE_REQUIRES (Thanks Unreal) }; }; - StrCached Content; // Attributes, Comment, Execution, Include + StrCached Content; // Attributes, Comment, Execution, Include + // TokenSlice Content; // TODO(Ed): Use a token slice for content struct { Specifier ArrSpecs[AST_ArrSpecs_Cap]; // Specifiers Code NextSpecs; // Specifiers; If ArrSpecs is full, then NextSpecs is used. @@ -422,7 +423,7 @@ struct AST Code Next; Code Back; }; - Token* Token; // Reference to starting token, only available if it was derived from parsing. + Token* Token; // Reference to starting token, only available if it was derived from parsing. // TODO(Ed): Change this to a token slice. Code Parent; CodeType Type; // CodeFlag CodeFlags; diff --git a/base/components/interface.hpp b/base/components/interface.hpp index 0f58ba4..bf74f1c 100644 --- a/base/components/interface.hpp +++ b/base/components/interface.hpp @@ -391,37 +391,41 @@ forceinline CodeBody def_union_body ( s32 num, Code* codes ) #pragma region Parsing -#if 0 -struct StackNode +struct ParseStackNode { - StackNode* Prev; + ParseStackNode* Prev; - Token Start; - Token Name; // The name of the AST node (if parsed) - Str FailedProc; // The name of the procedure that failed + TokenSlice tokens; + Token* Start; + Str Name; // The name of the AST node (if parsed) + Str ProcName; // The name of the procedure + Code code; // Relevant AST node + // TODO(Ed): When an error occurs, the parse stack is not released and instead the scope is left dangling. }; -// Stack nodes are allocated the error's allocator -struct Error +struct ParseMessage { - StrBuilder message; - StackNode* context_stack; + ParseMessage* Next; + ParseStackNode* Scope; + Str Log; + LogLevel Level; }; struct ParseInfo { - Arena FileMem; - Arena TokMem; - Arena CodeMem; - - FileContents FileContent; - Array Tokens; - Array Errors; - // Errors are allocated to a dedicated general arena. + ParseMessage* messages; + LexedInfo lexed; + Code result; }; -CodeBody parse_file( Str path ); -#endif +struct Opts_parse +{ + AllocatorInfo backing_msgs; + AllocatorInfo backing_tokens; + AllocatorInfo backing_ast; +}; + +ParseInfo wip_parse_str( LexedInfo lexed, Opts_parse opts GEN_PARAM_DEFAULT ); GEN_API CodeClass parse_class ( Str class_def ); GEN_API CodeConstructor parse_constructor ( Str constructor_def ); diff --git a/base/components/interface.parsing.cpp b/base/components/interface.parsing.cpp index d5f4bef..cad87eb 100644 --- a/base/components/interface.parsing.cpp +++ b/base/components/interface.parsing.cpp @@ -8,6 +8,29 @@ // Publically Exposed Interface +ParseInfo wip_parse_str(LexedInfo lexed, Opts_parse opts) +{ + TokArray toks; + if (lexed.tokens.Num == 0 && lexed.tokens.Ptr == nullptr) { + check_parse_args(lexed.text); + toks = lex(lexed.text); + + TokenSlice slice = { toks.Arr, scast(s32, array_num(toks.Arr)) }; + lexed.tokens = slice; + } + ParseInfo info = struct_zero(ParseInfo); + info.lexed = lexed; + + // TODO(Ed): ParseInfo should be set to the parser context. + + _ctx->parser.Tokens = toks; + push_scope(); + CodeBody result = parse_global_nspace(CT_Global_Body); + parser_pop(& _ctx->parser); + + return info; +} + CodeClass parse_class( Str def ) { check_parse_args( def ); diff --git a/base/components/parser.cpp b/base/components/parser.cpp index b685406..709723d 100644 --- a/base/components/parser.cpp +++ b/base/components/parser.cpp @@ -11,7 +11,7 @@ constexpr bool lex_dont_skip_formatting = false; constexpr bool lex_skip_formatting = true; -void parser_push( ParseContext* ctx, StackNode* node ) +void parser_push( ParseContext* ctx, ParseStackNode* node ) { node->Prev = ctx->Scope; ctx->Scope = node; @@ -60,7 +60,7 @@ StrBuilder parser_to_strbuilder(ParseContext ctx) else strbuilder_append_fmt(& result, "\t(%d, %d)\n", last_valid.Line, last_valid.Column ); - StackNode* curr_scope = ctx.Scope; + ParseStackNode* curr_scope = ctx.Scope; s32 level = 0; do { @@ -181,9 +181,10 @@ bool _check_parse_args( Str def, char const* func_name ) # define check_noskip( Type_ ) ( left && currtok_noskip.Type == Type_ ) # define check( Type_ ) ( left && currtok.Type == Type_ ) -# define push_scope() \ - Str null_name = {}; \ - StackNode scope = { nullptr, lex_current( & _ctx->parser.Tokens, lex_dont_skip_formatting ), null_name, txt( __func__ ) }; \ +// TODO(Ed): Don't do this anymore, we need a better initializer. +# define push_scope() \ + Str null_name = {}; \ + ParseStackNode scope = { nullptr, {nullptr, 0}, lex_current( & _ctx->parser.Tokens, lex_dont_skip_formatting ), null_name, txt( __func__ ), { nullptr} }; \ parser_push( & _ctx->parser, & scope ) #pragma endregion Helper Macros diff --git a/base/components/parser_types.hpp b/base/components/parser_types.hpp index a2bd57a..8969740 100644 --- a/base/components/parser_types.hpp +++ b/base/components/parser_types.hpp @@ -97,6 +97,12 @@ struct TokArray s32 Idx; }; +struct TokenSlice +{ + Token* Ptr; + s32 Num; +}; + struct LexContext { Str content; @@ -108,19 +114,18 @@ struct LexContext Token token; }; -struct StackNode +struct LexedInfo { - StackNode* Prev; - - Token* Start; - Str Name; // The name of the AST node (if parsed) - Str ProcName; // The name of the procedure + Str text; + TokenSlice tokens; }; +typedef struct ParseStackNode ParseStackNode; + struct ParseContext { - TokArray Tokens; - StackNode* Scope; + TokArray Tokens; + ParseStackNode* Scope; }; enum MacroType : u16 diff --git a/gen_c_library/c_library.refactor b/gen_c_library/c_library.refactor index 96021a1..4c3da39 100644 --- a/gen_c_library/c_library.refactor +++ b/gen_c_library/c_library.refactor @@ -535,7 +535,7 @@ namespace Lexer_, gen_Lexer_ word LexContext, gen_LexContext word lex, gen_lex -word StackNode, gen_StackNode +word ParseStackNode, gen_ParseStackNode word ParseContext, gen_ParseContext // namespace parse_, gen_parse_