From 26623075add09f91e3dde411df6533f0cab37983 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Wed, 19 Feb 2025 12:10:11 -0500 Subject: [PATCH] started to work on modularizing parser code paths --- base/components/ast.hpp | 2 +- base/components/gen/ast_inlines.hpp | 2 +- base/components/interface.hpp | 19 ++++++++++++------- base/components/interface.parsing.cpp | 2 +- base/components/interface.untyped.cpp | 13 +++++++++++++ base/components/lexer.cpp | 5 ++++- base/components/parser.cpp | 25 ++++++++++++++++++------- base/components/parser_types.hpp | 27 +++++++++++++++++++++------ base/dependencies/macros.hpp | 2 +- base/dependencies/strings.hpp | 2 +- 10 files changed, 73 insertions(+), 26 deletions(-) diff --git a/base/components/ast.hpp b/base/components/ast.hpp index 13c679e..ae2513e 100644 --- a/base/components/ast.hpp +++ b/base/components/ast.hpp @@ -404,7 +404,7 @@ struct AST }; }; StrCached Content; // Attributes, Comment, Execution, Include - // TokenSlice Content; // TODO(Ed): Use a token slice for content + TokenSlice ContentToks; // TODO(Ed): Use a token slice for content struct { Specifier ArrSpecs[AST_ArrSpecs_Cap]; // Specifiers Code NextSpecs; // Specifiers; If ArrSpecs is full, then NextSpecs is used. 
diff --git a/base/components/gen/ast_inlines.hpp b/base/components/gen/ast_inlines.hpp index 2ccb518..7ec01aa 100644 --- a/base/components/gen/ast_inlines.hpp +++ b/base/components/gen/ast_inlines.hpp @@ -1,6 +1,6 @@ #ifdef INTELLISENSE_DIRECTIVES #pragma once -#include "inlines.hpp" +#include "components/types.hpp" #endif // This file was generated automatially by gencpp's bootstrap.cpp (See: https://github.com/Ed94/gencpp) diff --git a/base/components/interface.hpp b/base/components/interface.hpp index 97e14f2..29495d6 100644 --- a/base/components/interface.hpp +++ b/base/components/interface.hpp @@ -15,7 +15,7 @@ \▓▓▓▓▓▓ \▓▓▓▓▓▓▓\▓▓ \▓▓ \▓▓▓▓▓▓\▓▓ \▓▓ \▓▓ \▓▓▓▓ \▓▓▓▓▓▓▓\▓▓ \▓▓ \▓▓▓▓▓▓▓ \▓▓▓▓▓▓▓ \▓▓▓▓▓▓▓ */ -enum LogLevel : u32 +enum LogLevel //: u32 { LL_Null, LL_Note, @@ -24,14 +24,17 @@ enum LogLevel : u32 LL_Fatal, LL_UnderlyingType = GEN_U32_MAX, }; +typedef enum LogLevel LogLevel; Str loglevel_to_str(LogLevel level) { local_persist Str lookup[] = { - txt("Info"), - txt("Warning"), - txt("Panic"), + { "Null", sizeof("Null") - 1 }, + { "Note", sizeof("Note") - 1 }, + { "Warning", sizeof("Warning") - 1 }, + { "Error", sizeof("Error") - 1 }, + { "Fatal", sizeof("Fatal") - 1 }, }; return lookup[level]; } @@ -148,7 +151,7 @@ inline void logger_fmt(Context* ctx, LogLevel level, char const* fmt, ...) { local_persist thread_local - PrintF_Buffer buf = struct_init(PrintF_Buffer, {0}); + PrintF_Buffer buf = struct_zero(PrintF_Buffer); va_list va; va_start(va, fmt); @@ -403,6 +406,7 @@ struct ParseStackNode // TODO(Ed): When an error occurs, the parse stack is not released and instead the scope is left dangling. 
}; +typedef struct ParseMessage ParseMessage; struct ParseMessage { ParseMessage* Next; @@ -418,14 +422,14 @@ struct ParseInfo Code result; }; -struct Opts_parse +struct ParseOpts { AllocatorInfo backing_msgs; AllocatorInfo backing_tokens; AllocatorInfo backing_ast; }; -ParseInfo wip_parse_str( LexedInfo lexed, Opts_parse opts GEN_PARAM_DEFAULT ); +ParseInfo wip_parse_str( LexedInfo lexed, ParseOpts* opts GEN_PARAM_DEFAULT ); GEN_API CodeClass parse_class ( Str class_def ); GEN_API CodeConstructor parse_constructor ( Str constructor_def ); @@ -459,6 +463,7 @@ Str token_fmt_impl( ssize, ... ); GEN_API Code untyped_str ( Str content); GEN_API Code untyped_fmt ( char const* fmt, ... ); GEN_API Code untyped_token_fmt( s32 num_tokens, char const* fmt, ... ); +GEN_API Code untyped_toks ( TokenSlice tokens ); #pragma endregion Untyped text diff --git a/base/components/interface.parsing.cpp b/base/components/interface.parsing.cpp index 69acb69..0c10c39 100644 --- a/base/components/interface.parsing.cpp +++ b/base/components/interface.parsing.cpp @@ -8,7 +8,7 @@ // Publically Exposed Interface -ParseInfo wip_parse_str(LexedInfo lexed, Opts_parse opts) +ParseInfo wip_parse_str(LexedInfo lexed, ParseOpts* opts) { TokArray toks; if (lexed.tokens.Num == 0 && lexed.tokens.Ptr == nullptr) { diff --git a/base/components/interface.untyped.cpp b/base/components/interface.untyped.cpp index 69d74bb..3e05369 100644 --- a/base/components/interface.untyped.cpp +++ b/base/components/interface.untyped.cpp @@ -176,3 +176,16 @@ Code untyped_token_fmt( s32 num_tokens, char const* fmt, ... 
) return result; } + +Code untyped_toks( TokenSlice tokens ) +{ + if ( tokens.Num == 0 ) { + log_failure( "untyped_toks: empty token slice" ); + return InvalidCode; + } + Code + result = make_code(); + result->Type = CT_Untyped; + result->ContentToks = tokens; + return result; +} diff --git a/base/components/lexer.cpp b/base/components/lexer.cpp index 6f4ca55..0fc46b8 100644 --- a/base/components/lexer.cpp +++ b/base/components/lexer.cpp @@ -564,9 +564,12 @@ void lex_found_token( LexContext* ctx ) array_append( _ctx->Lexer_Tokens, ctx->token ); } +// TODO(Ed): We should dynamically allocate the lexer's array in Allocator_DyanmicContainers. + // TODO(Ed): We need to attempt to recover from a lex failure? + neverinline -// TokArray lex( Array tokens, Str content ) +// void lex( Array tokens, Str content ) TokArray lex( Str content ) { LexContext c; LexContext* ctx = & c; diff --git a/base/components/parser.cpp b/base/components/parser.cpp index d470021..edd5e08 100644 --- a/base/components/parser.cpp +++ b/base/components/parser.cpp @@ -191,7 +191,7 @@ bool _check_parse_args( Str def, char const* func_name ) // Procedure Forwards ( Entire parser internal parser interface ) -internal Code parse_array_decl (); +internal Code parse_array_decl (ParseContext* ctx); internal CodeAttributes parse_attributes (); internal CodeComment parse_comment (); internal Code parse_complicated_definition ( TokType which ); @@ -491,8 +491,15 @@ StrBuilder parser_strip_formatting( Str raw_text, bool preserve_newlines ) return content; } +StrBuilder parser_strip_formatting_2(TokenSlice tokens) +{ + // TODO(Ed): Use this to produce strings for validation purposes. We shouldn't serialize down from tokens once we start storing token slices for content. 
+ StrBuilder result = struct_zero(StrBuilder); + return result; +} + internal -Code parse_array_decl() +Code parse_array_decl(ParseContext* ctx) { push_scope(); @@ -525,16 +532,20 @@ Code parse_array_decl() return InvalidCode; } + TokenSlice tokens = { & currtok, 1 }; Token untyped_tok = currtok; while ( left && currtok.Type != Tok_BraceSquare_Close ) { eat( currtok.Type ); + ++ tokens.Num; } - untyped_tok.Text.Len = ( (sptr)prevtok.Text.Ptr + prevtok.Text.Len ) - (sptr)untyped_tok.Text.Ptr; + // untyped_tok.Text.Len = ( (sptr)prevtok.Text.Ptr + prevtok.Text.Len ) - (sptr)untyped_tok.Text.Ptr; + untyped_tok.Text = token_range_to_str(untyped_tok, prevtok); Code array_expr = untyped_str( untyped_tok.Text ); + // Code array_expr = untyped_toks( tokens ); // TODO(Ed): Use token slice instead of untyped strings. // [ if ( left == 0 ) @@ -557,7 +568,7 @@ Code parse_array_decl() // Its a multi-dimensional array if ( check( Tok_BraceSquare_Open )) { - Code adjacent_arr_expr = parse_array_decl(); + Code adjacent_arr_expr = parse_array_decl(ctx); // [ ][ ]... 
array_expr->Next = adjacent_arr_expr; @@ -3291,7 +3302,7 @@ CodeVar parse_variable_after_name( { push_scope(); - Code array_expr = parse_array_decl(); + Code array_expr = parse_array_decl(& _ctx->parser); Code expr = NullCode; Code bitfield_expr = NullCode; @@ -5345,7 +5356,7 @@ CodeTypedef parser_parse_typedef() return InvalidCode; } - array_expr = parse_array_decl(); + array_expr = parse_array_decl(& _ctx->parser); // + } @@ -5591,7 +5602,7 @@ CodeUsing parser_parse_using() type = parser_parse_type(parser_not_from_template, nullptr); // using = - array_expr = parse_array_decl(); + array_expr = parse_array_decl(& _ctx->parser); // + } } diff --git a/base/components/parser_types.hpp b/base/components/parser_types.hpp index fb4fc9a..4308316 100644 --- a/base/components/parser_types.hpp +++ b/base/components/parser_types.hpp @@ -91,16 +91,31 @@ bool tok_is_end_definition(Token tok) { StrBuilder tok_to_strbuilder(Token tok); -struct TokArray -{ - Array(Token) Arr; - s32 Idx; -}; - struct TokenSlice { Token* Ptr; s32 Num; + +#if GEN_COMPILER_CPP + forceinline operator Token* () const { return Ptr; } + forceinline Token& operator[]( ssize index ) const { return Ptr[index]; } +#endif +}; + +forceinline +Str token_range_to_str(Token start, Token end) +{ + Str result = { + start.Text.Ptr, + (scast(sptr, rcast(uptr, end.Text.Ptr)) + end.Text.Len) - scast(sptr, rcast(uptr, start.Text.Ptr)) + }; + return result; +} + +struct TokArray +{ + Array(Token) Arr; + s32 Idx; }; struct LexContext diff --git a/base/dependencies/macros.hpp b/base/dependencies/macros.hpp index ec2106c..69abbc7 100644 --- a/base/dependencies/macros.hpp +++ b/base/dependencies/macros.hpp @@ -310,7 +310,7 @@ # if GEN_COMPILER_CPP # define struct_zero(type) {} # else -# define struct_zero(type) (type) {0} +# define struct_zero(type) {0} # endif #endif diff --git a/base/dependencies/strings.hpp b/base/dependencies/strings.hpp index 866e944..eaf2e5e 100644 --- a/base/dependencies/strings.hpp +++ 
b/base/dependencies/strings.hpp @@ -320,7 +320,7 @@ inline StrBuilder strbuilder_fmt_buf(AllocatorInfo allocator, char const* fmt, ...) { local_persist thread_local - PrintF_Buffer buf = struct_init(PrintF_Buffer, {0}); + PrintF_Buffer buf = struct_zero(PrintF_Buffer); va_list va; va_start(va, fmt);