WIP Broken: Working towards major changes to lexer

Lexer tokens are now tracked using TokenSlice. ParseContext holds the id for the current token during traversal.
Last progress on errors needs updates to interface.parsing.cpp
Not adding message support yet as that will need to be revamped with the logging; right now just focused on getting both lexer and parser to use proper info data structures.
Thinking of separating ParseContext from the general lib Context, plan is to just have the parser context have its allocator and other references it needs.
So there will seem to be redundant parameter passing to some procedures for now.
The big end goal of this, other than the parser's compression, is the ability to support multi-threading.
Immediate concern other than making sure everything necessary is only within ParseContext, etc is things related to logging, or otherwise that is not thread dependent.
Those can get guarded but I don't have full intuition on what will have that (most likely the library's provided allocator/s as well will need guards introduced).
I'll concern myself more with charting those out once things are at least lifted properly. Worst case, a trivial situation can be achieved by the user by just using multiple contexts/allocators/etc as we already have in place.
This commit is contained in:
Edward R. Gonzalez 2025-03-18 03:26:14 -04:00
parent 1e7fdcec16
commit 1c7dd4ab32
8 changed files with 272 additions and 258 deletions

View File

@ -93,14 +93,6 @@ struct Context
// An implicit context interface will be provided instead as wrapper procedures as a convenience.
GEN_API extern Context* _ctx;
// By default this library will either crash or exit if an error is detected while generating codes.
// Even if set to not use GEN_FATAL, GEN_FATAL will still be used for memory failures as the library is unusable when they occur.
#ifdef GEN_DONT_USE_FATAL
#define log_failure log_fmt
#else
#define log_failure GEN_FATAL
#endif
// TODO(Ed): Swap all usage of this with logger_fmt (then rename logger_fmt to log_fmt)
inline
ssize log_fmt(char const* fmt, ...)
@ -374,15 +366,6 @@ struct ParseStackNode
// TODO(Ed): When an error occurs, the parse stack is not released and instead the scope is left dangling.
};
typedef struct ParseMessage ParseMessage;
struct ParseMessage
{
ParseMessage* Next;
ParseStackNode* Scope;
Str Content;
LogLevel Level;
};
struct ParseInfo
{
ParseMessage* messages;

View File

@ -13,20 +13,16 @@ ParseInfo wip_parse_str(LexedInfo lexed, ParseOpts* opts)
// TODO(Ed): Lift this.
Context* ctx = _ctx;
TokArray toks;
if (lexed.tokens.Num == 0 && lexed.tokens.Ptr == nullptr) {
if (lexed.tokens.num == 0 && lexed.tokens.ptr == nullptr) {
check_parse_args(lexed.text);
toks = lex(lexed.text);
TokenSlice slice = { toks.Arr, scast(s32, array_num(toks.Arr)) };
lexed.tokens = slice;
lexed = lex(ctx, lexed.text);
}
ParseInfo info = struct_zero(ParseInfo);
info.lexed = lexed;
// TODO(Ed): ParseInfo should be set to the parser context.
ctx->parser.Tokens = toks;
ctx->parser.tokens = lexed.tokens;
ParseStackNode scope = NullScope;
parser_push(& ctx->parser, & scope);

View File

@ -17,55 +17,55 @@ StrBuilder tok_to_strbuilder(Token tok)
return result;
}
bool lex__eat(Context* ctx, TokArray* self, TokType type );
bool lex__eat(Context* ctx, ParseContext* self, TokType type );
Token* lex_current(TokArray* self, bool skip_formatting )
Token* lex_current(ParseContext* self, bool skip_formatting )
{
if ( skip_formatting )
{
while ( self->Arr[self->Idx].Type == Tok_NewLine || self->Arr[self->Idx].Type == Tok_Comment )
self->Idx++;
while ( self->tokens[self->token_id].Type == Tok_NewLine || self->tokens[self->token_id].Type == Tok_Comment )
self->token_id++;
}
return & self->Arr[self->Idx];
return & self->tokens[self->token_id];
}
Token* lex_peek(TokArray self, bool skip_formatting)
Token* lex_peek(ParseContext const* self, bool skip_formatting)
{
s32 idx = self.Idx;
s32 idx = self->token_id;
if ( skip_formatting )
{
while ( self.Arr[idx].Type == Tok_NewLine )
while ( self->tokens[idx].Type == Tok_NewLine )
idx++;
return & self.Arr[idx];
return & self->tokens[idx];
}
return & self.Arr[idx];
return & self->tokens[idx];
}
Token* lex_previous(TokArray self, bool skip_formatting)
Token* lex_previous(ParseContext const* self, bool skip_formatting)
{
s32 idx = self.Idx;
s32 idx = self->token_id;
if ( skip_formatting )
{
while ( self.Arr[idx].Type == Tok_NewLine )
while ( self->tokens[idx].Type == Tok_NewLine )
idx --;
return & self.Arr[idx];
return & self->tokens[idx];
}
return & self.Arr[idx - 1];
return & self->tokens[idx - 1];
}
Token* lex_next(TokArray self, bool skip_formatting)
Token* lex_next(ParseContext const* self, bool skip_formatting)
{
s32 idx = self.Idx;
s32 idx = self->token_id;
if ( skip_formatting )
{
while ( self.Arr[idx].Type == Tok_NewLine )
while ( self->tokens[idx].Type == Tok_NewLine )
idx++;
return & self.Arr[idx + 1];
return & self->tokens[idx + 1];
}
return & self.Arr[idx + 1];
return & self->tokens[idx + 1];
}
enum
@ -569,43 +569,27 @@ void lex_found_token( LexContext* ctx )
// TODO(Ed): We need to attempt to recover from a lex failure?
neverinline
LexedInfo lex_WIP(Context* lib_ctx, Str content)
LexedInfo lex(Context* lib_ctx, Str content)
{
LexedInfo result = struct_zero();
result.text = content;
result.tokens = array_init_reserve(Token, ctx->Allocator_DyanmicContainers, ctx->InitSize_LexerTokens );
LexedInfo info = struct_zero();
LexContext c = struct_zero(); LexContext* ctx = & c;
c.content = content;
c.left = content.Len;
c.scanner = content.Ptr;
c.line = 1;
c.column = 1;
Array(Token) tokens = array_init_reserve(Token, lib_ctx->Allocator_DyanmicContainers, lib_ctx->InitSize_LexerTokens );
return result;
}
neverinline
// void lex( Array<Token> tokens, Str content )
TokArray lex( Str content )
{
LexContext c; LexContext* ctx = & c;
c.content = content;
c.left = content.Len;
c.scanner = content.Ptr;
char const* word = c.scanner;
s32 word_length = 0;
c.line = 1;
c.column = 1;
// TODO(Ed): Re-implement to new constraints:
// 1. Ability to continue on error
// 2. Return a lexed info.
skip_whitespace();
if ( c.left <= 0 )
{
if ( c.left <= 0 ) {
log_failure( "gen::lex: no tokens found (only whitespace provided)" );
TokArray null_array = {};
return null_array;
return info;
}
array_clear(_ctx->Lexer_Tokens);
@ -615,16 +599,12 @@ TokArray lex( Str content )
while (c.left )
{
#if 0
if (Tokens.num())
{
if (Tokens.num()) {
log_fmt("\nLastTok: %SB", Tokens.back().to_strbuilder());
}
#endif
{
Token thanks_c = { { c.scanner, 0 }, Tok_Invalid, c.line, c.column, TF_Null };
c.token = thanks_c;
}
c.token = struct_init(Token) { { c.scanner, 0 }, Tok_Invalid, c.line, c.column, TF_Null };
bool is_define = false;
@ -690,8 +670,7 @@ TokArray lex( Str content )
case Lex_ReturnNull:
{
TokArray tok_array = {};
return tok_array;
return info;
}
}
}
@ -1310,12 +1289,13 @@ TokArray lex( Str content )
if ( array_num(_ctx->Lexer_Tokens) == 0 ) {
log_failure( "Failed to lex any tokens" );
TokArray tok_array = {};
return tok_array;
return info;
}
TokArray result = { _ctx->Lexer_Tokens, 0 };
return result;
info.messages = c.messages;
info.text = content;
info.tokens = struct_init(TokenSlice) { tokens, scast(s32, array_num(tokens)) };
return info;
}
#undef move_forward

File diff suppressed because it is too large Load Diff

View File

@ -93,12 +93,12 @@ StrBuilder tok_to_strbuilder(Token tok);
struct TokenSlice
{
Token* Ptr;
s32 Num;
Token* ptr;
s32 num;
#if GEN_COMPILER_CPP
forceinline operator Token* () const { return Ptr; }
forceinline Token& operator[]( ssize index ) const { return Ptr[index]; }
forceinline operator Token* () const { return ptr; }
forceinline Token& operator[]( ssize index ) const { return ptr[index]; }
#endif
};
@ -118,16 +118,6 @@ struct TokArray
s32 Idx;
};
struct LexContext
{
Str content;
s32 left;
char const* scanner;
s32 line;
s32 column;
Token token;
};
typedef struct LexerMessage LexerMessage;
struct LexerMessage
{
@ -136,19 +126,42 @@ struct LexerMessage
LogLevel level;
};
struct LexContext
{
LexerMessage* messages;
Str content;
s32 left;
char const* scanner;
s32 line;
s32 column;
Token token;
};
struct LexedInfo
{
LexerMessage messages;
Str text;
TokenSlice tokens;
LexerMessage* messages;
Str text;
TokenSlice tokens;
};
typedef struct ParseStackNode ParseStackNode;
typedef struct ParseMessage ParseMessage;
struct ParseMessage
{
ParseMessage* Next;
ParseStackNode* Scope;
Str Content;
LogLevel Level;
};
struct ParseContext
{
TokArray Tokens;
ParseMessage* messages;
ParseStackNode* Scope;
// TokArray Tokens;
TokenSlice tokens;
s32 token_id;
};
enum MacroType : u16

View File

@ -55,6 +55,7 @@ Str loglevel_to_str(LogLevel level)
return lookup[level];
}
typedef struct LogEntry LogEntry;
struct LogEntry
{
Str msg;
@ -63,6 +64,14 @@ struct LogEntry
typedef void LoggerProc(LogEntry entry);
// By default this library will either crash or exit if an error is detected while generating codes.
// Even if set to not use GEN_FATAL, GEN_FATAL will still be used for memory failures as the library is unusable when they occur.
#ifdef GEN_DONT_USE_FATAL
#define log_failure log_fmt
#else
#define log_failure GEN_FATAL
#endif
enum AccessSpec : u32
{
AccessSpec_Default,

View File

@ -17,6 +17,39 @@ template <class TType> struct RemovePtr<TType*> { typedef TType Type; };
template <class TType> using TRemovePtr = typename RemovePtr<TType>::Type;
#pragma region Slice
#if 0
#define Slice(Type) Slice<Type>
template<class Type> struct Slice;
template<class Type>
Type* slice_get(Slice<Type> self, ssize id) {
GEN_ASSERT(id > -1);
GEN_ASSERT(id < self.len);
return self.ptr[id];
}
template<class Type>
struct Slice
{
Type* ptr;
ssize len;
#if GEN_COMPILER_CPP
forceinline operator Token* () const { return ptr; }
forceinline Token& operator[]( ssize index ) const { return ptr[index]; }
forceinline Type* begin() { return ptr; }
forceinline Type* end() { return ptr + len; }
#endif
#if ! GEN_C_LIKE_CPP && GEN_COMPILER_CPP
forceinline Type& back() { return ptr[len - 1]; }
#endif
};
#endif
#pragma endregion Slice
#pragma region Array
#define Array(Type) Array<Type>

View File

@ -300,9 +300,9 @@
#ifndef struct_init
# if GEN_COMPILER_CPP
# define struct_init(type, value) value
# define struct_init(type)
# else
# define struct_init(type, value) (type) value
# define struct_init(type) (type)
# endif
#endif