prepped lexer and parser for c-library generation

2024-12-09 16:45:18 -05:00
parent e6f30c7e1d
commit e786d7c3b6
6 changed files with 500 additions and 364 deletions
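
The renames in this commit give every parser- and lexer-facing symbol a unique name at global scope, since C has neither namespaces nor function overloading: Token helpers gain a tok_ prefix (is_specifier -> tok_is_specifier), token-stream cursors a lex_ prefix (current -> lex_current), parser-context operations a parser_ prefix (pop -> parser_pop), and lexer globals a Lexer_ prefix (Tokens -> Lexer_Tokens). A minimal sketch of the collision this avoids; the context type name is assumed here for illustration:

// Pre-rename: an overload set. Valid C++, but conflicting declarations in C.
String to_string( Token tok );
String to_string( ParseContext ctx );

// Post-rename: each symbol is unique, so a C compiler accepts both.
String tok_to_string( Token tok );
String parser_to_string( ParseContext ctx );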

View File

@@ -246,9 +246,7 @@ int gen_main()
CodeBody etoktype = gen_etoktype( "enums/ETokType.csv", "enums/AttributeTokens.csv" );
//CodeNS nspaced_etoktype = def_namespace( name(parser), def_namespace_body( args(etoktype)) );
CodeBody nspaced_etoktype = def_global_body( args(
untyped_str(txt("GEN_NS_PARSER_BEGIN\n")),
etoktype,
untyped_str(txt("GEN_NS_PARSER_END\n"))
etoktype
));
Builder

View File

@@ -6,6 +6,7 @@
// This file was generated automatically by gencpp's bootstrap.cpp (See: https://github.com/Ed94/gencpp)
GEN_NS_PARSER_BEGIN
#define GEN_DEFINE_ATTRIBUTE_TOKENS Entry( Tok_Attribute_API_Export, "GEN_API_Export_Code" ) Entry( Tok_Attribute_API_Import, "GEN_API_Import_Code" )
enum TokType : u32
@@ -112,7 +113,7 @@ enum TokType : u32
inline StrC toktype_to_str( TokType type )
{
local_persist StrC lookup[] {
local_persist StrC lookup[] = {
{ sizeof( "__invalid__" ), "__invalid__" },
{ sizeof( "private" ), "private" },
{ sizeof( "protected" ), "protected" },

View File

@@ -12,7 +12,7 @@ CodeClass parse_class( StrC def )
{
GEN_USING_NS_PARSER;
check_parse_args( def );
TokArray toks = lex( def );
if ( toks.Arr == nullptr )
return InvalidCode;
@@ -20,7 +20,7 @@ CodeClass parse_class( StrC def )
Context.Tokens = toks;
push_scope();
CodeClass result = (CodeClass) parse_class_struct( Tok_Decl_Class );
pop(& Context);
parser_pop(& Context);
return result;
}
@@ -39,9 +39,9 @@ CodeConstructor parse_constructor( StrC def )
Specifier specs_found[ 16 ] { Spec_NumSpecifiers };
s32 NumSpecifiers = 0;
while ( left && is_specifier(currtok) )
while ( left && tok_is_specifier(currtok) )
{
Specifier spec = strc_to_specifier( to_str(currtok) );
Specifier spec = strc_to_specifier( tok_to_str(currtok) );
b32 ignore_spec = false;
@@ -59,8 +59,8 @@ CodeConstructor parse_constructor( StrC def )
break;
default :
log_failure( "Invalid specifier %s for variable\n%s", spec_to_str( spec ), to_string(Context) );
pop(& Context);
log_failure( "Invalid specifier %s for variable\n%s", spec_to_str( spec ), parser_to_string(Context) );
parser_pop(& Context);
return InvalidCode;
}
@@ -109,7 +109,7 @@ CodeEnum parse_enum( StrC def )
TokArray toks = lex( def );
if ( toks.Arr == nullptr )
{
pop(& Context);
parser_pop(& Context);
return InvalidCode;
}
@@ -181,7 +181,7 @@ CodeBody parse_global_body( StrC def )
Context.Tokens = toks;
push_scope();
CodeBody result = parse_global_nspace( CT_Global_Body );
pop(& Context);
parser_pop(& Context);
return result;
}
@@ -236,7 +236,7 @@ CodeStruct parse_struct( StrC def )
Context.Tokens = toks;
push_scope();
CodeStruct result = (CodeStruct) parse_class_struct( Tok_Decl_Struct );
pop(& Context);
parser_pop(& Context);
return result;
}
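
Every exit path in these parse_* entry points now calls parser_pop(& Context) instead of the old overload-style pop(& Context), keeping the parser's scope stack balanced under a name specific enough for C's single global namespace. The full shape of the pattern, assembled from the hunks above (parse_enum pops on the failed-lex path because it pushes its scope before lexing; parse_class and parse_struct have nothing to pop there):

CodeStruct parse_struct( StrC def )
{
    GEN_USING_NS_PARSER;
    check_parse_args( def );

    TokArray toks = lex( def );
    if ( toks.Arr == nullptr )
        return InvalidCode; // nothing pushed yet, so nothing to pop

    Context.Tokens = toks;
    push_scope();
    CodeStruct result = (CodeStruct) parse_class_struct( Tok_Decl_Struct );
    parser_pop( & Context ); // was pop(& Context)
    return result;
}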

View File

@@ -35,62 +35,62 @@ struct Token
constexpr Token NullToken { nullptr, 0, Tok_Invalid, false, 0, TF_Null };
AccessSpec to_access_specifier(Token tok)
AccessSpec tok_to_access_specifier(Token tok)
{
return scast(AccessSpec, tok.Type);
}
StrC to_str(Token tok)
StrC tok_to_str(Token tok)
{
return { tok.Length, tok.Text };
}
bool is_valid( Token tok )
bool tok_is_valid( Token tok )
{
return tok.Text && tok.Length && tok.Type != Tok_Invalid;
}
bool is_access_operator(Token tok)
bool tok_is_access_operator(Token tok)
{
return bitfield_is_equal( u32, tok.Flags, TF_AccessOperator );
}
bool is_access_specifier(Token tok)
bool tok_is_access_specifier(Token tok)
{
return bitfield_is_equal( u32, tok.Flags, TF_AccessSpecifier );
}
bool is_attribute(Token tok)
bool tok_is_attribute(Token tok)
{
return bitfield_is_equal( u32, tok.Flags, TF_Attribute );
}
bool is_operator(Token tok)
bool tok_is_operator(Token tok)
{
return bitfield_is_equal( u32, tok.Flags, TF_Operator );
}
bool is_preprocessor(Token tok)
bool tok_is_preprocessor(Token tok)
{
return bitfield_is_equal( u32, tok.Flags, TF_Preprocess );
}
bool is_preprocess_cond(Token tok)
bool tok_is_preprocess_cond(Token tok)
{
return bitfield_is_equal( u32, tok.Flags, TF_Preprocess_Cond );
}
bool is_specifier(Token tok)
bool tok_is_specifier(Token tok)
{
return bitfield_is_equal( u32, tok.Flags, TF_Specifier );
}
bool is_end_definition(Token tok)
bool tok_is_end_definition(Token tok)
{
return bitfield_is_equal( u32, tok.Flags, TF_EndDefinition );
}
String to_string(Token tok)
String tok_to_string(Token tok)
{
String result = string_make_reserve( GlobalAllocator, kilobytes(4) );
@@ -111,9 +111,9 @@ struct TokArray
s32 Idx;
};
bool __eat( TokType type );
bool lex__eat( TokType type );
Token* current(TokArray* self, bool skip_formatting )
Token* lex_current(TokArray* self, bool skip_formatting )
{
if ( skip_formatting )
{
@@ -124,7 +124,7 @@ Token* current(TokArray* self, bool skip_formatting )
return & self->Arr[self->Idx];
}
Token* peek(TokArray self, bool skip_formatting)
Token* lex_peek(TokArray self, bool skip_formatting)
{
s32 idx = self.Idx;
@@ -139,7 +139,7 @@ Token* peek(TokArray self, bool skip_formatting)
return & self.Arr[idx];
}
Token* previous(TokArray self, bool skip_formatting)
Token* lex_previous(TokArray self, bool skip_formatting)
{
s32 idx = self.Idx;
@@ -154,7 +154,7 @@ Token* previous(TokArray self, bool skip_formatting)
return & self.Arr[idx - 1];
}
Token* next(TokArray self, bool skip_formatting)
Token* lex_next(TokArray self, bool skip_formatting)
{
s32 idx = self.Idx;
@@ -169,9 +169,9 @@ Token* next(TokArray self, bool skip_formatting)
return & self.Arr[idx + 1];
}
global Arena_256KB defines_map_arena;
global HashTable(StrC) defines;
global Array(Token) Tokens;
global Arena_256KB Lexer_defines_map_arena;
global HashTable(StrC) Lexer_defines;
global Array(Token) Lexer_Tokens;
#define current ( * ctx->scanner )
@@ -190,7 +190,7 @@ global Array(Token) Tokens;
ctx->scanner++; \
}
#define SkipWhitespace() \
#define skip_whitespace() \
while ( ctx->left && char_is_space( current ) ) \
{ \
move_forward(); \
@@ -237,10 +237,10 @@ s32 lex_preprocessor_directive( LexContext* ctx )
{
char const* hash = ctx->scanner;
Token hash_tok = { hash, 1, Tok_Preprocess_Hash, ctx->line, ctx->column, TF_Preprocess };
array_append( Tokens, hash_tok );
array_append( Lexer_Tokens, hash_tok );
move_forward();
SkipWhitespace();
skip_whitespace();
ctx->token.Text = ctx->scanner;
while (ctx->left && ! char_is_space(current) )
@@ -249,7 +249,7 @@ s32 lex_preprocessor_directive( LexContext* ctx )
ctx->token.Length++;
}
ctx->token.Type = strc_to_toktype( to_str(ctx->token) );
ctx->token.Type = strc_to_toktype( tok_to_str(ctx->token) );
bool is_preprocessor = ctx->token.Type >= Tok_Preprocess_Define && ctx->token.Type <= Tok_Preprocess_Pragma;
if ( ! is_preprocessor )
@@ -313,14 +313,14 @@ s32 lex_preprocessor_directive( LexContext* ctx )
ctx->token.Length = ctx->token.Length + ctx->token.Text - hash;
ctx->token.Text = hash;
array_append( Tokens, ctx->token );
array_append( Lexer_Tokens, ctx->token );
return Lex_Continue; // Skip found token, it's all handled here.
}
if ( ctx->token.Type == Tok_Preprocess_Else || ctx->token.Type == Tok_Preprocess_EndIf )
{
ctx->token.Flags |= TF_Preprocess_Cond;
array_append( Tokens, ctx->token );
array_append( Lexer_Tokens, ctx->token );
end_line();
return Lex_Continue;
}
@@ -329,9 +329,9 @@ s32 lex_preprocessor_directive( LexContext* ctx )
ctx->token.Flags |= TF_Preprocess_Cond;
}
array_append( Tokens, ctx->token );
array_append( Lexer_Tokens, ctx->token );
SkipWhitespace();
skip_whitespace();
if ( ctx->token.Type == Tok_Preprocess_Define )
{
@@ -353,10 +353,10 @@ s32 lex_preprocessor_directive( LexContext* ctx )
name.Length++;
}
array_append( Tokens, name );
array_append( Lexer_Tokens, name );
u64 key = crc32( name.Text, name.Length );
hashtable_set(ctx->defines, key, to_str(name) );
hashtable_set(ctx->defines, key, tok_to_str(name) );
}
Token preprocess_content = { ctx->scanner, 0, Tok_Preprocess_Content, ctx->line, ctx->column, TF_Preprocess };
@@ -399,7 +399,7 @@ s32 lex_preprocessor_directive( LexContext* ctx )
move_forward();
}
array_append( Tokens, preprocess_content );
array_append( Lexer_Tokens, preprocess_content );
return Lex_Continue; // Skip found token, it's all handled here.
}
@@ -462,7 +462,7 @@ s32 lex_preprocessor_directive( LexContext* ctx )
preprocess_content.Length++;
}
array_append( Tokens, preprocess_content );
array_append( Lexer_Tokens, preprocess_content );
return Lex_Continue; // Skip found token, it's all handled here.
}
@@ -471,11 +471,11 @@ void lex_found_token( LexContext* ctx )
{
if ( ctx->token.Type != Tok_Invalid )
{
array_append( Tokens, ctx->token );
array_append( Lexer_Tokens, ctx->token );
return;
}
TokType type = strc_to_toktype( to_str(ctx->token) );
TokType type = strc_to_toktype( tok_to_str(ctx->token) );
if (type <= Tok_Access_Public && type >= Tok_Access_Private )
{
@@ -489,7 +489,7 @@ void lex_found_token( LexContext* ctx )
if ( type == Tok_Decl_Extern_Linkage )
{
SkipWhitespace();
skip_whitespace();
if ( current != '"' )
{
@@ -498,7 +498,7 @@ void lex_found_token( LexContext* ctx )
}
ctx->token.Type = type;
array_append( Tokens, ctx->token );
array_append( Lexer_Tokens, ctx->token );
return;
}
@@ -508,7 +508,7 @@ void lex_found_token( LexContext* ctx )
{
ctx->token.Type = type;
ctx->token.Flags |= TF_Specifier;
array_append( Tokens, ctx->token );
array_append( Lexer_Tokens, ctx->token );
return;
}
@@ -516,7 +516,7 @@ void lex_found_token( LexContext* ctx )
if ( type != Tok_Invalid )
{
ctx->token.Type = type;
array_append( Tokens, ctx->token );
array_append( Lexer_Tokens, ctx->token );
return;
}
@@ -570,7 +570,7 @@ void lex_found_token( LexContext* ctx )
ctx->token.Type = Tok_Identifier;
}
array_append( Tokens, ctx->token );
array_append( Lexer_Tokens, ctx->token );
}
neverinline
@@ -581,7 +581,7 @@ TokArray lex( StrC content )
c.content = content;
c.left = content.Len;
c.scanner = content.Ptr;
c.defines = defines;
c.defines = Lexer_defines;
char const* word = c.scanner;
s32 word_length = 0;
@@ -589,7 +589,7 @@ TokArray lex( StrC content )
c.line = 1;
c.column = 1;
SkipWhitespace();
skip_whitespace();
if ( c.left <= 0 )
{
log_failure( "gen::lex: no tokens found (only whitespace provided)" );
@@ -614,7 +614,7 @@ TokArray lex( StrC content )
hashtable_set(c.defines, key, (StrC) * entry );
}
array_clear(Tokens);
array_clear(Lexer_Tokens);
while (c.left )
{
@@ -644,14 +644,14 @@ TokArray lex( StrC content )
c.token.Type = Tok_NewLine;
c.token.Length++;
array_append( Tokens, c.token );
array_append( Lexer_Tokens, c.token );
continue;
}
}
c.token.Length = 0;
SkipWhitespace();
skip_whitespace();
if ( c.left <= 0 )
break;
@@ -680,7 +680,7 @@ TokArray lex( StrC content )
c.token.Length++;
move_forward();
array_append( Tokens, c.token );
array_append( Lexer_Tokens, c.token );
}
}
@@ -1135,7 +1135,7 @@ TokArray lex( StrC content )
move_forward();
c.token.Length++;
}
array_append( Tokens, c.token );
array_append( Lexer_Tokens, c.token );
continue;
}
else if ( current == '*' )
@@ -1171,7 +1171,7 @@ TokArray lex( StrC content )
move_forward();
c.token.Length++;
}
array_append( Tokens, c.token );
array_append( Lexer_Tokens, c.token );
// end_line();
continue;
}
@@ -1264,14 +1264,14 @@ TokArray lex( StrC content )
}
else
{
s32 start = max( 0, array_num(Tokens) - 100 );
s32 start = max( 0, array_num(Lexer_Tokens) - 100 );
log_fmt("\n%d\n", start);
for ( s32 idx = start; idx < array_num(Tokens); idx++ )
for ( s32 idx = start; idx < array_num(Lexer_Tokens); idx++ )
{
log_fmt( "Token %d Type: %s : %.*s\n"
, idx
, toktype_to_str( Tokens[ idx ].Type ).Ptr
, Tokens[ idx ].Length, Tokens[ idx ].Text
, toktype_to_str( Lexer_Tokens[ idx ].Type ).Ptr
, Lexer_Tokens[ idx ].Length, Lexer_Tokens[ idx ].Text
);
}
@@ -1288,7 +1288,7 @@ TokArray lex( StrC content )
FoundToken:
{
lex_found_token( ctx );
TokType last_type = array_back(Tokens)->Type;
TokType last_type = array_back(Lexer_Tokens)->Type;
if ( last_type == Tok_Preprocess_Macro )
{
c.token = { c.scanner, 0, Tok_Invalid, c.line, c.column, TF_Null };
@@ -1304,22 +1304,23 @@ TokArray lex( StrC content )
c.token.Length++;
move_forward();
array_append( Tokens, c.token );
array_append( Lexer_Tokens, c.token );
continue;
}
}
}
}
if ( array_num(Tokens) == 0 )
if ( array_num(Lexer_Tokens) == 0 )
{
log_failure( "Failed to lex any tokens" );
return { {}, 0 };
}
hashtable_clear(defines);
hashtable_clear(Lexer_defines);
// defines_map_arena.free();
return { Tokens, 0 };
TokArray result = { Lexer_Tokens, 0 };
return result;
}
#undef current
#undef move_forward
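
The named temporary introduced at the end of lex() replaces return { Tokens, 0 };, which relies on C++11 list-initialization of the return value and is not valid C. A braced initializer on a named variable is the subset both languages accept. A sketch of the alternatives; the compound-literal form is standard C99 but only a vendor extension in C++:

// C99 compound literal: fine for a C build, an extension in C++.
return (TokArray){ Lexer_Tokens, 0 };

// Common-subset form this commit uses instead.
TokArray result = { Lexer_Tokens, 0 };
return result;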

File diff suppressed because it is too large