Progress toward preprocessor parsing: lexing works, but parsing does not yet.

This commit is contained in:
Edward R. Gonzalez 2023-07-30 18:55:57 -04:00
parent bfbfae466f
commit 2b63fc27cd
8 changed files with 274 additions and 80 deletions

View File

@ -55,16 +55,17 @@ namespace Parser
Entry( Module_Export, "export" ) \ Entry( Module_Export, "export" ) \
Entry( Number, "number" ) \ Entry( Number, "number" ) \
Entry( Operator, "operator" ) \ Entry( Operator, "operator" ) \
Entry( Preprocess_Define, "#define") \ Entry( Preprocess_Define, "define") \
Entry( Preprocess_If, "if") \
Entry( Preprocess_IfDef, "ifdef") \
Entry( Preprocess_IfNotDef, "ifndef") \
Entry( Preprocess_ElIf, "elif") \
Entry( Preprocess_Else, "else") \
Entry( Preprocess_EndIf, "endif") \
Entry( Preprocess_Include, "include" ) \ Entry( Preprocess_Include, "include" ) \
Entry( Preprocess_If, "#if") \ Entry( Preprocess_Pragma, "pragma") \
Entry( Preprocess_IfDef, "#if") \
Entry( Preprocess_IfNotDef, "#ifndef") \
Entry( Preprocess_ElIf, "#elif") \
Entry( Preprocess_Else, "#else") \
Entry( Preprocess_EndIf, "#endif") \
Entry( Preprocess_Pragma, "#pragma") \
Entry( Preprocess_Content, "macro content") \ Entry( Preprocess_Content, "macro content") \
Entry( Preprocess_Macro, "macro") \
Entry( Spec_Alignas, "alignas" ) \ Entry( Spec_Alignas, "alignas" ) \
Entry( Spec_Const, "const" ) \ Entry( Spec_Const, "const" ) \
Entry( Spec_Consteval, "consteval" ) \ Entry( Spec_Consteval, "consteval" ) \

View File

@ -1,3 +1,6 @@
internal void init_parser();
internal void deinit_parser();
internal internal
void* Global_Allocator_Proc( void* allocator_data, AllocType type, sw size, sw alignment, void* old_memory, sw old_size, u64 flags ) void* Global_Allocator_Proc( void* allocator_data, AllocType type, sw size, sw alignment, void* old_memory, sw old_size, u64 flags )
{ {
@ -266,6 +269,7 @@ void init()
} }
define_constants(); define_constants();
init_parser();
} }
void deinit() void deinit()
@ -308,6 +312,7 @@ void deinit()
while ( left--, left ); while ( left--, left );
Global_AllocatorBuckets.free(); Global_AllocatorBuckets.free();
deinit_parser();
} }
void reset() void reset()

View File

@ -188,6 +188,8 @@ namespace Parser
IsAssign = bit(0), IsAssign = bit(0),
}; };
global Array<Token> Tokens;
TokArray lex( StrC content ) TokArray lex( StrC content )
{ {
# define current ( * scanner ) # define current ( * scanner )
@ -213,9 +215,6 @@ namespace Parser
move_forward(); \ move_forward(); \
} }
local_persist thread_local
Array<Token> Tokens = { nullptr };
s32 left = content.Len; s32 left = content.Len;
char const* scanner = content.Ptr; char const* scanner = content.Ptr;
@ -232,17 +231,22 @@ namespace Parser
return { { nullptr }, 0 }; return { { nullptr }, 0 };
} }
if ( Tokens ) local_persist char defines_map_mem[ kilobytes(64) ];
local_persist Arena defines_map_arena;
HashTable<StrC> defines;
{ {
Tokens.free(); defines_map_arena = Arena::init_from_memory( defines_map_mem, sizeof(defines_map_mem) );
defines = HashTable<StrC>::init( defines_map_arena );
} }
Tokens = Array<Token>::init_reserve( LexArena, content.Len / 6 ); Tokens.clear();
while (left ) while (left )
{ {
Token token = { nullptr, 0, TokType::Invalid, false, line, column }; Token token = { nullptr, 0, TokType::Invalid, false, line, column };
bool is_define = false;
SkipWhitespace(); SkipWhitespace();
if ( left <= 0 ) if ( left <= 0 )
break; break;
@ -251,11 +255,11 @@ namespace Parser
{ {
case '#': case '#':
{ {
token.Text = scanner;
token.Length = 1;
move_forward(); move_forward();
SkipWhitespace();
while (left && current != ' ' ) token.Text = scanner;
while (left && ! char_is_space(current) )
{ {
move_forward(); move_forward();
token.Length++; token.Length++;
@ -264,6 +268,34 @@ namespace Parser
token.Type = ETokType::to_type( token ); token.Type = ETokType::to_type( token );
Tokens.append( token ); Tokens.append( token );
if ( token.Type == TokType::Preprocess_Define )
{
SkipWhitespace();
Token name = { scanner, 0, TokType::Identifier, false, line, column };
name.Text = scanner;
name.Length = 1;
move_forward();
while ( left && ( char_is_alphanumeric(current) || current == '_' ) )
{
move_forward();
name.Length++;
}
Tokens.append( name );
s32 key = crc32( name.Text, name.Length );
defines.set( key, name );
}
if ( token.Type == TokType::Preprocess_Else || token.Type == TokType::Preprocess_EndIf )
{
SkipWhitespace();
Tokens.append( token );
continue;
}
Token content = { scanner, 0, TokType::Preprocess_Content, false, line, column }; Token content = { scanner, 0, TokType::Preprocess_Content, false, line, column };
if ( token.Type == TokType::Preprocess_Include ) if ( token.Type == TokType::Preprocess_Include )
@ -271,18 +303,20 @@ namespace Parser
content.Type = TokType::String; content.Type = TokType::String;
SkipWhitespace(); SkipWhitespace();
if ( current != '"' ) if ( current != '"' && current != '<' )
{ {
log_failure( "gen::Parser::lex: Expected '\"' after #include, not '%c' (%d, %d)\n%s" String directive_str = String::fmt_buf( GlobalAllocator, "%.*s", min( 80, left + content.Length ), token.Text );
log_failure( "gen::Parser::lex: Expected '\"' or '<' after #include, not '%c' (%d, %d)\n%s"
, current , current
, token.Line , content.Line
, token.Column , content.Column
, Context.to_string() , directive_str.Data
); );
return { { nullptr }, 0 }; return { { nullptr }, 0 };
} }
while ( left && current != '"' ) while ( left && current != '"' && current != '>' )
{ {
move_forward(); move_forward();
content.Length++; content.Length++;
@ -295,13 +329,27 @@ namespace Parser
continue; // Skip found token, its all handled here. continue; // Skip found token, its all handled here.
} }
s32 within_string = false;
s32 within_char = false;
while ( left ) while ( left )
{ {
if ( current == '\\' ) if ( current == '"' )
within_string ^= true;
if ( current == '\'' )
within_char ^= true;
if ( current == '\\' && ! within_string && ! within_char )
{ {
move_forward(); move_forward();
content.Length++; content.Length++;
if ( current == '\r' )
{
move_forward();
content.Length++;
}
if ( current == '\n' ) if ( current == '\n' )
{ {
move_forward(); move_forward();
@ -310,14 +358,14 @@ namespace Parser
} }
else else
{ {
String directive_str = String::fmt_buf( GlobalAllocator, "%s", token.Text, token.Length ); String directive_str = String::make_length( GlobalAllocator, token.Text, token.Length );
String content_str = String::fmt_buf( GlobalAllocator, "%s", content, min( 40, left + content.Length ) ); String content_str = String::fmt_buf( GlobalAllocator, "%.*s", min( 400, left + content.Length ), content.Text );
log_failure( "gen::Parser::lex: Invalid escape sequence '\\%c' (%d, %d)" log_failure( "gen::Parser::lex: Invalid escape sequence '\\%c' (%d, %d)"
" in preprocessor directive '%.*s' (%d, %d)\n" " in preprocessor directive '%s' (%d, %d)\n%s"
"will continue parsing, but compiliation will fail (if using non-fatal failures).\n" , current, line, column
, content_str, line, column , directive_str, content.Line, content.Column
, directive_str, token.Line, token.Column ); , content_str );
break; break;
} }
} }
@ -358,7 +406,7 @@ namespace Parser
{ {
String context_str = String::fmt_buf( GlobalAllocator, "%s", scanner, min( 100, left ) ); String context_str = String::fmt_buf( GlobalAllocator, "%s", scanner, min( 100, left ) );
log_failure( "gen::lex: invalid varadic argument, expected '...' got '..%c' (%d, %d)", context_str, line, column ); log_failure( "gen::lex: invalid varadic argument, expected '...' got '..%c' (%d, %d)\n%s", current, line, column, context_str );
} }
} }
@ -468,6 +516,18 @@ namespace Parser
move_forward(); move_forward();
if ( left && current == '\\' )
{
move_forward();
token.Length++;
if ( current == '\'' )
{
move_forward();
token.Length++;
}
}
while ( left && current != '\'' ) while ( left && current != '\'' )
{ {
move_forward(); move_forward();
@ -540,6 +600,17 @@ namespace Parser
} }
goto FoundToken; goto FoundToken;
case '?':
token.Text = scanner;
token.Length = 1;
token.Type = TokType::Operator;
token.IsAssign = false;
if (left)
move_forward();
goto FoundToken;
// All other operators we just label as an operator and move forward. // All other operators we just label as an operator and move forward.
case '=': case '=':
token.Text = scanner; token.Text = scanner;
@ -550,6 +621,15 @@ namespace Parser
if (left) if (left)
move_forward(); move_forward();
if ( current == '=' )
{
token.Length++;
token.IsAssign = false;
if (left)
move_forward();
}
goto FoundToken; goto FoundToken;
case '+': case '+':
@ -653,10 +733,17 @@ namespace Parser
token.Text = scanner; token.Text = scanner;
token.Length = 0; token.Length = 0;
while ( left && ( current != '*' && *(scanner + 1) != '/' ) ) bool star = current == '*';
bool slash = scanner[1] == '/';
bool at_end = star && slash;
while ( left && ! at_end )
{ {
move_forward(); move_forward();
token.Length++; token.Length++;
star = current == '*';
slash = scanner[1] == '/';
at_end = star && slash;
} }
move_forward(); move_forward();
move_forward(); move_forward();
@ -728,9 +815,9 @@ namespace Parser
} }
else else
{ {
String context_str = String::fmt_buf( GlobalAllocator, "%s", scanner, min( 100, left ) ); String context_str = String::fmt_buf( GlobalAllocator, "%.*s", min( 100, left ), scanner );
log_failure( "Failed to lex token %s (%d, %d)", context_str, line, column ); log_failure( "Failed to lex token '%c' (%d, %d)\n%s", current, line, column, context_str );
// Skip to next whitespace since we can't know if anything else is valid until then. // Skip to next whitespace since we can't know if anything else is valid until then.
while ( left && ! char_is_space( current ) ) while ( left && ! char_is_space( current ) )
@ -749,10 +836,47 @@ namespace Parser
TokType type = ETokType::to_type( token ); TokType type = ETokType::to_type( token );
if ( type == TokType::Invalid) if ( type != TokType::Invalid )
type = TokType::Identifier; {
token.Type = type;
Tokens.append( token );
continue;
}
u32 key = crc32( token.Text, token.Length );
StrC* define = defines.get( key );
if ( define )
{
token.Type = TokType::Preprocess_Macro;
// Want to ignore any arguments the define may have as they can be execution expressions.
if ( left && current == '(' )
{
move_forward();
token.Length++;
s32 level = 0;
while ( left && (current != ')' || level > 0) )
{
if ( current == '(' )
level++;
else if ( current == ')' && level > 0 )
level--;
move_forward();
token.Length++;
}
move_forward();
token.Length++;
}
}
else
{
token.Type = TokType::Identifier;
}
token.Type = type;
Tokens.append( token ); Tokens.append( token );
} }
@ -762,6 +886,8 @@ namespace Parser
return { { nullptr }, 0 }; return { { nullptr }, 0 };
} }
defines.clear();
defines_map_arena.free();
return { Tokens, 0 }; return { Tokens, 0 };
# undef current # undef current
# undef move_forward # undef move_forward
@ -769,6 +895,22 @@ namespace Parser
} }
} }
// Reserves the shared lexer token array (Parser::Tokens) out of LexArena.
internal
void init_parser()
{
	using namespace Parser;

	// Capacity is the number of Token elements that fit into LexAllocator_Size bytes
	// once the array's own header has been subtracted.
	auto const usable_bytes   = LexAllocator_Size - sizeof( Array<Token>::Header );
	auto const token_capacity = usable_bytes / sizeof(Token);

	Tokens = Array<Token>::init_reserve( LexArena, token_capacity );
}
// Resets the shared lexer token array handle.
internal
void deinit_parser()
{
	using namespace Parser;

	// Only the handle is nulled here; this function itself issues no free call on the array.
	Tokens = { nullptr };
}
#pragma region Helper Macros #pragma region Helper Macros
# define check_parse_args( def ) \ # define check_parse_args( def ) \
@ -912,10 +1054,11 @@ CodePragma parse_pragma()
CodePragma CodePragma
pragma = (CodePragma) make_code(); pragma = (CodePragma) make_code();
pragma->Type = ECode::Preprocess_Pragma; pragma->Type = ECode::Preprocess_Pragma;
eat( TokType::Preprocess_Pragma );
if ( ! check( TokType::Preprocess_Content )) if ( ! check( TokType::Preprocess_Content ))
{ {
log_failure( "Error, expected content after #define\n%s", Context.to_string() ); log_failure( "Error, expected content after #pragma\n%s", Context.to_string() );
return CodeInvalid; return CodeInvalid;
} }
@ -1858,6 +2001,11 @@ CodeBody parse_class_struct_body( Parser::TokType which )
member = parse_preprocess_cond(); member = parse_preprocess_cond();
break; break;
case TokType::Preprocess_Macro:
member = untyped_str( currtok );
eat( TokType::Preprocess_Macro );
break;
case TokType::Preprocess_Pragma: case TokType::Preprocess_Pragma:
member = parse_pragma(); member = parse_pragma();
break; break;
@ -2174,16 +2322,23 @@ CodeBody parse_global_nspace( CodeT which )
member = parse_preprocess_cond(); member = parse_preprocess_cond();
break; break;
case TokType::Preprocess_Macro:
member = untyped_str( currtok );
eat( TokType::Preprocess_Macro );
break;
case TokType::Preprocess_Pragma: case TokType::Preprocess_Pragma:
member = parse_pragma(); member = parse_pragma();
break; break;
case TokType::Preprocess_Else: case TokType::Preprocess_Else:
member = preprocess_else; member = preprocess_else;
eat( TokType::Preprocess_Else );
break; break;
case TokType::Preprocess_EndIf: case TokType::Preprocess_EndIf:
member = preprocess_endif; member = preprocess_endif;
eat( TokType::Preprocess_EndIf );
break; break;
case TokType::Module_Export: case TokType::Module_Export:
@ -2273,6 +2428,7 @@ CodeBody parse_global_nspace( CodeT which )
return CodeInvalid; return CodeInvalid;
} }
// log_fmt("Global Body Member: %s", member->debug_str());
result.append( member ); result.append( member );
} }

View File

@ -7,8 +7,8 @@ Access_StaticSymbol, "::"
Ampersand, "&" Ampersand, "&"
Ampersand_DBL, "&&" Ampersand_DBL, "&&"
Assign_Classifer, ":" Assign_Classifer, ":"
Attribute_Open, "[[" Attribute_Open, "[["
Attribute_Close, "]]" Attribute_Close, "]]"
BraceCurly_Open, "{" BraceCurly_Open, "{"
BraceCurly_Close, "}" BraceCurly_Close, "}"
BraceSquare_Open, "[" BraceSquare_Open, "["
@ -38,23 +38,24 @@ Module_Export, "export"
Number, "number" Number, "number"
Operator, "operator" Operator, "operator"
Preprocess_Define, "#define" Preprocess_Define, "#define"
Preprocess_If, "#if" Preprocess_If, "if"
Preprocess_IfDef, "#ifdef" Preprocess_IfDef, "ifdef"
Preprocess_IfNotDef, "#ifndef" Preprocess_IfNotDef, "ifndef"
Preprocess_ElIf, "#elif" Preprocess_ElIf, "elif"
Preprocess_Else, "#else" Preprocess_Else, "else"
Preprocess_EndIf, "#endif" Preprocess_EndIf, "endif"
Preprocess_Include, "#include" Preprocess_Include, "include"
Preprocess_Pragma, "#pragma" Preprocess_Pragma, "pragma"
Preprocess_Macro, "macro"
Spec_Alignas, "alignas" Spec_Alignas, "alignas"
Spec_Const, "const" Spec_Const, "const"
Spec_Consteval, "consteval" Spec_Consteval, "consteval"
Spec_Constexpr, "constexpr" Spec_Constexpr, "constexpr"
Spec_Constinit, "constinit" Spec_Constinit, "constinit"
Spec_Explicit, "explicit" Spec_Explicit, "explicit"
Spec_Extern, "extern" Spec_Extern, "extern"
Spec_Final, "final" Spec_Final, "final"
Spec_Global, "global" Spec_Global, "global"
Spec_Inline, "inline" Spec_Inline, "inline"
Spec_Internal_Linkage, "internal" Spec_Internal_Linkage, "internal"
Spec_LocalPersist, "local_persist" Spec_LocalPersist, "local_persist"
@ -66,12 +67,12 @@ Spec_Volatile, "volatile"
Star, "*" Star, "*"
Statement_End, ";" Statement_End, ";"
String, "string" String, "string"
Type_Unsigned, "unsigned" Type_Unsigned, "unsigned"
Type_Signed, "signed" Type_Signed, "signed"
Type_Short, "short" Type_Short, "short"
Type_Long, "long" Type_Long, "long"
Type_char, "char" Type_char, "char"
Type_int, "int" Type_int, "int"
Type_double, "double" Type_double, "double"
Varadic_Argument, "..." Varadic_Argument, "..."
Attributes_Start, "__attrib_start__" Attributes_Start, "__attrib_start__"

1 Invalid __invalid__
7 Ampersand &
8 Ampersand_DBL &&
9 Assign_Classifer :
10 Attribute_Open [[
11 Attribute_Close ]]
12 BraceCurly_Open {
13 BraceCurly_Close }
14 BraceSquare_Open [
38 Number number
39 Operator operator
40 Preprocess_Define #define
41 Preprocess_If #if if
42 Preprocess_IfDef #ifdef ifdef
43 Preprocess_IfNotDef #ifndef ifndef
44 Preprocess_ElIf #elif elif
45 Preprocess_Else #else else
46 Preprocess_EndIf #endif endif
47 Preprocess_Include #include include
48 Preprocess_Pragma #pragma pragma
49 Preprocess_Macro macro
50 Spec_Alignas alignas
51 Spec_Const const
52 Spec_Consteval consteval
53 Spec_Constexpr constexpr
54 Spec_Constinit constinit
55 Spec_Explicit explicit
56 Spec_Extern extern
57 Spec_Final final
58 Spec_Global global
59 Spec_Inline inline
60 Spec_Internal_Linkage internal
61 Spec_LocalPersist local_persist
67 Star *
68 Statement_End ;
69 String string
70 Type_Unsigned unsigned
71 Type_Signed signed
72 Type_Short short
73 Type_Long long
74 Type_char char
75 Type_int int
76 Type_double double
77 Varadic_Argument ...
78 Attributes_Start __attrib_start__

View File

@ -36,8 +36,8 @@ AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes AlwaysBreakTemplateDeclarations: Yes
BinPackArguments: false BinPackArguments: true
BinPackParameters: false BinPackParameters: true
BitFieldColonSpacing: Both BitFieldColonSpacing: Both
@ -73,7 +73,7 @@ BreakBeforeTernaryOperators: true
BreakConstructorInitializers: BeforeComma BreakConstructorInitializers: BeforeComma
BreakStringLiterals: true BreakStringLiterals: true
ColumnLimit: 180 ColumnLimit: 120
CompactNamespaces: true CompactNamespaces: true
@ -103,7 +103,7 @@ IndentWidth: 4
IndentWrappedFunctionNames: false IndentWrappedFunctionNames: false
# InsertNewlineAtEOF: true # InsertNewlineAtEOF: true
InsertTrailingCommas: Wrapped # InsertTrailingCommas: Wrapped
LambdaBodyIndentation: OuterScope LambdaBodyIndentation: OuterScope

View File

@ -1,26 +1,23 @@
# This script is used to iniitate the MSVC DevShell $ErrorActionPreference = "Stop"
$vs_devshell = @()
@("enterprise", "professional", "community") | ForEach-Object {
$vs_devshell_2022 = "C:\Program Files\Microsoft Visual Studio\2022\" + $_ + "\Common7\Tools\Launch-VsDevShell.ps1"
$vs_devshell_2019 = "C:\Program Files (x86)\Microsoft Visual Studio\2019\" + $_ + "\Common7\Tools\Launch-VsDevShell.ps1"
$vs_devshell += @( $vs_devshell_2022, $vs_devshell_2019 ) # Use vswhere to find the latest Visual Studio installation
$vswhere_out = & "C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -latest -property installationPath
if ($null -eq $vswhere_out) {
Write-Host "ERROR: Visual Studio installation not found"
exit 1
} }
$found = $false # Find Launch-VsDevShell.ps1 in the Visual Studio installation
foreach($path in $vs_devshell) { $vs_path = $vswhere_out
if (Test-Path $path) { $vs_devshell = Join-Path $vs_path "\Common7\Tools\Launch-VsDevShell.ps1"
write-host "Found $path"
Push-Location # Save the current path, loading the script will change it. if ( -not (Test-Path $vs_devshell) ) {
& $path Write-Host "ERROR: Launch-VsDevShell.ps1 not found in Visual Studio installation"
Pop-Location Write-Host Tested path: $vs_devshell
exit 1
$found = $true
break;
}
} }
if (-not $found) { # Launch the Visual Studio Developer Shell
write-host "MSVC DevShell: No valid path found" Push-Location
} & $vs_devshell @args
Pop-Location

View File

@ -5,6 +5,7 @@
#include "gen.cpp" #include "gen.cpp"
#include "sanity.cpp" #include "sanity.cpp"
#include "SOA.cpp" #include "SOA.cpp"
#include "test.singleheader_ast.cpp"
#if GEN_TIME #if GEN_TIME
int gen_main() int gen_main()
@ -12,10 +13,12 @@ int gen_main()
using namespace gen; using namespace gen;
log_fmt("\ngen_time:"); log_fmt("\ngen_time:");
check_sanity(); // check_sanity();
check_SOA(); check_SOA();
check_singleheader_ast();
return 0; return 0;
} }
#endif #endif

View File

@ -0,0 +1,31 @@
#pragma once
#define GEN_DEFINE_LIBRARY_CODE_CONSTANTS
#define GEN_ENFORCE_STRONG_CODE_TYPES
#define GEN_EXPOSE_BACKEND
#define GEN_BENCHMARK
#include "gen.hpp"
#include "file_processors/scanner.hpp"
using namespace gen;
void check_singleheader_ast()
{
#define project_dir "../../"
gen::init();
log_fmt("\ncheck_singleheader_ast:\n");
FileContents file = file_read_contents( GlobalAllocator, true, project_dir "singleheader/gen/gen.hpp" );
CodeBody ast = parse_global_body( { file.size, (char const*)file.data } );
log_fmt("generated AST!!!");
Builder builder;
builder.open( "singleheader_copy.hpp" );
log_fmt("serializng ast");
builder.print( ast );
builder.write();
log_fmt("passed!!\n");
gen::deinit();
}