Progress toward preprocessor parsing: lexing works, parsing does not yet.

This commit is contained in:
Edward R. Gonzalez 2023-07-30 18:55:57 -04:00
parent bfbfae466f
commit 2b63fc27cd
8 changed files with 274 additions and 80 deletions

View File

@ -55,16 +55,17 @@ namespace Parser
Entry( Module_Export, "export" ) \
Entry( Number, "number" ) \
Entry( Operator, "operator" ) \
Entry( Preprocess_Define, "#define") \
Entry( Preprocess_Define, "define") \
Entry( Preprocess_If, "if") \
Entry( Preprocess_IfDef, "ifdef") \
Entry( Preprocess_IfNotDef, "ifndef") \
Entry( Preprocess_ElIf, "elif") \
Entry( Preprocess_Else, "else") \
Entry( Preprocess_EndIf, "endif") \
Entry( Preprocess_Include, "include" ) \
Entry( Preprocess_If, "#if") \
Entry( Preprocess_IfDef, "#if") \
Entry( Preprocess_IfNotDef, "#ifndef") \
Entry( Preprocess_ElIf, "#elif") \
Entry( Preprocess_Else, "#else") \
Entry( Preprocess_EndIf, "#endif") \
Entry( Preprocess_Pragma, "#pragma") \
Entry( Preprocess_Pragma, "pragma") \
Entry( Preprocess_Content, "macro content") \
Entry( Preprocess_Macro, "macro") \
Entry( Spec_Alignas, "alignas" ) \
Entry( Spec_Const, "const" ) \
Entry( Spec_Consteval, "consteval" ) \

View File

@ -1,3 +1,6 @@
internal void init_parser();
internal void deinit_parser();
internal
void* Global_Allocator_Proc( void* allocator_data, AllocType type, sw size, sw alignment, void* old_memory, sw old_size, u64 flags )
{
@ -266,6 +269,7 @@ void init()
}
define_constants();
init_parser();
}
void deinit()
@ -308,6 +312,7 @@ void deinit()
while ( left--, left );
Global_AllocatorBuckets.free();
deinit_parser();
}
void reset()

View File

@ -188,6 +188,8 @@ namespace Parser
IsAssign = bit(0),
};
global Array<Token> Tokens;
TokArray lex( StrC content )
{
# define current ( * scanner )
@ -213,9 +215,6 @@ namespace Parser
move_forward(); \
}
local_persist thread_local
Array<Token> Tokens = { nullptr };
s32 left = content.Len;
char const* scanner = content.Ptr;
@ -232,17 +231,22 @@ namespace Parser
return { { nullptr }, 0 };
}
if ( Tokens )
local_persist char defines_map_mem[ kilobytes(64) ];
local_persist Arena defines_map_arena;
HashTable<StrC> defines;
{
Tokens.free();
defines_map_arena = Arena::init_from_memory( defines_map_mem, sizeof(defines_map_mem) );
defines = HashTable<StrC>::init( defines_map_arena );
}
Tokens = Array<Token>::init_reserve( LexArena, content.Len / 6 );
Tokens.clear();
while (left )
{
Token token = { nullptr, 0, TokType::Invalid, false, line, column };
bool is_define = false;
SkipWhitespace();
if ( left <= 0 )
break;
@ -251,11 +255,11 @@ namespace Parser
{
case '#':
{
token.Text = scanner;
token.Length = 1;
move_forward();
SkipWhitespace();
while (left && current != ' ' )
token.Text = scanner;
while (left && ! char_is_space(current) )
{
move_forward();
token.Length++;
@ -264,6 +268,34 @@ namespace Parser
token.Type = ETokType::to_type( token );
Tokens.append( token );
if ( token.Type == TokType::Preprocess_Define )
{
SkipWhitespace();
Token name = { scanner, 0, TokType::Identifier, false, line, column };
name.Text = scanner;
name.Length = 1;
move_forward();
while ( left && ( char_is_alphanumeric(current) || current == '_' ) )
{
move_forward();
name.Length++;
}
Tokens.append( name );
s32 key = crc32( name.Text, name.Length );
defines.set( key, name );
}
if ( token.Type == TokType::Preprocess_Else || token.Type == TokType::Preprocess_EndIf )
{
SkipWhitespace();
Tokens.append( token );
continue;
}
Token content = { scanner, 0, TokType::Preprocess_Content, false, line, column };
if ( token.Type == TokType::Preprocess_Include )
@ -271,18 +303,20 @@ namespace Parser
content.Type = TokType::String;
SkipWhitespace();
if ( current != '"' )
if ( current != '"' && current != '<' )
{
log_failure( "gen::Parser::lex: Expected '\"' after #include, not '%c' (%d, %d)\n%s"
String directive_str = String::fmt_buf( GlobalAllocator, "%.*s", min( 80, left + content.Length ), token.Text );
log_failure( "gen::Parser::lex: Expected '\"' or '<' after #include, not '%c' (%d, %d)\n%s"
, current
, token.Line
, token.Column
, Context.to_string()
, content.Line
, content.Column
, directive_str.Data
);
return { { nullptr }, 0 };
}
while ( left && current != '"' )
while ( left && current != '"' && current != '>' )
{
move_forward();
content.Length++;
@ -295,13 +329,27 @@ namespace Parser
continue; // Skip found token, its all handled here.
}
s32 within_string = false;
s32 within_char = false;
while ( left )
{
if ( current == '\\' )
if ( current == '"' )
within_string ^= true;
if ( current == '\'' )
within_char ^= true;
if ( current == '\\' && ! within_string && ! within_char )
{
move_forward();
content.Length++;
if ( current == '\r' )
{
move_forward();
content.Length++;
}
if ( current == '\n' )
{
move_forward();
@ -310,14 +358,14 @@ namespace Parser
}
else
{
String directive_str = String::fmt_buf( GlobalAllocator, "%s", token.Text, token.Length );
String content_str = String::fmt_buf( GlobalAllocator, "%s", content, min( 40, left + content.Length ) );
String directive_str = String::make_length( GlobalAllocator, token.Text, token.Length );
String content_str = String::fmt_buf( GlobalAllocator, "%.*s", min( 400, left + content.Length ), content.Text );
log_failure( "gen::Parser::lex: Invalid escape sequence '\\%c' (%d, %d)"
" in preprocessor directive '%.*s' (%d, %d)\n"
"will continue parsing, but compiliation will fail (if using non-fatal failures).\n"
, content_str, line, column
, directive_str, token.Line, token.Column );
" in preprocessor directive '%s' (%d, %d)\n%s"
, current, line, column
, directive_str, content.Line, content.Column
, content_str );
break;
}
}
@ -358,7 +406,7 @@ namespace Parser
{
String context_str = String::fmt_buf( GlobalAllocator, "%s", scanner, min( 100, left ) );
log_failure( "gen::lex: invalid varadic argument, expected '...' got '..%c' (%d, %d)", context_str, line, column );
log_failure( "gen::lex: invalid varadic argument, expected '...' got '..%c' (%d, %d)\n%s", current, line, column, context_str );
}
}
@ -468,6 +516,18 @@ namespace Parser
move_forward();
if ( left && current == '\\' )
{
move_forward();
token.Length++;
if ( current == '\'' )
{
move_forward();
token.Length++;
}
}
while ( left && current != '\'' )
{
move_forward();
@ -540,6 +600,17 @@ namespace Parser
}
goto FoundToken;
case '?':
token.Text = scanner;
token.Length = 1;
token.Type = TokType::Operator;
token.IsAssign = false;
if (left)
move_forward();
goto FoundToken;
// All other operators we just label as an operator and move forward.
case '=':
token.Text = scanner;
@ -550,6 +621,15 @@ namespace Parser
if (left)
move_forward();
if ( current == '=' )
{
token.Length++;
token.IsAssign = false;
if (left)
move_forward();
}
goto FoundToken;
case '+':
@ -653,10 +733,17 @@ namespace Parser
token.Text = scanner;
token.Length = 0;
while ( left && ( current != '*' && *(scanner + 1) != '/' ) )
bool star = current == '*';
bool slash = scanner[1] == '/';
bool at_end = star && slash;
while ( left && ! at_end )
{
move_forward();
token.Length++;
star = current == '*';
slash = scanner[1] == '/';
at_end = star && slash;
}
move_forward();
move_forward();
@ -728,9 +815,9 @@ namespace Parser
}
else
{
String context_str = String::fmt_buf( GlobalAllocator, "%s", scanner, min( 100, left ) );
String context_str = String::fmt_buf( GlobalAllocator, "%.*s", min( 100, left ), scanner );
log_failure( "Failed to lex token %s (%d, %d)", context_str, line, column );
log_failure( "Failed to lex token '%c' (%d, %d)\n%s", current, line, column, context_str );
// Skip to next whitespace since we can't know if anything else is valid until then.
while ( left && ! char_is_space( current ) )
@ -749,11 +836,48 @@ namespace Parser
TokType type = ETokType::to_type( token );
if ( type == TokType::Invalid)
type = TokType::Identifier;
if ( type != TokType::Invalid )
{
token.Type = type;
Tokens.append( token );
continue;
}
u32 key = crc32( token.Text, token.Length );
StrC* define = defines.get( key );
if ( define )
{
token.Type = TokType::Preprocess_Macro;
// Want to ignore any arguments the define may have as they can be execution expressions.
if ( left && current == '(' )
{
move_forward();
token.Length++;
s32 level = 0;
while ( left && (current != ')' || level > 0) )
{
if ( current == '(' )
level++;
else if ( current == ')' && level > 0 )
level--;
move_forward();
token.Length++;
}
move_forward();
token.Length++;
}
}
else
{
token.Type = TokType::Identifier;
}
Tokens.append( token );
}
if ( Tokens.num() == 0 )
@ -762,6 +886,8 @@ namespace Parser
return { { nullptr }, 0 };
}
defines.clear();
defines_map_arena.free();
return { Tokens, 0 };
# undef current
# undef move_forward
@ -769,6 +895,22 @@ namespace Parser
}
}
// Reserves the lexer's shared token array (Parser::Tokens) from LexArena.
// The requested capacity is the number of Tokens that fit in LexAllocator_Size bytes
// after the array header has been subtracted.
// NOTE(review): presumably LexArena is backed by LexAllocator_Size bytes - confirm.
internal
void init_parser()
{
	using namespace Parser;

	auto const header_bytes   = sizeof( Array<Token>::Header );
	auto const token_capacity = ( LexAllocator_Size - header_bytes ) / sizeof(Token);

	Tokens = Array<Token>::init_reserve( LexArena, token_capacity );
}
// Resets the lexer's shared token array handle to null.
// Only the handle is cleared here; this function does not free anything itself.
// NOTE(review): presumably the backing storage is released with LexArena - confirm.
internal
void deinit_parser()
{
	using namespace Parser;

	Tokens = { nullptr };
}
#pragma region Helper Macros
# define check_parse_args( def ) \
@ -912,10 +1054,11 @@ CodePragma parse_pragma()
CodePragma
pragma = (CodePragma) make_code();
pragma->Type = ECode::Preprocess_Pragma;
eat( TokType::Preprocess_Pragma );
if ( ! check( TokType::Preprocess_Content ))
{
log_failure( "Error, expected content after #define\n%s", Context.to_string() );
log_failure( "Error, expected content after #pragma\n%s", Context.to_string() );
return CodeInvalid;
}
@ -1858,6 +2001,11 @@ CodeBody parse_class_struct_body( Parser::TokType which )
member = parse_preprocess_cond();
break;
case TokType::Preprocess_Macro:
member = untyped_str( currtok );
eat( TokType::Preprocess_Macro );
break;
case TokType::Preprocess_Pragma:
member = parse_pragma();
break;
@ -2174,16 +2322,23 @@ CodeBody parse_global_nspace( CodeT which )
member = parse_preprocess_cond();
break;
case TokType::Preprocess_Macro:
member = untyped_str( currtok );
eat( TokType::Preprocess_Macro );
break;
case TokType::Preprocess_Pragma:
member = parse_pragma();
break;
case TokType::Preprocess_Else:
member = preprocess_else;
eat( TokType::Preprocess_Else );
break;
case TokType::Preprocess_EndIf:
member = preprocess_endif;
eat( TokType::Preprocess_EndIf );
break;
case TokType::Module_Export:
@ -2273,6 +2428,7 @@ CodeBody parse_global_nspace( CodeT which )
return CodeInvalid;
}
// log_fmt("Global Body Member: %s", member->debug_str());
result.append( member );
}

View File

@ -38,14 +38,15 @@ Module_Export, "export"
Number, "number"
Operator, "operator"
Preprocess_Define, "#define"
Preprocess_If, "#if"
Preprocess_IfDef, "#ifdef"
Preprocess_IfNotDef, "#ifndef"
Preprocess_ElIf, "#elif"
Preprocess_Else, "#else"
Preprocess_EndIf, "#endif"
Preprocess_Include, "#include"
Preprocess_Pragma, "#pragma"
Preprocess_If, "if"
Preprocess_IfDef, "ifdef"
Preprocess_IfNotDef, "ifndef"
Preprocess_ElIf, "elif"
Preprocess_Else, "else"
Preprocess_EndIf, "endif"
Preprocess_Include, "include"
Preprocess_Pragma, "pragma"
Preprocess_Macro, "macro"
Spec_Alignas, "alignas"
Spec_Const, "const"
Spec_Consteval, "consteval"

1 Invalid __invalid__
38 Number number
39 Operator operator
40 Preprocess_Define #define
41 Preprocess_If #if if
42 Preprocess_IfDef #ifdef ifdef
43 Preprocess_IfNotDef #ifndef ifndef
44 Preprocess_ElIf #elif elif
45 Preprocess_Else #else else
46 Preprocess_EndIf #endif endif
47 Preprocess_Include #include include
48 Preprocess_Pragma #pragma pragma
49 Preprocess_Macro macro
50 Spec_Alignas alignas
51 Spec_Const const
52 Spec_Consteval consteval

View File

@ -36,8 +36,8 @@ AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes
BinPackArguments: false
BinPackParameters: false
BinPackArguments: true
BinPackParameters: true
BitFieldColonSpacing: Both
@ -73,7 +73,7 @@ BreakBeforeTernaryOperators: true
BreakConstructorInitializers: BeforeComma
BreakStringLiterals: true
ColumnLimit: 180
ColumnLimit: 120
CompactNamespaces: true
@ -103,7 +103,7 @@ IndentWidth: 4
IndentWrappedFunctionNames: false
# InsertNewlineAtEOF: true
InsertTrailingCommas: Wrapped
# InsertTrailingCommas: Wrapped
LambdaBodyIndentation: OuterScope

View File

@ -1,26 +1,23 @@
# This script is used to initiate the MSVC DevShell
$vs_devshell = @()
@("enterprise", "professional", "community") | ForEach-Object {
$vs_devshell_2022 = "C:\Program Files\Microsoft Visual Studio\2022\" + $_ + "\Common7\Tools\Launch-VsDevShell.ps1"
$vs_devshell_2019 = "C:\Program Files (x86)\Microsoft Visual Studio\2019\" + $_ + "\Common7\Tools\Launch-VsDevShell.ps1"
$ErrorActionPreference = "Stop"
$vs_devshell += @( $vs_devshell_2022, $vs_devshell_2019 )
# Use vswhere to find the latest Visual Studio installation
$vswhere_out = & "C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -latest -property installationPath
if ($null -eq $vswhere_out) {
Write-Host "ERROR: Visual Studio installation not found"
exit 1
}
$found = $false
foreach($path in $vs_devshell) {
if (Test-Path $path) {
write-host "Found $path"
# Find Launch-VsDevShell.ps1 in the Visual Studio installation
$vs_path = $vswhere_out
$vs_devshell = Join-Path $vs_path "\Common7\Tools\Launch-VsDevShell.ps1"
Push-Location # Save the current path, loading the script will change it.
& $path
Pop-Location
$found = $true
break;
}
if ( -not (Test-Path $vs_devshell) ) {
Write-Host "ERROR: Launch-VsDevShell.ps1 not found in Visual Studio installation"
Write-Host Tested path: $vs_devshell
exit 1
}
if (-not $found) {
write-host "MSVC DevShell: No valid path found"
}
# Launch the Visual Studio Developer Shell
Push-Location
& $vs_devshell @args
Pop-Location

View File

@ -5,6 +5,7 @@
#include "gen.cpp"
#include "sanity.cpp"
#include "SOA.cpp"
#include "test.singleheader_ast.cpp"
#if GEN_TIME
int gen_main()
@ -12,10 +13,12 @@ int gen_main()
using namespace gen;
log_fmt("\ngen_time:");
check_sanity();
// check_sanity();
check_SOA();
check_singleheader_ast();
return 0;
}
#endif

View File

@ -0,0 +1,31 @@
#pragma once
#define GEN_DEFINE_LIBRARY_CODE_CONSTANTS
#define GEN_ENFORCE_STRONG_CODE_TYPES
#define GEN_EXPOSE_BACKEND
#define GEN_BENCHMARK
#include "gen.hpp"
#include "file_processors/scanner.hpp"
using namespace gen;
// Round-trip smoke test for the parser: reads the single-header gen.hpp, parses it into a
// global-body AST, then serializes that AST to "singleheader_copy.hpp" through a Builder.
// Progress is reported with log_fmt; the library is initialized on entry and torn down on exit.
void check_singleheader_ast()
{
	gen::init();
	log_fmt("\ncheck_singleheader_ast:\n");

	// The path is spelled out in full rather than built from a function-local #define:
	// a macro defined here would stay defined for the rest of the translation unit.
	// It is a relative path, so it resolves against the directory the test is run from.
	FileContents file = file_read_contents( GlobalAllocator, true, "../../singleheader/gen/gen.hpp" );
	CodeBody     ast  = parse_global_body( { file.size, (char const*)file.data } );

	// Each progress message ends with a newline so consecutive logs do not run together.
	log_fmt("generated AST!!!\n");

	Builder builder;
	builder.open( "singleheader_copy.hpp" );

	log_fmt("serializing ast\n");
	builder.print( ast );
	builder.write();

	log_fmt("passed!!\n");
	gen::deinit();
}