From 2b63fc27cd545ce8e98c159a917e77f31ae9740c Mon Sep 17 00:00:00 2001 From: Ed_ Date: Sun, 30 Jul 2023 18:55:57 -0400 Subject: [PATCH] Progress toward preprocessor parsing, lexing works, parsing does not. --- project/components/etoktype.cpp | 17 +- project/components/interface.cpp | 5 + project/components/interface.parsing.cpp | 216 +++++++++++++++++++---- project/enums/ETokType.csv | 35 ++-- scripts/.clang-format | 8 +- scripts/msvc/devshell.ps1 | 37 ++-- test/test.cpp | 5 +- test/test.singleheader_ast.cpp | 31 ++++ 8 files changed, 274 insertions(+), 80 deletions(-) create mode 100644 test/test.singleheader_ast.cpp diff --git a/project/components/etoktype.cpp b/project/components/etoktype.cpp index ac85155..dbb17b9 100644 --- a/project/components/etoktype.cpp +++ b/project/components/etoktype.cpp @@ -55,16 +55,17 @@ namespace Parser Entry( Module_Export, "export" ) \ Entry( Number, "number" ) \ Entry( Operator, "operator" ) \ - Entry( Preprocess_Define, "#define") \ + Entry( Preprocess_Define, "define") \ + Entry( Preprocess_If, "if") \ + Entry( Preprocess_IfDef, "ifdef") \ + Entry( Preprocess_IfNotDef, "ifndef") \ + Entry( Preprocess_ElIf, "elif") \ + Entry( Preprocess_Else, "else") \ + Entry( Preprocess_EndIf, "endif") \ Entry( Preprocess_Include, "include" ) \ - Entry( Preprocess_If, "#if") \ - Entry( Preprocess_IfDef, "#if") \ - Entry( Preprocess_IfNotDef, "#ifndef") \ - Entry( Preprocess_ElIf, "#elif") \ - Entry( Preprocess_Else, "#else") \ - Entry( Preprocess_EndIf, "#endif") \ - Entry( Preprocess_Pragma, "#pragma") \ + Entry( Preprocess_Pragma, "pragma") \ Entry( Preprocess_Content, "macro content") \ + Entry( Preprocess_Macro, "macro") \ Entry( Spec_Alignas, "alignas" ) \ Entry( Spec_Const, "const" ) \ Entry( Spec_Consteval, "consteval" ) \ diff --git a/project/components/interface.cpp b/project/components/interface.cpp index 8e948bd..cf2b2ec 100644 --- a/project/components/interface.cpp +++ b/project/components/interface.cpp @@ -1,3 +1,6 @@ +internal void init_parser(); +internal void deinit_parser(); + internal void* Global_Allocator_Proc( void* allocator_data, AllocType type, sw size, sw alignment, void* old_memory, sw old_size, u64 flags ) { @@ -266,6 +269,7 @@ void init() } define_constants(); + init_parser(); } void deinit() @@ -308,6 +312,7 @@ void deinit() while ( left--, left ); Global_AllocatorBuckets.free(); + deinit_parser(); } void reset() diff --git a/project/components/interface.parsing.cpp b/project/components/interface.parsing.cpp index 99ddfc9..b71acaf 100644 --- a/project/components/interface.parsing.cpp +++ b/project/components/interface.parsing.cpp @@ -188,6 +188,8 @@ namespace Parser IsAssign = bit(0), }; + global Array Tokens; + TokArray lex( StrC content ) { # define current ( * scanner ) @@ -213,9 +215,6 @@ namespace Parser move_forward(); \ } - local_persist thread_local - Array Tokens = { nullptr }; - s32 left = content.Len; char const* scanner = content.Ptr; @@ -232,17 +231,22 @@ namespace Parser return { { nullptr }, 0 }; } - if ( Tokens ) + local_persist char defines_map_mem[ kilobytes(64) ]; + local_persist Arena defines_map_arena; + HashTable defines; { - Tokens.free(); + defines_map_arena = Arena::init_from_memory( defines_map_mem, sizeof(defines_map_mem) ); + defines = HashTable::init( defines_map_arena ); } - Tokens = Array::init_reserve( LexArena, content.Len / 6 ); + Tokens.clear(); while (left ) { Token token = { nullptr, 0, TokType::Invalid, false, line, column }; + bool is_define = false; + SkipWhitespace(); if ( left <= 0 ) break; @@ -251,11 +255,11 @@ namespace Parser { case '#': { - token.Text = scanner; - token.Length = 1; move_forward(); + SkipWhitespace(); - while (left && current != ' ' ) + token.Text = scanner; + while (left && ! char_is_space(current) ) { move_forward(); token.Length++; @@ -264,6 +268,34 @@ namespace Parser token.Type = ETokType::to_type( token ); Tokens.append( token ); + if ( token.Type == TokType::Preprocess_Define ) + { + SkipWhitespace(); + Token name = { scanner, 0, TokType::Identifier, false, line, column }; + + name.Text = scanner; + name.Length = 1; + move_forward(); + + while ( left && ( char_is_alphanumeric(current) || current == '_' ) ) + { + move_forward(); + name.Length++; + } + + Tokens.append( name ); + + s32 key = crc32( name.Text, name.Length ); + defines.set( key, name ); + } + + if ( token.Type == TokType::Preprocess_Else || token.Type == TokType::Preprocess_EndIf ) + { + SkipWhitespace(); + Tokens.append( token ); + continue; + } + Token content = { scanner, 0, TokType::Preprocess_Content, false, line, column }; if ( token.Type == TokType::Preprocess_Include ) @@ -271,18 +303,20 @@ namespace Parser content.Type = TokType::String; SkipWhitespace(); - if ( current != '"' ) + if ( current != '"' && current != '<' ) { - log_failure( "gen::Parser::lex: Expected '\"' after #include, not '%c' (%d, %d)\n%s" + String directive_str = String::fmt_buf( GlobalAllocator, "%.*s", min( 80, left + content.Length ), token.Text ); + + log_failure( "gen::Parser::lex: Expected '\"' or '<' after #include, not '%c' (%d, %d)\n%s" , current - , token.Line - , token.Column - , Context.to_string() + , content.Line + , content.Column + , directive_str.Data ); return { { nullptr }, 0 }; } - while ( left && current != '"' ) + while ( left && current != '"' && current != '>' ) { move_forward(); content.Length++; @@ -295,13 +329,27 @@ namespace Parser continue; // Skip found token, its all handled here. } + s32 within_string = false; + s32 within_char = false; while ( left ) { - if ( current == '\\' ) + if ( current == '"' ) + within_string ^= true; + + if ( current == '\'' ) + within_char ^= true; + + if ( current == '\\' && ! within_string && ! within_char ) { move_forward(); content.Length++; + if ( current == '\r' ) + { + move_forward(); + content.Length++; + } + if ( current == '\n' ) { move_forward(); @@ -310,14 +358,14 @@ namespace Parser } else { - String directive_str = String::fmt_buf( GlobalAllocator, "%s", token.Text, token.Length ); - String content_str = String::fmt_buf( GlobalAllocator, "%s", content, min( 40, left + content.Length ) ); + String directive_str = String::make_length( GlobalAllocator, token.Text, token.Length ); + String content_str = String::fmt_buf( GlobalAllocator, "%.*s", min( 400, left + content.Length ), content.Text ); log_failure( "gen::Parser::lex: Invalid escape sequence '\\%c' (%d, %d)" - " in preprocessor directive '%.*s' (%d, %d)\n" - "will continue parsing, but compiliation will fail (if using non-fatal failures).\n" - , content_str, line, column - , directive_str, token.Line, token.Column ); + " in preprocessor directive '%s' (%d, %d)\n%s" + , current, line, column + , directive_str, content.Line, content.Column + , content_str ); break; } } @@ -358,7 +406,7 @@ namespace Parser { String context_str = String::fmt_buf( GlobalAllocator, "%s", scanner, min( 100, left ) ); - log_failure( "gen::lex: invalid varadic argument, expected '...' got '..%c' (%d, %d)", context_str, line, column ); + log_failure( "gen::lex: invalid varadic argument, expected '...' got '..%c' (%d, %d)\n%s", current, line, column, context_str ); } } @@ -468,6 +516,18 @@ namespace Parser move_forward(); + if ( left && current == '\\' ) + { + move_forward(); + token.Length++; + + if ( current == '\'' ) + { + move_forward(); + token.Length++; + } + } + while ( left && current != '\'' ) { move_forward(); @@ -540,6 +600,17 @@ namespace Parser } goto FoundToken; + case '?': + token.Text = scanner; + token.Length = 1; + token.Type = TokType::Operator; + token.IsAssign = false; + + if (left) + move_forward(); + + goto FoundToken; + // All other operators we just label as an operator and move forward. case '=': token.Text = scanner; @@ -550,6 +621,15 @@ namespace Parser if (left) move_forward(); + if ( current == '=' ) + { + token.Length++; + token.IsAssign = false; + + if (left) + move_forward(); + } + goto FoundToken; case '+': @@ -653,10 +733,17 @@ namespace Parser token.Text = scanner; token.Length = 0; - while ( left && ( current != '*' && *(scanner + 1) != '/' ) ) + bool star = current == '*'; + bool slash = scanner[1] == '/'; + bool at_end = star && slash; + while ( left && ! at_end ) { move_forward(); token.Length++; + + star = current == '*'; + slash = scanner[1] == '/'; + at_end = star && slash; } move_forward(); move_forward(); @@ -728,9 +815,9 @@ namespace Parser } else { - String context_str = String::fmt_buf( GlobalAllocator, "%s", scanner, min( 100, left ) ); + String context_str = String::fmt_buf( GlobalAllocator, "%.*s", min( 100, left ), scanner ); - log_failure( "Failed to lex token %s (%d, %d)", context_str, line, column ); + log_failure( "Failed to lex token '%c' (%d, %d)\n%s", current, line, column, context_str ); // Skip to next whitespace since we can't know if anything else is valid until then. while ( left && ! char_is_space( current ) ) @@ -749,10 +836,47 @@ namespace Parser TokType type = ETokType::to_type( token ); - if ( type == TokType::Invalid) - type = TokType::Identifier; + if ( type != TokType::Invalid ) + { + token.Type = type; + Tokens.append( token ); + continue; + } + + u32 key = crc32( token.Text, token.Length ); + StrC* define = defines.get( key ); + if ( define ) + { + token.Type = TokType::Preprocess_Macro; + + // Want to ignore any arguments the define may have as they can be execution expressions. + if ( left && current == '(' ) + { + move_forward(); + token.Length++; + + s32 level = 0; + while ( left && (current != ')' || level > 0) ) + { + if ( current == '(' ) + level++; + + else if ( current == ')' && level > 0 ) + level--; + + move_forward(); + token.Length++; + } + + move_forward(); + token.Length++; + } + } + else + { + token.Type = TokType::Identifier; + } - token.Type = type; Tokens.append( token ); } @@ -762,6 +886,8 @@ namespace Parser return { { nullptr }, 0 }; } + defines.clear(); + defines_map_arena.free(); return { Tokens, 0 }; # undef current # undef move_forward @@ -769,6 +895,22 @@ namespace Parser } } +internal +void init_parser() +{ + using namespace Parser; + + Tokens = Array::init_reserve( LexArena + , ( LexAllocator_Size - sizeof( Array::Header ) ) / sizeof(Token) + ); +} + +internal +void deinit_parser() +{ + Parser::Tokens = { nullptr }; +} + #pragma region Helper Macros # define check_parse_args( def ) \ @@ -912,10 +1054,11 @@ CodePragma parse_pragma() CodePragma pragma = (CodePragma) make_code(); pragma->Type = ECode::Preprocess_Pragma; + eat( TokType::Preprocess_Pragma ); if ( ! check( TokType::Preprocess_Content )) { - log_failure( "Error, expected content after #define\n%s", Context.to_string() ); + log_failure( "Error, expected content after #pragma\n%s", Context.to_string() ); return CodeInvalid; } @@ -1858,6 +2001,11 @@ CodeBody parse_class_struct_body( Parser::TokType which ) member = parse_preprocess_cond(); break; + case TokType::Preprocess_Macro: + member = untyped_str( currtok ); + eat( TokType::Preprocess_Macro ); + break; + case TokType::Preprocess_Pragma: member = parse_pragma(); break; @@ -2174,16 +2322,23 @@ CodeBody parse_global_nspace( CodeT which ) member = parse_preprocess_cond(); break; + case TokType::Preprocess_Macro: + member = untyped_str( currtok ); + eat( TokType::Preprocess_Macro ); + break; + case TokType::Preprocess_Pragma: member = parse_pragma(); break; case TokType::Preprocess_Else: member = preprocess_else; + eat( TokType::Preprocess_Else ); break; case TokType::Preprocess_EndIf: member = preprocess_endif; + eat( TokType::Preprocess_EndIf ); break; case TokType::Module_Export: @@ -2273,6 +2428,7 @@ CodeBody parse_global_nspace( CodeT which ) return CodeInvalid; } + // log_fmt("Global Body Member: %s", member->debug_str()); result.append( member ); } diff --git a/project/enums/ETokType.csv b/project/enums/ETokType.csv index 9a172c2..f4e624f 100644 --- a/project/enums/ETokType.csv +++ b/project/enums/ETokType.csv @@ -7,8 +7,8 @@ Access_StaticSymbol, "::" Ampersand, "&" Ampersand_DBL, "&&" Assign_Classifer, ":" -Attribute_Open, "[[" -Attribute_Close, "]]" +Attribute_Open, "[[" +Attribute_Close, "]]" BraceCurly_Open, "{" BraceCurly_Close, "}" BraceSquare_Open, "[" @@ -38,23 +38,24 @@ Module_Export, "export" Number, "number" Operator, "operator" Preprocess_Define, "#define" -Preprocess_If, "#if" -Preprocess_IfDef, "#ifdef" -Preprocess_IfNotDef, "#ifndef" -Preprocess_ElIf, "#elif" -Preprocess_Else, "#else" -Preprocess_EndIf, "#endif" -Preprocess_Include, "#include" -Preprocess_Pragma, "#pragma" +Preprocess_If, "if" +Preprocess_IfDef, "ifdef" +Preprocess_IfNotDef, "ifndef" +Preprocess_ElIf, "elif" +Preprocess_Else, "else" +Preprocess_EndIf, "endif" +Preprocess_Include, "include" +Preprocess_Pragma, "pragma" +Preprocess_Macro, "macro" Spec_Alignas, "alignas" Spec_Const, "const" Spec_Consteval, "consteval" Spec_Constexpr, "constexpr" Spec_Constinit, "constinit" -Spec_Explicit, "explicit" +Spec_Explicit, "explicit" Spec_Extern, "extern" -Spec_Final, "final" -Spec_Global, "global" +Spec_Final, "final" +Spec_Global, "global" Spec_Inline, "inline" Spec_Internal_Linkage, "internal" Spec_LocalPersist, "local_persist" @@ -66,12 +67,12 @@ Spec_Volatile, "volatile" Star, "*" Statement_End, ";" String, "string" -Type_Unsigned, "unsigned" +Type_Unsigned, "unsigned" Type_Signed, "signed" Type_Short, "short" Type_Long, "long" -Type_char, "char" -Type_int, "int" -Type_double, "double" +Type_char, "char" +Type_int, "int" +Type_double, "double" Varadic_Argument, "..." Attributes_Start, "__attrib_start__" diff --git a/scripts/.clang-format b/scripts/.clang-format index 26f9cc8..9f67e41 100644 --- a/scripts/.clang-format +++ b/scripts/.clang-format @@ -36,8 +36,8 @@ AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: true AlwaysBreakTemplateDeclarations: Yes -BinPackArguments: false -BinPackParameters: false +BinPackArguments: true +BinPackParameters: true BitFieldColonSpacing: Both @@ -73,7 +73,7 @@ BreakBeforeTernaryOperators: true BreakConstructorInitializers: BeforeComma BreakStringLiterals: true -ColumnLimit: 180 +ColumnLimit: 120 CompactNamespaces: true @@ -103,7 +103,7 @@ IndentWidth: 4 IndentWrappedFunctionNames: false # InsertNewlineAtEOF: true -InsertTrailingCommas: Wrapped +# InsertTrailingCommas: Wrapped LambdaBodyIndentation: OuterScope diff --git a/scripts/msvc/devshell.ps1 b/scripts/msvc/devshell.ps1 index 459a164..531bdfa 100644 --- a/scripts/msvc/devshell.ps1 +++ b/scripts/msvc/devshell.ps1 @@ -1,26 +1,23 @@ -# This script is used to iniitate the MSVC DevShell -$vs_devshell = @() -@("enterprise", "professional", "community") | ForEach-Object { - $vs_devshell_2022 = "C:\Program Files\Microsoft Visual Studio\2022\" + $_ + "\Common7\Tools\Launch-VsDevShell.ps1" - $vs_devshell_2019 = "C:\Program Files (x86)\Microsoft Visual Studio\2019\" + $_ + "\Common7\Tools\Launch-VsDevShell.ps1" +$ErrorActionPreference = "Stop" - $vs_devshell += @( $vs_devshell_2022, $vs_devshell_2019 ) +# Use vswhere to find the latest Visual Studio installation +$vswhere_out = & "C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -latest -property installationPath +if ($null -eq $vswhere_out) { + Write-Host "ERROR: Visual Studio installation not found" + exit 1 } -$found = $false -foreach($path in $vs_devshell) { - if (Test-Path $path) { - write-host "Found $path" +# Find Launch-VsDevShell.ps1 in the Visual Studio installation +$vs_path = $vswhere_out +$vs_devshell = Join-Path $vs_path "\Common7\Tools\Launch-VsDevShell.ps1" - Push-Location # Save the current path, loading the script will change it. - & $path - Pop-Location - - $found = $true - break; - } +if ( -not (Test-Path $vs_devshell) ) { + Write-Host "ERROR: Launch-VsDevShell.ps1 not found in Visual Studio installation" + Write-Host Tested path: $vs_devshell + exit 1 } -if (-not $found) { - write-host "MSVC DevShell: No valid path found" -} +# Launch the Visual Studio Developer Shell +Push-Location +& $vs_devshell @args +Pop-Location diff --git a/test/test.cpp b/test/test.cpp index a91dbfa..50aceba 100644 --- a/test/test.cpp +++ b/test/test.cpp @@ -5,6 +5,7 @@ #include "gen.cpp" #include "sanity.cpp" #include "SOA.cpp" +#include "test.singleheader_ast.cpp" #if GEN_TIME int gen_main() @@ -12,10 +13,12 @@ int gen_main() using namespace gen; log_fmt("\ngen_time:"); - check_sanity(); + // check_sanity(); check_SOA(); + check_singleheader_ast(); + return 0; } #endif diff --git a/test/test.singleheader_ast.cpp b/test/test.singleheader_ast.cpp new file mode 100644 index 0000000..cf26097 --- /dev/null +++ b/test/test.singleheader_ast.cpp @@ -0,0 +1,31 @@ +#pragma once + +#define GEN_DEFINE_LIBRARY_CODE_CONSTANTS +#define GEN_ENFORCE_STRONG_CODE_TYPES +#define GEN_EXPOSE_BACKEND +#define GEN_BENCHMARK +#include "gen.hpp" +#include "file_processors/scanner.hpp" +using namespace gen; + +void check_singleheader_ast() +{ + #define project_dir "../../" + gen::init(); + log_fmt("\ncheck_singleheader_ast:\n"); + + FileContents file = file_read_contents( GlobalAllocator, true, project_dir "singleheader/gen/gen.hpp" ); + + CodeBody ast = parse_global_body( { file.size, (char const*)file.data } ); + + log_fmt("generated AST!!!"); + + Builder builder; + builder.open( "singleheader_copy.hpp" ); + log_fmt("serializng ast"); + builder.print( ast ); + builder.write(); + + log_fmt("passed!!\n"); + gen::deinit(); +}