#ifdef GEN_INTELLISENSE_DIRECTIVES #pragma once #include "interface.upfront.cpp" #include "gen/etoktype.cpp" #endif namespace parser { enum TokFlags : u32 { TF_Operator = bit(0), TF_Assign = bit(1), TF_Preprocess = bit(2), TF_Preprocess_Cond = bit(3), TF_Attribute = bit(6), TF_AccessSpecifier = bit(7), TF_Specifier = bit(8), TF_EndDefinition = bit(9), // Either ; or } TF_Formatting = bit(10), TF_Literal = bit(11), TF_Null = 0, }; struct Token { char const* Text; sptr Length; TokType Type; s32 Line; s32 Column; u32 Flags; operator bool() { return Text && Length && Type != TokType::Invalid; } operator StrC() { return { Length, Text }; } bool is_access_specifier() { return bitfield_is_equal( u32, Flags, TF_AccessSpecifier ); } bool is_attribute() { return bitfield_is_equal( u32, Flags, TF_Attribute ); } bool is_operator() { return bitfield_is_equal( u32, Flags, TF_Operator ); } bool is_preprocessor() { return bitfield_is_equal( u32, Flags, TF_Preprocess ); } bool is_preprocess_cond() { return bitfield_is_equal( u32, Flags, TF_Preprocess_Cond ); } bool is_specifier() { return bitfield_is_equal( u32, Flags, TF_Specifier ); } bool is_end_definition() { return bitfield_is_equal( u32, Flags, TF_EndDefinition ); } AccessSpec to_access_specifier() { return scast(AccessSpec, Type); } String to_string() { String result = String::make_reserve( GlobalAllocator, kilobytes(4) ); StrC type_str = ETokType::to_str( Type ); result.append_fmt( "Line: %d Column: %d, Type: %.*s Content: %.*s" , Line, Column , type_str.Len, type_str.Ptr , Length, Text ); return result; } }; constexpr Token NullToken { nullptr, 0, TokType::Invalid, false, 0, TF_Null }; struct TokArray { Array Arr; s32 Idx; bool __eat( TokType type ); Token& current( bool skip_formatting = true ) { if ( skip_formatting ) { while ( Arr[Idx].Type == TokType::NewLine || Arr[Idx].Type == TokType::Comment ) Idx++; } return Arr[Idx]; } Token& previous( bool skip_formatting = false ) { s32 idx = this->Idx; if ( skip_formatting ) { while ( Arr[idx].Type == TokType::NewLine ) idx--; return Arr[idx]; } return Arr[idx - 1]; } Token& next( bool skip_formatting = false ) { s32 idx = this->Idx; if ( skip_formatting ) { while ( Arr[idx].Type == TokType::NewLine ) idx++; return Arr[idx]; } return Arr[idx + 1]; } Token& operator []( s32 idx ) { return Arr[idx]; } }; global Arena_64KB defines_map_arena; global HashTable defines; global Array Tokens; #define current ( * scanner ) #define move_forward() \ { \ if ( current == '\n' ) \ { \ line++; \ column = 1; \ } \ else \ { \ column++; \ } \ left--; \ scanner++; \ } #define SkipWhitespace() \ while ( left && char_is_space( current ) ) \ { \ move_forward(); \ } #define end_line() \ do \ { \ while ( left && current == ' ' ) \ { \ move_forward(); \ } \ if ( left && current == '\r' ) \ { \ move_forward(); \ move_forward(); \ } \ else if ( left && current == '\n' ) \ { \ move_forward(); \ } \ } \ while (0) enum { Lex_Continue, Lex_ReturnNull, }; forceinline s32 lex_preprocessor_directive( StrC& content , s32& left , char const*& scanner , s32& line , s32& column , HashTable& defines , Token& token ) { char const* hash = scanner; Tokens.append( { hash, 1, TokType::Preprocess_Hash, line, column, TF_Preprocess } ); move_forward(); SkipWhitespace(); token.Text = scanner; while (left && ! char_is_space(current) ) { move_forward(); token.Length++; } token.Type = ETokType::to_type( token ); bool is_preprocessor = token.Type >= TokType::Preprocess_Define && token.Type <= TokType::Preprocess_Pragma; if ( ! is_preprocessor ) { token.Type = TokType::Preprocess_Unsupported; // Its an unsupported directive, skip it s32 within_string = false; s32 within_char = false; while ( left ) { if ( current == '"' && ! within_char ) within_string ^= true; if ( current == '\'' && ! within_string ) within_char ^= true; if ( current == '\\' && ! within_string && ! within_char ) { move_forward(); token.Length++; if ( current == '\r' ) { move_forward(); token.Length++; } if ( current == '\n' ) { move_forward(); token.Length++; continue; } else { log_failure( "gen::Parser::lex: Invalid escape sequence '\\%c' (%d, %d)" " in preprocessor directive (%d, %d)\n%.100s" , current, line, column , token.Line, token.Column, token.Text ); break; } } if ( current == '\r' ) { move_forward(); token.Length++; } if ( current == '\n' ) { move_forward(); token.Length++; break; } move_forward(); token.Length++; } token.Length = token.Length + token.Text - hash; token.Text = hash; Tokens.append( token ); return Lex_Continue; // Skip found token, its all handled here. } if ( token.Type == TokType::Preprocess_Else || token.Type == TokType::Preprocess_EndIf ) { token.Flags |= TF_Preprocess_Cond; Tokens.append( token ); end_line(); return Lex_Continue; } else if ( token.Type >= TokType::Preprocess_If && token.Type <= TokType::Preprocess_ElIf ) { token.Flags |= TF_Preprocess_Cond; } Tokens.append( token ); SkipWhitespace(); if ( token.Type == TokType::Preprocess_Define ) { Token name = { scanner, 0, TokType::Identifier, line, column, TF_Preprocess }; name.Text = scanner; name.Length = 1; move_forward(); while ( left && ( char_is_alphanumeric(current) || current == '_' ) ) { move_forward(); name.Length++; } if ( left && current == '(' ) { move_forward(); name.Length++; } Tokens.append( name ); u64 key = crc32( name.Text, name.Length ); defines.set( key, name ); } Token preprocess_content = { scanner, 0, TokType::Preprocess_Content, line, column, TF_Preprocess }; if ( token.Type == TokType::Preprocess_Include ) { preprocess_content.Type = TokType::String; if ( current != '"' && current != '<' ) { String directive_str = String::fmt_buf( GlobalAllocator, "%.*s", min( 80, left + preprocess_content.Length ), token.Text ); log_failure( "gen::Parser::lex: Expected '\"' or '<' after #include, not '%c' (%d, %d)\n%s" , current , preprocess_content.Line , preprocess_content.Column , directive_str.Data ); return Lex_ReturnNull; } move_forward(); preprocess_content.Length++; while ( left && current != '"' && current != '>' ) { move_forward(); preprocess_content.Length++; } move_forward(); preprocess_content.Length++; Tokens.append( preprocess_content ); return Lex_Continue; // Skip found token, its all handled here. } s32 within_string = false; s32 within_char = false; // SkipWhitespace(); while ( left ) { if ( current == '"' && ! within_char ) within_string ^= true; if ( current == '\'' && ! within_string ) within_char ^= true; if ( current == '\\' && ! within_string && ! within_char ) { move_forward(); preprocess_content.Length++; if ( current == '\r' ) { move_forward(); preprocess_content.Length++; } if ( current == '\n' ) { move_forward(); preprocess_content.Length++; continue; } else { String directive_str = String::make_length( GlobalAllocator, token.Text, token.Length ); String content_str = String::fmt_buf( GlobalAllocator, "%.*s", min( 400, left + preprocess_content.Length ), preprocess_content.Text ); log_failure( "gen::Parser::lex: Invalid escape sequence '\\%c' (%d, %d)" " in preprocessor directive '%s' (%d, %d)\n%s" , current, line, column , directive_str, preprocess_content.Line, preprocess_content.Column , content_str ); break; } } if ( current == '\r' ) { move_forward(); } if ( current == '\n' ) { move_forward(); break; } move_forward(); preprocess_content.Length++; } Tokens.append( preprocess_content ); return Lex_Continue; // Skip found token, its all handled here. } forceinline void lex_found_token( StrC& content , s32& left , char const*& scanner , s32& line , s32& column , HashTable& defines , Token& token ) { if ( token.Type != TokType::Invalid ) { Tokens.append( token ); return; } TokType type = ETokType::to_type( token ); if ( type == ETokType::Decl_Extern_Linkage ) { SkipWhitespace(); if ( current != '"' ) { type = ETokType::Spec_Extern; token.Flags |= TF_Specifier; } token.Type = type; Tokens.append( token ); return; } if ( ( type <= TokType::Star && type >= TokType::Spec_Alignas) || type == TokType::Ampersand || type == TokType::Ampersand_DBL ) { token.Type = type; token.Flags |= TF_Specifier; Tokens.append( token ); return; } if ( type != TokType::Invalid ) { token.Type = type; Tokens.append( token ); return; } u64 key = 0; if ( current == '(') key = crc32( token.Text, token.Length + 1 ); else key = crc32( token.Text, token.Length ); StrC* define = defines.get( key ); if ( define ) { token.Type = TokType::Preprocess_Macro; // Want to ignore any arguments the define may have as they can be execution expressions. if ( left && current == '(' ) { move_forward(); token.Length++; s32 level = 0; while ( left && (current != ')' || level > 0) ) { if ( current == '(' ) level++; else if ( current == ')' && level > 0 ) level--; move_forward(); token.Length++; } move_forward(); token.Length++; } if ( current == '\r' && scanner[1] == '\n' ) { move_forward(); } else if ( current == '\n' ) { move_forward(); } } else { token.Type = TokType::Identifier; } Tokens.append( token ); } neverinline TokArray lex( StrC content ) { s32 left = content.Len; char const* scanner = content.Ptr; char const* word = scanner; s32 word_length = 0; s32 line = 1; s32 column = 1; SkipWhitespace(); if ( left <= 0 ) { log_failure( "gen::lex: no tokens found (only whitespace provided)" ); return { { nullptr }, 0 }; } for ( StringCached entry : PreprocessorDefines ) { s32 length = 0; char const* scanner = entry.Data; while ( entry.length() > length && char_is_alphanumeric( *scanner ) || *scanner == '_' ) { scanner++; length ++; } if ( scanner[1] == '(' ) { length++; } u64 key = crc32( entry.Data, length ); defines.set( key, entry ); } Tokens.clear(); while (left ) { #if 0 if (Tokens.num()) { log_fmt("\nLastTok: %S", Tokens.back().to_string()); } #endif Token token = { scanner, 0, TokType::Invalid, line, column, TF_Null }; bool is_define = false; if ( column == 1 ) { if ( current == '\r') { move_forward(); token.Length = 1; } if ( current == '\n' ) { move_forward(); token.Type = TokType::NewLine; token.Length++; Tokens.append( token ); continue; } } token.Length = 0; SkipWhitespace(); if ( left <= 0 ) break; switch ( current ) { case '#': { s32 result = lex_preprocessor_directive( content, left, scanner, line, column, defines, token ); switch ( result ) { case Lex_Continue: continue; case Lex_ReturnNull: return { { nullptr }, 0 }; } } case '.': { token.Text = scanner; token.Length = 1; token.Type = TokType::Access_MemberSymbol; token.Flags = TF_AccessSpecifier; if (left) { move_forward(); } if ( current == '.' ) { move_forward(); if( current == '.' ) { token.Length = 3; token.Type = TokType::Varadic_Argument; token.Flags = TF_Null; move_forward(); } else { String context_str = String::fmt_buf( GlobalAllocator, "%s", scanner, min( 100, left ) ); log_failure( "gen::lex: invalid varadic argument, expected '...' got '..%c' (%d, %d)\n%s", current, line, column, context_str ); } } goto FoundToken; } case '&' : { token.Text = scanner; token.Length = 1; token.Type = TokType::Ampersand; token.Flags |= TF_Operator; token.Flags |= TF_Specifier; if (left) move_forward(); if ( current == '&' ) // && { token.Length = 2; token.Type = TokType::Ampersand_DBL; if (left) move_forward(); } goto FoundToken; } case ':': { token.Text = scanner; token.Length = 1; token.Type = TokType::Assign_Classifer; // Can be either a classifier (ParentType, Bitfield width), or ternary else // token.Type = TokType::Colon; if (left) move_forward(); if ( current == ':' ) { move_forward(); token.Type = TokType::Access_StaticSymbol; token.Length++; } goto FoundToken; } case '{': { token.Text = scanner; token.Length = 1; token.Type = TokType::BraceCurly_Open; if (left) move_forward(); goto FoundToken; } case '}': { token.Text = scanner; token.Length = 1; token.Type = TokType::BraceCurly_Close; token.Flags = TF_EndDefinition; if (left) move_forward(); end_line(); goto FoundToken; } case '[': { token.Text = scanner; token.Length = 1; token.Type = TokType::BraceSquare_Open; if ( left ) { move_forward(); if ( current == ']' ) { token.Length = 2; token.Type = TokType::Operator; move_forward(); } } goto FoundToken; } case ']': { token.Text = scanner; token.Length = 1; token.Type = TokType::BraceSquare_Close; if (left) move_forward(); goto FoundToken; } case '(': { token.Text = scanner; token.Length = 1; token.Type = TokType::Capture_Start; if (left) move_forward(); goto FoundToken; } case ')': { token.Text = scanner; token.Length = 1; token.Type = TokType::Capture_End; if (left) move_forward(); goto FoundToken; } case '\'': { token.Text = scanner; token.Length = 1; token.Type = TokType::Char; token.Flags = TF_Literal; move_forward(); if ( left && current == '\\' ) { move_forward(); token.Length++; if ( current == '\'' ) { move_forward(); token.Length++; } } while ( left && current != '\'' ) { move_forward(); token.Length++; } if ( left ) { move_forward(); token.Length++; } goto FoundToken; } case ',': { token.Text = scanner; token.Length = 1; token.Type = TokType::Comma; token.Flags = TF_Operator; if (left) move_forward(); goto FoundToken; } case '*': { token.Text = scanner; token.Length = 1; token.Type = TokType::Star; token.Flags |= TF_Specifier; token.Flags |= TF_Operator; if (left) move_forward(); if ( current == '=' ) { token.Length++; token.Flags |= TF_Assign; // token.Type = TokType::Assign_Multiply; if ( left ) move_forward(); } goto FoundToken; } case ';': { token.Text = scanner; token.Length = 1; token.Type = TokType::Statement_End; token.Flags = TF_EndDefinition; if (left) move_forward(); end_line(); goto FoundToken; } case '"': { token.Text = scanner; token.Length = 1; token.Type = TokType::String; token.Flags |= TF_Literal; move_forward(); while ( left ) { if ( current == '"' ) { move_forward(); break; } if ( current == '\\' ) { move_forward(); token.Length++; if ( left ) { move_forward(); token.Length++; } continue; } move_forward(); token.Length++; } goto FoundToken; } case '?': { token.Text = scanner; token.Length = 1; token.Type = TokType::Operator; // token.Type = TokType::Ternary; token.Flags = TF_Operator; if (left) move_forward(); goto FoundToken; } case '=': { token.Text = scanner; token.Length = 1; token.Type = TokType::Operator; // token.Type = TokType::Assign; token.Flags = TF_Operator; token.Flags |= TF_Assign; if (left) move_forward(); if ( current == '=' ) { token.Length++; token.Flags = TF_Operator; if (left) move_forward(); } goto FoundToken; } case '+': { // token.Type = TokType::Add } case '%': { // token.Type = TokType::Modulo; } case '^': { // token.Type = TokType::B_XOr; } case '~': { // token.Type = TokType::Unary_Not; } case '!': { // token.Type = TokType::L_Not; } case '<': { // token.Type = TokType::Lesser; } case '>': { // token.Type = TokType::Greater; } case '|': { token.Text = scanner; token.Length = 1; token.Type = TokType::Operator; token.Flags = TF_Operator; // token.Type = TokType::L_Or; if (left) move_forward(); if ( current == '=' ) { token.Length++; token.Flags |= TF_Assign; // token.Flags |= TokFlags::Assignment; // token.Type = TokType::Assign_L_Or; if (left) move_forward(); } else while ( left && current == *(scanner - 1) && token.Length < 3 ) { token.Length++; if (left) move_forward(); } goto FoundToken; } // Dash is unfortunatlly a bit more complicated... case '-': { token.Text = scanner; token.Length = 1; token.Type = TokType::Operator; // token.Type = TokType::Subtract; token.Flags = TF_Operator; if ( left ) { move_forward(); if ( current == '>' ) { token.Length++; // token.Type = TokType::Access_PointerToMemberSymbol; token.Flags |= TF_AccessSpecifier; move_forward(); if ( current == '*' ) { // token.Type = TokType::Access_PointerToMemberOfPointerSymbol; token.Length++; move_forward(); } } else if ( current == '=' ) { token.Length++; // token.Type = TokType::Assign_Subtract; token.Flags |= TF_Assign; if (left) move_forward(); } else while ( left && current == *(scanner - 1) && token.Length < 3 ) { token.Length++; if (left) move_forward(); } } goto FoundToken; } case '/': { token.Text = scanner; token.Length = 1; token.Type = TokType::Operator; // token.Type = TokType::Divide; token.Flags = TF_Operator; move_forward(); if ( left ) { if ( current == '=' ) { // token.Type = TokeType::Assign_Divide; move_forward(); token.Length++; token.Flags = TF_Assign; } else if ( current == '/' ) { token.Type = TokType::Comment; token.Length = 2; token.Flags = TF_Null; move_forward(); while ( left && current != '\n' && current != '\r' ) { move_forward(); token.Length++; } if ( current == '\r' ) { move_forward(); token.Length++; } if ( current == '\n' ) { move_forward(); token.Length++; } Tokens.append( token ); continue; } else if ( current == '*' ) { token.Type = TokType::Comment; token.Length = 2; token.Flags = TF_Null; move_forward(); bool star = current == '*'; bool slash = scanner[1] == '/'; bool at_end = star && slash; while ( left && ! at_end ) { move_forward(); token.Length++; star = current == '*'; slash = scanner[1] == '/'; at_end = star && slash; } token.Length += 2; move_forward(); move_forward(); if ( current == '\r' ) { move_forward(); token.Length++; } if ( current == '\n' ) { move_forward(); token.Length++; } Tokens.append( token ); // end_line(); continue; } } goto FoundToken; } } if ( char_is_alpha( current ) || current == '_' ) { token.Text = scanner; token.Length = 1; move_forward(); while ( left && ( char_is_alphanumeric(current) || current == '_' ) ) { move_forward(); token.Length++; } goto FoundToken; } else if ( char_is_digit(current) ) { // This is a very brute force lex, no checks are done for validity of literal. token.Text = scanner; token.Length = 1; token.Type = TokType::Number; token.Flags = TF_Literal; move_forward(); if (left && ( current == 'x' || current == 'X' || current == 'b' || current == 'B' || current == 'o' || current == 'O' ) ) { move_forward(); token.Length++; while ( left && char_is_hex_digit(current) ) { move_forward(); token.Length++; } goto FoundToken; } while ( left && char_is_digit(current) ) { move_forward(); token.Length++; } if ( left && current == '.' ) { move_forward(); token.Length++; while ( left && char_is_digit(current) ) { move_forward(); token.Length++; } } goto FoundToken; } else { s32 start = max( 0, Tokens.num() - 100 ); log_fmt("\n%d\n", start); for ( s32 idx = start; idx < Tokens.num(); idx++ ) { log_fmt( "Token %d Type: %s : %.*s\n" , idx , ETokType::to_str( Tokens[ idx ].Type ).Ptr , Tokens[ idx ].Length, Tokens[ idx ].Text ); } String context_str = String::fmt_buf( GlobalAllocator, "%.*s", min( 100, left ), scanner ); log_failure( "Failed to lex token '%c' (%d, %d)\n%s", current, line, column, context_str ); // Skip to next whitespace since we can't know if anything else is valid until then. while ( left && ! char_is_space( current ) ) { move_forward(); } } FoundToken: lex_found_token( content, left, scanner, line, column, defines, token ); } if ( Tokens.num() == 0 ) { log_failure( "Failed to lex any tokens" ); return { { nullptr }, 0 }; } defines.clear(); // defines_map_arena.free(); return { Tokens, 0 }; } #undef current #undef move_forward #undef SkipWhitespace // namespace parser }