From 107681825073265679a9e190a1fc526052baf7e4 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Sun, 3 Sep 2023 20:29:45 -0400 Subject: [PATCH] Got whitespace stripping properly working (AFAICT) for parse_define(). Made debug for viewing whitespace in AST::is_equal with String::visualize_whitespace(). Format stripping code is currently confined within parse_define(). I plan to move it to its own function soon; I just want to make sure it's finalized first. Other unvalidated content will need to have an extra check for preprocessed lines. Example: Function bodies can have a #define directive line. I cannot strip the last newline of that line, as it will break the semantic importance to distinguish that line. So it needs to be: the directive's content followed by its newline, in the content string that is minimally preserved. --- project/components/ast.cpp | 160 ++++++++++++++--------- project/components/interface.parsing.cpp | 153 ++++++++++++++++++++-- project/dependencies/strings.hpp | 47 ++++++- 3 files changed, 281 insertions(+), 79 deletions(-) diff --git a/project/components/ast.cpp b/project/components/ast.cpp index b5e1866..c405510 100644 --- a/project/components/ast.cpp +++ b/project/components/ast.cpp @@ -981,36 +981,36 @@ bool AST::is_equal( AST* other ) { using namespace ECode; - #define check_member_val( val ) \ - if ( val != other->val ) \ - { \ - log_fmt("AST::is_equal: Member - " #val " failed\n" \ - "AST : %S\n" \ - "Other: %S\n" \ - , debug_str() \ - , other->debug_str() \ - ); \ - \ - return false; \ + #define check_member_val( val ) \ + if ( val != other->val ) \ + { \ + log_fmt("\nAST::is_equal: Member - " #val " failed\n" \ + "AST : %S\n" \ + "Other: %S\n" \ + , debug_str() \ + , other->debug_str() \ + ); \ + \ + return false; \ } - #define check_member_str( str ) \ - if ( str != other->str ) \ - { \ - log_fmt("AST::is_equal: Member string - "#str " failed\n" \ - "AST : %S\n" \ - "Other: %S\n" \ - , debug_str() \ - , other->debug_str() \ - ); \ - \ - return false; \ + #define check_member_str( str ) \ + if ( str != other->str ) \ + { \ + 
log_fmt("\nAST::is_equal: Member string - "#str " failed\n" \ + "AST : %S\n" \ + "Other: %S\n" \ + , debug_str() \ + , other->debug_str() \ + ); \ + \ + return false; \ } - #define check_member_content( content ) \ - if ( content != other->content ) \ - { \ - log_fmt("AST::is_equal: Member content - "#content " failed\n" \ + #define check_member_content( content ) \ + if ( content != other->content ) \ + { \ + log_fmt("\nAST::is_equal: Member content - "#content " failed\n" \ "AST : %S\n" \ "Other: %S\n" \ , debug_str() \ @@ -1021,43 +1021,43 @@ bool AST::is_equal( AST* other ) "so it must be verified by eye for now\n" \ "AST Content:\n%S\n" \ "Other Content:\n%S\n" \ - , content \ - , other->content \ + , content.visualize_whitespace() \ + , other->content.visualize_whitespace() \ ); \ } - #define check_member_ast( ast ) \ - if ( ast ) \ - { \ - if ( other->ast == nullptr ) \ - { \ - log_fmt("AST::is_equal: Failed for member " #ast " other equivalent param is null\n" \ - "AST : %s\n" \ - "Other: %s\n" \ - "For ast member: %s\n" \ - , debug_str() \ - , other->debug_str() \ - , ast->debug_str() \ - ); \ - \ - return false; \ - } \ - \ - if ( ! ast->is_equal( other->ast ) ) \ - { \ - log_fmt( "AST::is_equal: Failed for " #ast"\n" \ - "AST : %S\n" \ - "Other: %S\n" \ - "For ast member: %S\n" \ - "other's ast member: %S\n" \ - , debug_str() \ - , other->debug_str() \ - , ast->debug_str() \ - , other->ast->debug_str() \ - ); \ - \ - return false; \ - } \ + #define check_member_ast( ast ) \ + if ( ast ) \ + { \ + if ( other->ast == nullptr ) \ + { \ + log_fmt("\nAST::is_equal: Failed for member " #ast " other equivalent param is null\n" \ + "AST : %s\n" \ + "Other: %s\n" \ + "For ast member: %s\n" \ + , debug_str() \ + , other->debug_str() \ + , ast->debug_str() \ + ); \ + \ + return false; \ + } \ + \ + if ( ! 
ast->is_equal( other->ast ) ) \ + { \ + log_fmt( "\nAST::is_equal: Failed for " #ast"\n" \ + "AST : %S\n" \ + "Other: %S\n" \ + "For ast member: %S\n" \ + "other's ast member: %S\n" \ + , debug_str() \ + , other->debug_str() \ + , ast->debug_str() \ + , other->ast->debug_str() \ + ); \ + \ + return false; \ + } \ } case NewLine: @@ -1313,7 +1313,7 @@ bool AST::is_equal( AST* other ) { if ( curr_other == nullptr ) { - log_fmt("AST::is_equal: Failed for parameter, other equivalent param is null\n" + log_fmt("\nAST::is_equal: Failed for parameter, other equivalent param is null\n" "AST : %S\n" "Other: %S\n" "For ast member: %S\n" @@ -1323,9 +1323,39 @@ bool AST::is_equal( AST* other ) return false; } - if ( ! curr->is_equal( curr_other ) ) + if ( ! curr->Name != curr_other->Name ) { - log_fmt( "AST::is_equal: Failed for parameter\n" + log_fmt( "\nAST::is_equal: Failed for parameter name check\n" + "AST : %S\n" + "Other: %S\n" + "For ast member: %S\n" + "other's ast member: %S\n" + , debug_str() + , other->debug_str() + , curr->debug_str() + , curr_other->debug_str() + ); + return false; + } + + if ( curr->ValueType && ! curr->ValueType->is_equal(curr_other->ValueType) ) + { + log_fmt( "\nAST::is_equal: Failed for parameter value type check\n" + "AST : %S\n" + "Other: %S\n" + "For ast member: %S\n" + "other's ast member: %S\n" + , debug_str() + , other->debug_str() + , curr->debug_str() + , curr_other->debug_str() + ); + return false; + } + + if ( curr->Value && ! 
curr->Value->is_equal(curr_other->Value) ) + { + log_fmt( "\nAST::is_equal: Failed for parameter value check\n" "AST : %S\n" "Other: %S\n" "For ast member: %S\n" @@ -1494,7 +1524,7 @@ bool AST::is_equal( AST* other ) { if ( curr_other == nullptr ) { - log_fmt("AST::is_equal: Failed for body, other equivalent param is null\n" + log_fmt("\nAST::is_equal: Failed for body, other equivalent param is null\n" "AST : %S\n" "Other: %S\n" "For ast member: %S\n" @@ -1506,7 +1536,7 @@ bool AST::is_equal( AST* other ) if ( ! curr->is_equal( curr_other ) ) { - log_fmt( "AST::is_equal: Failed for body\n" + log_fmt( "\nAST::is_equal: Failed for body\n" "AST : %S\n" "Other: %S\n" "For ast member: %S\n" diff --git a/project/components/interface.parsing.cpp b/project/components/interface.parsing.cpp index 87eba4b..f13c962 100644 --- a/project/components/interface.parsing.cpp +++ b/project/components/interface.parsing.cpp @@ -1276,19 +1276,148 @@ CodeDefine parse_define() return CodeInvalid; } - // s32 left = currtok.Length; - // char const* scanner = currtok.Text; - // while ( left ) - // { - // if ( scanner[0] == ' ' ) - // { - // scanner++; - // left--; - // continue; - // } - // } + if ( currtok.Length == 0 ) + { + define->Content = get_cached_string( currtok ); + eat( TokType::Preprocess_Content ); - define->Content = get_cached_string( currtok ); + Context.pop(); + return define; + } + + String content = String::make_reserve( GlobalAllocator, currtok.Length ); + +#define cut_length ( scanner - currtok.Text - last_cut ) +#define cut_ptr ( currtok.Text + last_cut ) +#define pos ( sptr( scanner ) - sptr( currtok.Text ) ) + s32 tokleft = currtok.Length; + sptr last_cut = 0; + char const* scanner = currtok.Text; + + if ( scanner[0] == ' ' ) + { + ++ scanner; + -- tokleft; + last_cut = 1; + } + + while ( tokleft ) + { + if ( tokleft > 1 && char_is_space( scanner[0] ) && char_is_space( scanner[ 1 ] ) ) + { + content.append( cut_ptr, cut_length ); + do + { + ++ scanner; + -- tokleft; + 
} + while ( tokleft && char_is_space( scanner[0] ) ); + + last_cut = sptr( scanner ) - sptr( currtok.Text ); + + // Preserve only 1 space of formattting + if ( content.back() != ' ' ) + content.append( ' ' ); + continue; + } + + if ( scanner[0] == '\t' ) + { + if ( pos > last_cut ) + content.append( cut_ptr, cut_length ); + + // Replace with a space + if ( content.back() != ' ' ) + content.append( ' ' ); + + ++ scanner; + -- tokleft; + last_cut = sptr( scanner ) - sptr( currtok.Text ); + continue; + } + + if ( tokleft > 1 && scanner[0] == '\r' && scanner[1] == '\n' ) + { + if ( pos > last_cut ) + content.append( cut_ptr, cut_length ); + + // Replace with a space + if ( content.back() != ' ' ) + content.append( ' ' ); + + scanner += 2; + tokleft -= 2; + last_cut = sptr( scanner ) - sptr( currtok.Text ); + continue; + } + + if ( scanner[0] == '\n' ) + { + if ( pos > last_cut ) + content.append( cut_ptr, cut_length ); + + // Replace with a space + if ( content.back() != ' ' ) + content.append( ' ' ); + + ++ scanner; + -- tokleft; + last_cut = sptr( scanner ) - sptr( currtok.Text ); + continue; + } + + if ( scanner[0] == '\\' ) + { + s32 amount_to_skip = 1; + if ( tokleft > 1 && scanner[1] == '\n' ) + { + amount_to_skip = 2; + } + else if ( tokleft > 2 && scanner[1] == '\r' && scanner[2] == '\n' ) + { + amount_to_skip = 3; + } + + if ( amount_to_skip > 1 ) + { + if ( pos == last_cut ) + { + // If the backslash is the first character on the line, then skip it + scanner += amount_to_skip; + tokleft -= amount_to_skip; + last_cut = sptr( scanner ) - sptr( currtok.Text ); + continue; + } + + // We have content to add. 
+ content.append( cut_ptr, pos - last_cut ); + + scanner += amount_to_skip; + tokleft -= amount_to_skip; + } + else + { + ++ scanner; + -- tokleft; + } + + last_cut = sptr( scanner ) - sptr( currtok.Text ); + continue; + } + + ++ scanner; + -- tokleft; + } + + if ( last_cut < currtok.Length ) + { + content.append( cut_ptr, currtok.Length - last_cut ); + } +#undef cut_ptr +#undef cut_length +#undef pos + + define->Content = get_cached_string( content ); eat( TokType::Preprocess_Content ); Context.pop(); diff --git a/project/dependencies/strings.hpp b/project/dependencies/strings.hpp index b834575..cc57a96 100644 --- a/project/dependencies/strings.hpp +++ b/project/dependencies/strings.hpp @@ -101,6 +101,11 @@ struct String bool make_space_for( char const* str, sw add_len ); + bool append( char c ) + { + return append( & c, 1 ); + } + bool append( char const* str ) { return append( str, str_len( str ) ); @@ -264,14 +269,52 @@ struct String return trim( " \t\r\n\v\f" ); } + // Debug function that provides a copy of the string with whitespace characters visualized. + String visualize_whitespace() const + { + Header* header = (Header*)(Data - sizeof(Header)); + + String result = make_reserve(header->Allocator, length() * 2); // Assume worst case for space requirements. + + for ( char c : *this ) + { + switch ( c ) + { + case ' ': + result.append('·'); + break; + case '\t': + result.append('→'); + break; + case '\n': + result.append('↵'); + break; + case '\r': + result.append('⏎'); + break; + case '\v': + result.append('⇕'); + break; + case '\f': + result.append('⌂'); + break; + default: + result.append(c); + break; + } + } + + return result; + } + // For-range support - char* begin() + char* begin() const { return Data; } - char* end() + char* end() const { Header const& header = * rcast( Header const*, Data - sizeof( Header ));