Got whitespace stripping working properly (AFAICT) for parse_define()

Added debug output for viewing whitespace in AST::is_equal via String::visualize_whitespace()

Format stripping code is currently confined within parse_define()

I plan to move it to its own function soon; I just want to make sure it's finalized first.

Other unvalidated content will need to have an extra check for preprocessed lines.
Example: function bodies can contain a #define <identifier> <definition>. I cannot strip the trailing <new line> of that line, because the new line is semantically required to mark where the preprocessor line ends.
So it needs to be:
<content before> <new line>
<preprocessed line> <new line>
<content after>

in the minimally preserved content string.
This commit is contained in:
Edward R. Gonzalez 2023-09-03 20:29:45 -04:00
parent c4c308c8ba
commit 1076818250
3 changed files with 281 additions and 79 deletions

View File

@ -984,7 +984,7 @@ bool AST::is_equal( AST* other )
#define check_member_val( val ) \ #define check_member_val( val ) \
if ( val != other->val ) \ if ( val != other->val ) \
{ \ { \
log_fmt("AST::is_equal: Member - " #val " failed\n" \ log_fmt("\nAST::is_equal: Member - " #val " failed\n" \
"AST : %S\n" \ "AST : %S\n" \
"Other: %S\n" \ "Other: %S\n" \
, debug_str() \ , debug_str() \
@ -997,7 +997,7 @@ bool AST::is_equal( AST* other )
#define check_member_str( str ) \ #define check_member_str( str ) \
if ( str != other->str ) \ if ( str != other->str ) \
{ \ { \
log_fmt("AST::is_equal: Member string - "#str " failed\n" \ log_fmt("\nAST::is_equal: Member string - "#str " failed\n" \
"AST : %S\n" \ "AST : %S\n" \
"Other: %S\n" \ "Other: %S\n" \
, debug_str() \ , debug_str() \
@ -1010,7 +1010,7 @@ bool AST::is_equal( AST* other )
#define check_member_content( content ) \ #define check_member_content( content ) \
if ( content != other->content ) \ if ( content != other->content ) \
{ \ { \
log_fmt("AST::is_equal: Member content - "#content " failed\n" \ log_fmt("\nAST::is_equal: Member content - "#content " failed\n" \
"AST : %S\n" \ "AST : %S\n" \
"Other: %S\n" \ "Other: %S\n" \
, debug_str() \ , debug_str() \
@ -1021,8 +1021,8 @@ bool AST::is_equal( AST* other )
"so it must be verified by eye for now\n" \ "so it must be verified by eye for now\n" \
"AST Content:\n%S\n" \ "AST Content:\n%S\n" \
"Other Content:\n%S\n" \ "Other Content:\n%S\n" \
, content \ , content.visualize_whitespace() \
, other->content \ , other->content.visualize_whitespace() \
); \ ); \
} }
@ -1031,7 +1031,7 @@ bool AST::is_equal( AST* other )
{ \ { \
if ( other->ast == nullptr ) \ if ( other->ast == nullptr ) \
{ \ { \
log_fmt("AST::is_equal: Failed for member " #ast " other equivalent param is null\n" \ log_fmt("\nAST::is_equal: Failed for member " #ast " other equivalent param is null\n" \
"AST : %s\n" \ "AST : %s\n" \
"Other: %s\n" \ "Other: %s\n" \
"For ast member: %s\n" \ "For ast member: %s\n" \
@ -1045,7 +1045,7 @@ bool AST::is_equal( AST* other )
\ \
if ( ! ast->is_equal( other->ast ) ) \ if ( ! ast->is_equal( other->ast ) ) \
{ \ { \
log_fmt( "AST::is_equal: Failed for " #ast"\n" \ log_fmt( "\nAST::is_equal: Failed for " #ast"\n" \
"AST : %S\n" \ "AST : %S\n" \
"Other: %S\n" \ "Other: %S\n" \
"For ast member: %S\n" \ "For ast member: %S\n" \
@ -1313,7 +1313,7 @@ bool AST::is_equal( AST* other )
{ {
if ( curr_other == nullptr ) if ( curr_other == nullptr )
{ {
log_fmt("AST::is_equal: Failed for parameter, other equivalent param is null\n" log_fmt("\nAST::is_equal: Failed for parameter, other equivalent param is null\n"
"AST : %S\n" "AST : %S\n"
"Other: %S\n" "Other: %S\n"
"For ast member: %S\n" "For ast member: %S\n"
@ -1323,9 +1323,39 @@ bool AST::is_equal( AST* other )
return false; return false;
} }
if ( ! curr->is_equal( curr_other ) ) if ( ! curr->Name != curr_other->Name )
{ {
log_fmt( "AST::is_equal: Failed for parameter\n" log_fmt( "\nAST::is_equal: Failed for parameter name check\n"
"AST : %S\n"
"Other: %S\n"
"For ast member: %S\n"
"other's ast member: %S\n"
, debug_str()
, other->debug_str()
, curr->debug_str()
, curr_other->debug_str()
);
return false;
}
if ( curr->ValueType && ! curr->ValueType->is_equal(curr_other->ValueType) )
{
log_fmt( "\nAST::is_equal: Failed for parameter value type check\n"
"AST : %S\n"
"Other: %S\n"
"For ast member: %S\n"
"other's ast member: %S\n"
, debug_str()
, other->debug_str()
, curr->debug_str()
, curr_other->debug_str()
);
return false;
}
if ( curr->Value && ! curr->Value->is_equal(curr_other->Value) )
{
log_fmt( "\nAST::is_equal: Failed for parameter value check\n"
"AST : %S\n" "AST : %S\n"
"Other: %S\n" "Other: %S\n"
"For ast member: %S\n" "For ast member: %S\n"
@ -1494,7 +1524,7 @@ bool AST::is_equal( AST* other )
{ {
if ( curr_other == nullptr ) if ( curr_other == nullptr )
{ {
log_fmt("AST::is_equal: Failed for body, other equivalent param is null\n" log_fmt("\nAST::is_equal: Failed for body, other equivalent param is null\n"
"AST : %S\n" "AST : %S\n"
"Other: %S\n" "Other: %S\n"
"For ast member: %S\n" "For ast member: %S\n"
@ -1506,7 +1536,7 @@ bool AST::is_equal( AST* other )
if ( ! curr->is_equal( curr_other ) ) if ( ! curr->is_equal( curr_other ) )
{ {
log_fmt( "AST::is_equal: Failed for body\n" log_fmt( "\nAST::is_equal: Failed for body\n"
"AST : %S\n" "AST : %S\n"
"Other: %S\n" "Other: %S\n"
"For ast member: %S\n" "For ast member: %S\n"

View File

@ -1276,18 +1276,8 @@ CodeDefine parse_define()
return CodeInvalid; return CodeInvalid;
} }
// s32 left = currtok.Length; if ( currtok.Length == 0 )
// char const* scanner = currtok.Text; {
// while ( left )
// {
// if ( scanner[0] == ' ' )
// {
// scanner++;
// left--;
// continue;
// }
// }
define->Content = get_cached_string( currtok ); define->Content = get_cached_string( currtok );
eat( TokType::Preprocess_Content ); eat( TokType::Preprocess_Content );
@ -1295,6 +1285,145 @@ CodeDefine parse_define()
return define; return define;
} }
String content = String::make_reserve( GlobalAllocator, currtok.Length );
#define cut_length ( scanner - currtok.Text - last_cut )
#define cut_ptr ( currtok.Text + last_cut )
#define pos ( sptr( scanner ) - sptr( currtok.Text ) )
s32 tokleft = currtok.Length;
sptr last_cut = 0;
char const* scanner = currtok.Text;
if ( scanner[0] == ' ' )
{
++ scanner;
-- tokleft;
last_cut = 1;
}
while ( tokleft )
{
if ( tokleft > 1 && char_is_space( scanner[0] ) && char_is_space( scanner[ 1 ] ) )
{
content.append( cut_ptr, cut_length );
do
{
++ scanner;
-- tokleft;
}
while ( tokleft && char_is_space( scanner[0] ) );
last_cut = sptr( scanner ) - sptr( currtok.Text );
// Preserve only 1 space of formattting
if ( content.back() != ' ' )
content.append( ' ' );
continue;
}
if ( scanner[0] == '\t' )
{
if ( pos > last_cut )
content.append( cut_ptr, cut_length );
// Replace with a space
if ( content.back() != ' ' )
content.append( ' ' );
++ scanner;
-- tokleft;
last_cut = sptr( scanner ) - sptr( currtok.Text );
continue;
}
if ( tokleft > 1 && scanner[0] == '\r' && scanner[1] == '\n' )
{
if ( pos > last_cut )
content.append( cut_ptr, cut_length );
// Replace with a space
if ( content.back() != ' ' )
content.append( ' ' );
scanner += 2;
tokleft -= 2;
last_cut = sptr( scanner ) - sptr( currtok.Text );
continue;
}
if ( scanner[0] == '\n' )
{
if ( pos > last_cut )
content.append( cut_ptr, cut_length );
// Replace with a space
if ( content.back() != ' ' )
content.append( ' ' );
++ scanner;
-- tokleft;
last_cut = sptr( scanner ) - sptr( currtok.Text );
continue;
}
if ( scanner[0] == '\\' )
{
s32 amount_to_skip = 1;
if ( tokleft > 1 && scanner[1] == '\n' )
{
amount_to_skip = 2;
}
else if ( tokleft > 2 && scanner[1] == '\r' && scanner[2] == '\n' )
{
amount_to_skip = 3;
}
if ( amount_to_skip > 1 )
{
if ( pos == last_cut )
{
// If the backslash is the first character on the line, then skip it
scanner += amount_to_skip;
tokleft -= amount_to_skip;
last_cut = sptr( scanner ) - sptr( currtok.Text );
continue;
}
// We have content to add.
content.append( cut_ptr, pos - last_cut );
scanner += amount_to_skip;
tokleft -= amount_to_skip;
}
else
{
++ scanner;
-- tokleft;
}
last_cut = sptr( scanner ) - sptr( currtok.Text );
continue;
}
++ scanner;
-- tokleft;
}
if ( last_cut < currtok.Length )
{
content.append( cut_ptr, currtok.Length - last_cut );
}
#undef cut_ptr
#undef cut_length
#undef pos
define->Content = get_cached_string( content );
eat( TokType::Preprocess_Content );
Context.pop();
return define;
}
internal internal
CodePreprocessCond parse_preprocess_cond() CodePreprocessCond parse_preprocess_cond()
{ {

View File

@ -101,6 +101,11 @@ struct String
bool make_space_for( char const* str, sw add_len ); bool make_space_for( char const* str, sw add_len );
bool append( char c )
{
return append( & c, 1 );
}
bool append( char const* str ) bool append( char const* str )
{ {
return append( str, str_len( str ) ); return append( str, str_len( str ) );
@ -264,14 +269,52 @@ struct String
return trim( " \t\r\n\v\f" ); return trim( " \t\r\n\v\f" );
} }
// Debug function that provides a copy of the string with whitespace characters visualized.
// Each whitespace character is replaced with a visible UTF-8 glyph; every other character is copied unchanged.
// The copy is made with the allocator stored in this string's header.
//
// Mapping: ' ' -> "·"   '\t' -> "→"   '\n' -> "↵"   '\r' -> "⏎"   '\v' -> "⇕"   '\f' -> "⌂"
String visualize_whitespace() const
{
	Header* header = (Header*)(Data - sizeof(Header));

	// The replacement glyphs occupy up to 3 bytes each in UTF-8, so reserve for that worst case.
	String result = make_reserve( header->Allocator, length() * 3 );

	for ( char c : *this )
	{
		// The glyphs are multi-byte sequences, so they are appended as string literals.
		// A char literal cannot hold them: it would be truncated to a single byte.
		switch ( c )
		{
			case ' ':
				result.append( "·" );
			break;

			case '\t':
				result.append( "→" );
			break;

			case '\n':
				result.append( "↵" );
			break;

			case '\r':
				result.append( "⏎" );
			break;

			case '\v':
				result.append( "⇕" );
			break;

			case '\f':
				result.append( "⌂" );
			break;

			default:
				result.append( c );
			break;
		}
	}

	return result;
}
// For-range support // For-range support
char* begin() char* begin() const
{ {
return Data; return Data;
} }
char* end() char* end() const
{ {
Header const& Header const&
header = * rcast( Header const*, Data - sizeof( Header )); header = * rcast( Header const*, Data - sizeof( Header ));