Progress on strip_formatting function, support for multi-dimensional array variables and typenames.

strip_formatting suffers from some edge-case failures with what look to be escaped character literals (not entirely sure).

I've decided to not remove formatting from unvalidated function bodies since I plan to support parsing its content properly.
However expression values for a statement will fail to have their formatting removed with this.

Since I don't plan to parse those anytime soon, I'll have to fix any edge cases for those at least.
This commit is contained in:
Edward R. Gonzalez 2023-09-06 00:30:34 -04:00
parent 2200bcde9a
commit f1fb75cc1c
3 changed files with 345 additions and 228 deletions

View File

@ -26,7 +26,9 @@
"ratio": "cpp",
"xstring": "cpp",
"functional": "cpp",
"vector": "cpp"
"vector": "cpp",
"list": "cpp",
"xhash": "cpp"
},
"C_Cpp.intelliSenseEngineFallback": "disabled",
"mesonbuild.configureOnOpen": true,

View File

@ -781,7 +781,14 @@ String AST::to_string()
if ( UnderlyingType->Type == Typename && UnderlyingType->ArrExpr )
{
result.append_fmt( "[%S];", UnderlyingType->ArrExpr->to_string() );
result.append_fmt( "[ %S ];", UnderlyingType->ArrExpr->to_string() );
AST* next_arr_expr = UnderlyingType->ArrExpr->Next;
while ( next_arr_expr )
{
result.append_fmt( "[ %S ];", next_arr_expr->to_string() );
next_arr_expr = next_arr_expr->Next;
}
}
else
{
@ -885,7 +892,16 @@ String AST::to_string()
result.append_fmt( "using %S = %S", Name, UnderlyingType->to_string() );
if ( UnderlyingType->ArrExpr )
result.append_fmt( "[%S]", UnderlyingType->ArrExpr->to_string() );
{
result.append_fmt( "[ %S ]", UnderlyingType->ArrExpr->to_string() );
AST* next_arr_expr = UnderlyingType->ArrExpr->Next;
while ( next_arr_expr )
{
result.append_fmt( "[ %S ]", next_arr_expr->to_string() );
next_arr_expr = next_arr_expr->Next;
}
}
result.append( ";" );
}
@ -922,7 +938,16 @@ String AST::to_string()
result.append_fmt( "%S %S", ValueType->to_string(), Name );
if ( ValueType->ArrExpr )
result.append_fmt( "[%S]", ValueType->ArrExpr->to_string() );
{
result.append_fmt( "[ %S ]", ValueType->ArrExpr->to_string() );
AST* next_arr_expr = ValueType->ArrExpr->Next;
while ( next_arr_expr )
{
result.append_fmt( "[ %S ]", next_arr_expr->to_string() );
next_arr_expr = next_arr_expr->Next;
}
}
if ( BitfieldSize )
result.append_fmt( " : %S", BitfieldSize->to_string() );
@ -941,11 +966,22 @@ String AST::to_string()
if ( BitfieldSize )
result.append_fmt( "%S %S : %S;", ValueType->to_string(), Name, BitfieldSize->to_string() );
else if ( UnderlyingType->ArrExpr )
result.append_fmt( "%S %S[%S];", UnderlyingType->to_string(), Name, UnderlyingType->ArrExpr->to_string() );
else if ( ValueType->ArrExpr )
{
result.append_fmt( "%S %S[ %S ]", ValueType->to_string(), Name, ValueType->ArrExpr->to_string() );
AST* next_arr_expr = ValueType->ArrExpr->Next;
while ( next_arr_expr )
{
result.append_fmt( "[ %S ]", next_arr_expr->to_string() );
next_arr_expr = next_arr_expr->Next;
}
result.append( ";" );
}
else
result.append_fmt( "%S %S;", UnderlyingType->to_string(), Name );
result.append_fmt( "%S %S;", ValueType->to_string(), Name );
if ( InlineCmt )
result.append_fmt(" %S", InlineCmt->Content);
@ -1096,15 +1132,15 @@ bool AST::is_equal( AST* other )
// Comments are not validated.
case Comment:
// return true;
return true;
case Execution:
case PlatformAttributes:
case Untyped:
{
check_member_content( Content );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
case Class_Fwd:
@ -1114,8 +1150,6 @@ bool AST::is_equal( AST* other )
check_member_ast( ParentType );
check_member_val( ParentAccess );
check_member_ast( Attributes );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1129,8 +1163,6 @@ bool AST::is_equal( AST* other )
check_member_val( ParentAccess );
check_member_ast( Attributes );
check_member_ast( Body );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1140,8 +1172,6 @@ bool AST::is_equal( AST* other )
check_member_ast( InitializerList );
check_member_ast( Params );
check_member_ast( Body );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1150,8 +1180,6 @@ bool AST::is_equal( AST* other )
{
check_member_ast( InitializerList );
check_member_ast( Params );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1160,8 +1188,6 @@ bool AST::is_equal( AST* other )
{
check_member_ast( Specs );
check_member_ast( Body );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1169,8 +1195,6 @@ bool AST::is_equal( AST* other )
case Destructor_Fwd:
{
check_member_ast( Specs );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1183,8 +1207,6 @@ bool AST::is_equal( AST* other )
check_member_ast( Attributes );
check_member_ast( UnderlyingType );
check_member_ast( Body );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1196,8 +1218,6 @@ bool AST::is_equal( AST* other )
check_member_str( Name );
check_member_ast( Attributes );
check_member_ast( UnderlyingType );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1206,8 +1226,6 @@ bool AST::is_equal( AST* other )
{
check_member_str( Name );
check_member_ast( Body );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1216,8 +1234,6 @@ bool AST::is_equal( AST* other )
{
check_member_str( Name );
check_member_ast( Declaration );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1231,8 +1247,6 @@ bool AST::is_equal( AST* other )
check_member_ast( Specs );
check_member_ast( Params );
check_member_ast( Body );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1245,8 +1259,6 @@ bool AST::is_equal( AST* other )
check_member_ast( Attributes );
check_member_ast( Specs );
check_member_ast( Params );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1255,8 +1267,6 @@ bool AST::is_equal( AST* other )
{
check_member_val( ModuleFlags );
check_member_str( Name );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1266,8 +1276,6 @@ bool AST::is_equal( AST* other )
check_member_val( ModuleFlags );
check_member_str( Name );
check_member_ast( Body );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1282,8 +1290,6 @@ bool AST::is_equal( AST* other )
check_member_ast( Specs );
check_member_ast( Params );
check_member_ast( Body );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1297,8 +1303,6 @@ bool AST::is_equal( AST* other )
check_member_ast( Attributes );
check_member_ast( Specs );
check_member_ast( Params );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1309,8 +1313,6 @@ bool AST::is_equal( AST* other )
check_member_ast( Specs );
check_member_ast( ValueType );
check_member_ast( Body );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1320,8 +1322,6 @@ bool AST::is_equal( AST* other )
check_member_str( Name );
check_member_ast( Specs );
check_member_ast( ValueType );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1415,8 +1415,6 @@ bool AST::is_equal( AST* other )
{
check_member_str( Name );
check_member_content( Content );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1427,8 +1425,6 @@ bool AST::is_equal( AST* other )
case Preprocess_ElIf:
{
check_member_content( Content );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1437,8 +1433,6 @@ bool AST::is_equal( AST* other )
case Preprocess_Pragma:
{
check_member_content( Content );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1451,8 +1445,6 @@ bool AST::is_equal( AST* other )
{
check_member_val( ArrSpecs[ idx ] );
}
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1462,8 +1454,6 @@ bool AST::is_equal( AST* other )
check_member_str( Name );
check_member_ast( Params );
check_member_ast( Declaration );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1475,8 +1465,6 @@ bool AST::is_equal( AST* other )
check_member_str( Name );
check_member_ast( Specs );
check_member_ast( UnderlyingType );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1486,8 +1474,6 @@ bool AST::is_equal( AST* other )
check_member_str( Name );
check_member_ast( Specs );
check_member_ast( ArrExpr );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1498,8 +1484,6 @@ bool AST::is_equal( AST* other )
check_member_str( Name );
check_member_ast( Attributes );
check_member_ast( Body );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1511,8 +1495,6 @@ bool AST::is_equal( AST* other )
check_member_str( Name );
check_member_ast( UnderlyingType );
check_member_ast( Attributes );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}
@ -1526,8 +1508,6 @@ bool AST::is_equal( AST* other )
check_member_ast( Value );
check_member_ast( Attributes );
check_member_ast( Specs );
// check_member_ast( Prev );
// check_member_ast( Next );
return true;
}

View File

@ -396,10 +396,10 @@ namespace Parser
s32 within_char = false;
while ( left )
{
if ( current == '"' )
if ( current == '"' && ! within_char )
within_string ^= true;
if ( current == '\'' )
if ( current == '\'' && ! within_string )
within_char ^= true;
if ( current == '\\' && ! within_string && ! within_char )
@ -1252,6 +1252,276 @@ constexpr bool inplace_def = true;
// Internal parsing functions
constexpr bool strip_formatting_dont_preserve_newlines = false;
// constexpr bool strip_formatting_for_preprocess_define = true;
/*
	Strips formatting from a span of C++ source text and returns the result as a new String.

	raw_text          : The text to process. An empty span yields an empty String.
	preserve_newlines : When true, LF / CRLF newlines are copied to the output.
	                    When false, each newline is replaced by a single space,
	                    except for the newline that follows a line comment (see below).

	What gets copied verbatim:
	- String literals and character literals (escape sequences are skipped as a pair,
	  so an escaped quote or an escaped backslash never terminates the literal early).
	- Block comments and line comments (a line comment is copied together with its newline).

	What gets rewritten:
	- A single leading space is dropped.
	- A tab becomes one space (no space is added if the output already ends with one).
	- Runs of characters classified by char_is_space are collapsed to one space.
	- A backslash outside of a literal is dropped; when nothing is pending before it and it is
	  directly followed by a newline, the newline is dropped with it.

	Known limitations (nothing below recognises these constructs):
	- Raw string literals ( R"( ... )" ).
	- The ' digit separator; any ' is treated as the start of a character literal.
	These still prevent it from being used reliably in function bodies.

	NOTE(review): content.back() is called in several branches while content may still be empty
	(ex: raw_text starting with a tab or newline) - confirm String::back() tolerates that.
*/
String strip_formatting( StrC raw_text, bool preserve_newlines = true )
//, bool for_preprocess_define = false )
{
	String content = String::make_reserve( GlobalAllocator, raw_text.Len );

	if ( raw_text.Len == 0 )
		return content;

// Span helpers: [cut_ptr, cut_ptr + cut_length) is the text scanned since the last cut that has not been copied yet.
#define cut_length ( scanner - raw_text.Ptr - last_cut )
#define cut_ptr    ( raw_text.Ptr + last_cut )
#define pos        ( sptr( scanner ) - sptr( raw_text.Ptr ) )
#define move_fwd() do { scanner++; tokleft--; } while(0)

	s32         tokleft  = raw_text.Len;
	sptr        last_cut = 0;
	char const* scanner  = raw_text.Ptr;

	// Drop a single leading space.
	if ( scanner[0] == ' ' )
	{
		move_fwd();
		last_cut = 1;
	}

	// Set by a line comment so that a following newline is kept even when preserve_newlines is false.
	bool must_keep_newline = false;

	while ( tokleft )
	{
		// Skip over the content of string literals
		if ( scanner[0] == '"' )
		{
			move_fwd();

			// Escape sequences are consumed as a pair, so the first unescaped " closes the literal.
			// (Looking at the previous character instead would misread "\\" as still being open.)
			while ( tokleft && scanner[0] != '"' )
			{
				if ( scanner[0] == '\\' && tokleft > 1 )
				{
					scanner += 2;
					tokleft -= 2;
				}
				else
				{
					move_fwd();
				}
			}

			// Skip the closing "
			if ( tokleft )
				move_fwd();

			content.append( cut_ptr, cut_length );
			last_cut = sptr( scanner ) - sptr( raw_text.Ptr );
			continue;
		}

		// Skip over the content of character literals
		if ( scanner[0] == '\'' )
		{
			move_fwd();

			// Same pairwise escape handling as for string literals, so that '\\' and '\'' both close correctly.
			while ( tokleft && scanner[0] != '\'' )
			{
				if ( scanner[0] == '\\' && tokleft > 1 )
				{
					scanner += 2;
					tokleft -= 2;
				}
				else
				{
					move_fwd();
				}
			}

			// Skip the closing '
			if ( tokleft )
				move_fwd();

			content.append( cut_ptr, cut_length );
			last_cut = sptr( scanner ) - sptr( raw_text.Ptr );
			continue;
		}

		// Most likely removing as its only useful for function bodies.
		#if 0
		// Preprocessed lines
		if ( ! for_preprocess_define && scanner[0] == '#')
		{
			must_keep_newline = true;

			if ( content.back() != '\n' )
				content.append( '\n' );

			move_fwd();
			continue;
		}
		#endif

		// Block comments
		if ( tokleft > 1 && scanner[0] == '/' && scanner[1] == '*' )
		{
			// Step over the opening "/*" first so that "/*/" is not mistaken for a complete comment.
			scanner += 2;
			tokleft -= 2;

			while ( tokleft > 1 && !(scanner[0] == '*' && scanner[1] == '/') )
				move_fwd();

			if ( tokleft > 1 )
			{
				// Skip the closing "*/"
				scanner += 2;
				tokleft -= 2;
			}
			else
			{
				// Unterminated comment: consume whatever is left without stepping past the end of raw_text.
				scanner += tokleft;
				tokleft  = 0;
			}

			content.append( cut_ptr, cut_length );
			last_cut = sptr( scanner ) - sptr( raw_text.Ptr );
			continue;
		}

		// Line comments
		if ( tokleft > 1 && scanner[0] == '/' && scanner[1] == '/' )
		{
			// NOTE(review): the comment's own newline is consumed and copied below, so this flag
			// ends up preserving the *next* newline as well - confirm that is intended.
			must_keep_newline = true;

			// if ( content.back() != '\n' )
			// 	content.append( '\n' );

			scanner += 2;
			tokleft -= 2;

			while ( tokleft && scanner[ 0 ] != '\n' )
				move_fwd();

			// Take the newline along with the comment.
			if (tokleft)
				move_fwd();

			content.append( cut_ptr, cut_length );
			last_cut = sptr( scanner ) - sptr( raw_text.Ptr );
			continue;
		}

		// Tabs
		if (scanner[0] == '\t')
		{
			if (pos > last_cut)
				content.append(cut_ptr, cut_length);

			// Replace with a space
			if ( content.back() != ' ' )
				content.append(' ');

			move_fwd();
			last_cut = sptr(scanner) - sptr(raw_text.Ptr);
			continue;
		}

		// CRLF newlines
		if ( tokleft > 1 && scanner[0] == '\r' && scanner[1] == '\n' )
		{
			if ( must_keep_newline || preserve_newlines )
			{
				must_keep_newline = false;

				scanner += 2;
				tokleft -= 2;

				content.append( cut_ptr, cut_length );
				last_cut = sptr( scanner ) - sptr( raw_text.Ptr );
				continue;
			}

			if ( pos > last_cut )
				content.append( cut_ptr, cut_length );

			// Replace with a space
			if ( content.back() != ' ' )
				content.append( ' ' );

			scanner += 2;
			tokleft -= 2;

			last_cut = sptr( scanner ) - sptr( raw_text.Ptr );
			continue;
		}

		// LF newlines
		if ( scanner[0] == '\n' )
		{
			if ( must_keep_newline || preserve_newlines )
			{
				must_keep_newline = false;

				move_fwd();

				content.append( cut_ptr, cut_length );
				last_cut = sptr( scanner ) - sptr( raw_text.Ptr );
				continue;
			}

			if ( pos > last_cut )
				content.append( cut_ptr, cut_length );

			// Replace with a space
			if ( content.back() != ' ' )
				content.append( ' ' );

			move_fwd();

			last_cut = sptr( scanner ) - sptr( raw_text.Ptr );
			continue;
		}

		// Escaped newlines
		if ( scanner[0] == '\\' )
		{
			// Flush what precedes the backslash; the backslash itself is never copied.
			content.append( cut_ptr, cut_length );

			s32 amount_to_skip = 1;
			if ( tokleft > 1 && scanner[1] == '\n' )
			{
				amount_to_skip = 2;
			}
			else if ( tokleft > 2 && scanner[1] == '\r' && scanner[2] == '\n' )
			{
				amount_to_skip = 3;
			}

			if ( amount_to_skip > 1 && pos == last_cut )
			{
				// Nothing was pending before the backslash: drop the newline along with it.
				scanner += amount_to_skip;
				tokleft -= amount_to_skip;
			}
			else
			{
				// Only the backslash is dropped; a following newline is handled by the newline branches above.
				move_fwd();
			}

			last_cut = sptr( scanner ) - sptr( raw_text.Ptr );
			continue;
		}

		// Consecutive spaces
		if ( tokleft > 1 && char_is_space( scanner[0] ) && char_is_space( scanner[ 1 ] ) )
		{
			content.append( cut_ptr, cut_length );
			do
			{
				move_fwd();
			}
			while ( tokleft && char_is_space( scanner[0] ) );

			last_cut = sptr( scanner ) - sptr( raw_text.Ptr );

			// Preserve only 1 space of formatting
			if ( content.back() != ' ' )
				content.append( ' ' );

			continue;
		}

		move_fwd();
	}

	// Copy whatever trails the last cut.
	if ( last_cut < raw_text.Len )
	{
		content.append( cut_ptr, raw_text.Len - last_cut );
	}

#undef cut_ptr
#undef cut_length
#undef pos
#undef move_fwd

	return content;
}
internal
Code parse_array_decl()
{
@ -1280,7 +1550,7 @@ Code parse_array_decl()
if ( currtok.Type == TokType::BraceSquare_Close )
{
log_failure( "Error, empty array expression in typedef definition\n%s", Context.to_string() );
log_failure( "Error, empty array expression in definition\n%s", Context.to_string() );
Context.pop();
return CodeInvalid;
}
@ -1311,6 +1581,15 @@ Code parse_array_decl()
}
eat( TokType::BraceSquare_Close );
// Its a multi-dimensional array
if ( check( TokType::BraceSquare_Open ))
{
Code adjacent_arr_expr = parse_array_decl();
array_expr->Next = adjacent_arr_expr.ast;
}
Context.pop();
return array_expr;
}
@ -1384,9 +1663,7 @@ CodeAttributes parse_attributes()
StrC attribute_txt = { len, start.Text };
Context.pop();
String
name_stripped = String::make( GlobalAllocator, attribute_txt );
name_stripped.strip_space();
String name_stripped = strip_formatting( attribute_txt, strip_formatting_dont_preserve_newlines );
Code
result = make_code();
@ -1917,139 +2194,7 @@ CodeDefine parse_define()
return define;
}
String content = String::make_reserve( GlobalAllocator, currtok.Length );
#define cut_length ( scanner - currtok.Text - last_cut )
#define cut_ptr ( currtok.Text + last_cut )
#define pos ( sptr( scanner ) - sptr( currtok.Text ) )
s32 tokleft = currtok.Length;
sptr last_cut = 0;
char const* scanner = currtok.Text;
if ( scanner[0] == ' ' )
{
++ scanner;
-- tokleft;
last_cut = 1;
}
while ( tokleft )
{
if ( tokleft > 1 && char_is_space( scanner[0] ) && char_is_space( scanner[ 1 ] ) )
{
content.append( cut_ptr, cut_length );
do
{
++ scanner;
-- tokleft;
}
while ( tokleft && char_is_space( scanner[0] ) );
last_cut = sptr( scanner ) - sptr( currtok.Text );
// Preserve only 1 space of formattting
if ( content.back() != ' ' )
content.append( ' ' );
continue;
}
if ( scanner[0] == '\t' )
{
if ( pos > last_cut )
content.append( cut_ptr, cut_length );
// Replace with a space
if ( content.back() != ' ' )
content.append( ' ' );
++ scanner;
-- tokleft;
last_cut = sptr( scanner ) - sptr( currtok.Text );
continue;
}
if ( tokleft > 1 && scanner[0] == '\r' && scanner[1] == '\n' )
{
if ( pos > last_cut )
content.append( cut_ptr, cut_length );
// Replace with a space
if ( content.back() != ' ' )
content.append( ' ' );
scanner += 2;
tokleft -= 2;
last_cut = sptr( scanner ) - sptr( currtok.Text );
continue;
}
if ( scanner[0] == '\n' )
{
if ( pos > last_cut )
content.append( cut_ptr, cut_length );
// Replace with a space
if ( content.back() != ' ' )
content.append( ' ' );
++ scanner;
-- tokleft;
last_cut = sptr( scanner ) - sptr( currtok.Text );
continue;
}
if ( scanner[0] == '\\' )
{
s32 amount_to_skip = 1;
if ( tokleft > 1 && scanner[1] == '\n' )
{
amount_to_skip = 2;
}
else if ( tokleft > 2 && scanner[1] == '\r' && scanner[2] == '\n' )
{
amount_to_skip = 3;
}
if ( amount_to_skip > 1 )
{
if ( pos == last_cut )
{
// If the backslash is the first character on the line, then skip it
scanner += amount_to_skip;
tokleft -= amount_to_skip;
last_cut = sptr( scanner ) - sptr( currtok.Text );
continue;
}
// We have content to add.
content.append( cut_ptr, pos - last_cut );
scanner += amount_to_skip;
tokleft -= amount_to_skip;
}
else
{
++ scanner;
-- tokleft;
}
last_cut = sptr( scanner ) - sptr( currtok.Text );
continue;
}
++ scanner;
-- tokleft;
}
if ( last_cut < currtok.Length )
{
content.append( cut_ptr, currtok.Length - last_cut );
}
#undef cut_ptr
#undef cut_length
#undef pos
define->Content = get_cached_string( content );
define->Content = get_cached_string( strip_formatting( currtok, strip_formatting_dont_preserve_newlines ) );
eat( TokType::Preprocess_Content );
Context.pop();
@ -2230,7 +2375,13 @@ Code parse_function_body()
if ( len > 0 )
{
// #define GEN_STRIP_FUNCTION_BODY_FORMATTING
#ifdef GEN_STRIP_FUNCTION_BODY_FORMATTING
String content = strip_formatting( { len, start.Text }, strip_formatting_dont_preserve_newlines );
result.append( def_execution( content ) );
#else
result.append( def_execution( { len, start.Text } ) );
#endif
}
eat( TokType::BraceCurly_Close );
@ -2979,11 +3130,7 @@ CodePragma parse_pragma()
Context.Scope->Name = currtok;
String
content_stripped = String::make( GlobalAllocator, currtok );
content_stripped.strip_space();
pragma->Content = get_cached_string( content_stripped );
pragma->Content = get_cached_string( currtok );
eat( TokType::Preprocess_Content );
Context.pop();
@ -3060,7 +3207,7 @@ CodeParam parse_params( bool use_template_capture )
eat( currtok.Type );
}
value = untyped_str( value_tok );
value = untyped_str( strip_formatting( value_tok, strip_formatting_dont_preserve_newlines ) );
}
}
@ -3130,7 +3277,7 @@ CodeParam parse_params( bool use_template_capture )
eat( currtok.Type );
}
value = untyped_str( value_tok );
value = untyped_str( strip_formatting( value_tok, strip_formatting_dont_preserve_newlines ) );
}
}
@ -3300,19 +3447,7 @@ Code parse_static_assert()
eat( TokType::Statement_End );
content.Length = ( (sptr)prevtok.Text + prevtok.Length ) - (sptr)content.Text;
String
content_stripped = String::make( GlobalAllocator, content );
content_stripped.strip_space();
char const* result = str_fmt_buf( "%.*s\n", content.Length, content.Text );
if ( content_stripped )
{
result = str_fmt_buf( "%S\n", content_stripped );
}
assert->Content = get_cached_string( to_str( result ) );
assert->Content = get_cached_string( content );
assert->Name = assert->Content;
Context.pop();
@ -4894,8 +5029,8 @@ CodeType parse_type( bool* typedef_is_function )
result->Type = Typename;
// Need to wait until were using the new parsing method to do this.
String
name_stripped = String::make( GlobalAllocator, name );
String name_stripped = strip_formatting( name, strip_formatting_dont_preserve_newlines );
// name_stripped.strip_space();
#ifdef GEN_USE_NEW_TYPENAME_PARSING