Update gencpp to latest and adjust gen_src.cpp accordingly.

This commit is contained in:
2024-12-13 11:50:40 -05:00
parent daa820b134
commit ad813d0335
17 changed files with 26015 additions and 24762 deletions
+88 -86
View File
@@ -5,9 +5,9 @@
#define GEN_DEFINE_LIBRARY_CODE_CONSTANTS
#define GEN_ENFORCE_STRING_CODE_TYPES
#define GEN_EXPOSE_BACKEND
#include "gencpp/gen.cpp"
#include "gencpp/gen.builder.cpp"
#include "gencpp/gen.scanner.cpp"
#define GEN_IMPLEMENTATION
// #define GEN_DONT_USE_FATAL
#include "gencpp/gen.hpp"
using namespace gen;
#ifdef GEN_SYSTEM_WINDOWS
@@ -24,18 +24,11 @@ using namespace gen;
#pragma endregion Directories
inline
CodeBody parse_file( char const* path ) {
FileContents content = file_read_contents( GlobalAllocator, true, path );
CodeBody code = parse_global_body( StrC { content.size, (char const*)content.data });
return code;
}
inline
void git_restore_file( char const* path )
{
#define git_restore_cmd "git restore "
String command = String::make( GlobalAllocator, git_restore_cmd );
StrBuilder command = StrBuilder::make( GlobalAllocator, git_restore_cmd );
command.append( path );
log_fmt("Running git restore on: %s", path);
system(command);
@@ -51,7 +44,7 @@ void format_file( char const* path )
#define cf_format_inplace "-i "
#define cf_style "-style=file:" "./scripts/.clang-format "
#define cf_verbose "-verbose "
String command = String::make( GlobalAllocator, clang_format );
StrBuilder command = StrBuilder::make( GlobalAllocator, clang_format );
command.append( cf_format_inplace );
command.append( cf_style );
command.append( cf_verbose );
@@ -80,33 +73,30 @@ Array<Odin_AstKind> get_odin_ast_kinds()
++ done_once;
}
CodeType t_char_const_ptr = parse_type(code(char const*));
CodeTypename t_char_const_ptr = parse_type(code(char const*));
CodeBody ast_kinds_header = parse_file( path_codegen "ast_kinds.hpp" );
for ( Code code = ast_kinds_header.begin(); code != ast_kinds_header.end(); ++ code )
{
switch (code->Type)
{
using namespace ECode;
case Comment:
case Preprocess_Pragma:
case CT_Comment:
case CT_Preprocess_Pragma:
// Ignore
continue;
case Variable:
case CT_Variable:
{
Odin_AstKind entry { {nullptr}, {} };
Odin_AstKind entry {};
CodeVar var = code.cast<CodeVar>();
CodeVar var = cast(CodeVar, code);
if ( ! var->ValueType.is_equal( t_char_const_ptr ) )
{
__debugbreak();
log_failure("Expected all globally defined variables to be char cons* type");
return kinds;
}
if ( ! var->Value || ! var->Value->Content )
{
__debugbreak();
log_failure("Expected all globally defined variable to have a string assigned to it");
return kinds;
}
@@ -116,9 +106,8 @@ Array<Odin_AstKind> get_odin_ast_kinds()
++ code;
// Grab the definition
if ( code->Type != Struct && code->Type != Typedef )
if ( code->Type != CT_Struct && code->Type != CT_Typedef )
{
__debugbreak();
log_failure("Expected a struct or typedef for the entry definition");
return kinds;
}
@@ -128,8 +117,7 @@ Array<Odin_AstKind> get_odin_ast_kinds()
}
continue;
case Struct:
__debugbreak();
case CT_Struct:
log_failure("Expected a description definition as char const* first");
return kinds;
break;
@@ -151,13 +139,13 @@ Array<Code> get_odin_type_kinds()
CodeBody ast_types_header = parse_file( path_codegen "type_kinds.hpp" );
for ( Code code = ast_types_header.begin(); code != ast_types_header.end(); ++ code ) switch (code->Type)
{
case ECode::Comment:
case ECode::Preprocess_Pragma:
case CT_Comment:
case CT_Preprocess_Pragma:
// Ignore
continue;
case ECode::Typedef:
case ECode::Struct:
case CT_Typedef:
case CT_Struct:
{
types.append(code);
}
@@ -170,7 +158,7 @@ int gen_main()
gen::init();
log_fmt("Generating code for Odin's src\n");
StrC str_GB_STATIC_ASSERT = txt("GB_STATIC_ASSERT(");
Str str_GB_STATIC_ASSERT = txt("GB_STATIC_ASSERT(");
PreprocessorDefines.append( get_cached_string(str_GB_STATIC_ASSERT) );
// Remove TOKEN_KINDS usage in tokenizer.cpp
@@ -181,61 +169,64 @@ int gen_main()
{
char scratch_mem[kilobytes(32)];
Arena scratch = Arena::init_from_memory( scratch_mem, sizeof(scratch_mem) );
file_read_contents( scratch, zero_terminate, path_codegen "token_kinds.csv" );
file_read_contents( scratch, file_zero_terminate, path_codegen "token_kinds.csv" );
csv_parse( &csv_nodes, scratch_mem, GlobalAllocator, false );
}
Array<ADT_Node> enum_strs = csv_nodes.nodes[0].nodes;
Array<ADT_Node> str_strs = csv_nodes.nodes[1].nodes;
String enum_entries = String::make_reserve( GlobalAllocator, kilobytes(32) );
String to_str_entries = String::make_reserve( GlobalAllocator, kilobytes(32) );
StrBuilder enum_entries = StrBuilder::make_reserve( GlobalAllocator, kilobytes(32) );
StrBuilder to_str_entries = StrBuilder::make_reserve( GlobalAllocator, kilobytes(32) );
to_str_entries.append(txt("{"));
for (uw idx = 0; idx < enum_strs.num(); idx++)
for (usize idx = 0; idx < enum_strs.num(); idx++)
{
char const* enum_str = enum_strs[idx].string;
StrC entry_to_str = to_str(str_strs [idx].string);
Str entry_to_str = to_str_from_c_str(str_strs [idx].string);
#pragma push_macro("cast")
#undef cast
enum_entries.append_fmt( "Token_%s,\n", enum_str );
to_str_entries.append( token_fmt( "str", (StrC)entry_to_str, stringize(
to_str_entries.append( token_fmt( "str", (Str)entry_to_str, stringize(
{ cast(u8 *) "<str>", gb_size_of("<str>") -1 },\n
)));
#pragma pop_macro("cast")
}
to_str_entries.append(txt("}"));
char const* path_tokenizer = path_src "tokenizer.cpp";
git_restore_file( path_tokenizer );
CodeBody src_tokenizer_cpp = parse_file( path_src "tokenizer.cpp" );
CodeBody body = def_body( ECode::Global_Body );
CodeBody body = def_body( CT_Global_Body );
body.append( def_comment(txt("NOTICE(github: Ed94): This is a generated variant of tokenizer.cpp using <repo_root>/codegen/gen_src.cpp")));
body.append(fmt_newline);
for (Code code = src_tokenizer_cpp.begin(); code != src_tokenizer_cpp.end(); ++ code) switch (code->Type)
{
case ECode::Preprocess_Define:
case CT_Preprocess_Define:
if ( code->Name.starts_with( txt("TOKEN_KINDS"))) {
// Skip, we don't want it.
continue;
}
continue;
case ECode::Enum:
case CT_Enum:
{
if ( code->Name.starts_with(txt("TokenKind")))
{
CodeEnum enum_code = code.cast<CodeEnum>();
CodeEnum enum_code = cast(CodeEnum, code);
enum_code->Body = untyped_str(enum_entries);
}
body.append(code);
}
continue;
case ECode::Variable:
case CT_Variable:
if ( code->Name.starts_with(txt("token_strings")))
{
CodeVar var = code.cast<CodeVar>();
CodeVar var = cast(CodeVar, code);
var->Value = untyped_str(to_str_entries);
}
body.append(code);
@@ -259,7 +250,7 @@ int gen_main()
char const* path_parser = path_src "parser.hpp";
git_restore_file( path_parser );
CodeBody src_parser_header = parse_file( path_src "parser.hpp" );
CodeBody body = def_body( ECode::Global_Body );
CodeBody body = def_body( CT_Global_Body );
body.append( def_comment(txt("NOTICE(github: Ed94): This is a generated variant of parser.hpp using <repo_root>/codegen/gen_src.cpp")));
body.append(fmt_newline);
@@ -270,7 +261,7 @@ int gen_main()
{
switch (code->Type)
{
case ECode::Preprocess_Define:
case CT_Preprocess_Define:
if ( code->Name.starts_with( txt("AST_KINDS"))) {
// Skip, we don't want it.
continue;
@@ -284,26 +275,26 @@ int gen_main()
body.append(code);
continue;
case ECode::Untyped:
case CT_Untyped:
if (code->Content.starts_with(txt("AST_KINDS")))
continue;
body.append(code);
continue;
case ECode::Enum:
case CT_Enum:
{
if (code->Name.starts_with( txt("AstKind")))
{
// Swap with generated variant
CodeBody swap_body = def_body( ECode::Enum_Body );
CodeBody swap_body = def_body( CT_Enum_Body );
{
swap_body.append( code_str(Ast_Invalid,));
for (Odin_AstKind& kind : ast_kinds)
swap_body.append( untyped_str( String::fmt_buf(GlobalAllocator, "Ast_%S,", kind.def->Name )));
swap_body.append( untyped_str( StrBuilder::fmt_buf(GlobalAllocator, "Ast_%S,", kind.def->Name )));
swap_body.append( code_str(Ast_COUNT));
}
CodeEnum swapped_enum = code.cast<CodeEnum>().duplicate();
CodeEnum swapped_enum = cast(CodeEnum, code).duplicate();
swapped_enum->Body = swap_body;
body.append(swapped_enum);
}
@@ -312,24 +303,32 @@ int gen_main()
}
continue;
case ECode::Variable:
case CT_Variable:
{
if (code->Name.starts_with(txt("ast_strings")))
{
// Swap with generated table
String generated_table = String::make_reserve(GlobalAllocator, kilobytes(32));
StrBuilder generated_table = StrBuilder::make_reserve(GlobalAllocator, kilobytes(32));
{
#pragma push_macro("cast")
#undef cast
for (Odin_AstKind& kind : ast_kinds)
generated_table.append(token_fmt("desc", (StrC)kind.desc, stringize(
generated_table.append(token_fmt("desc", (Str)kind.desc, stringize(
{ cast(u8 *) <desc>, gb_size_of(<desc>) -1 },
)));
#pragma pop_macro("cast")
}
CodeVar swapped_table = code.cast<CodeVar>().duplicate();
swapped_table->Value = code_fmt( "kinds", (StrC)generated_table, stringize(
CodeVar swapped_table = cast(CodeVar, code).duplicate();
#pragma push_macro("cast")
#undef cast
swapped_table->Value = code_fmt( "kinds", (Str)generated_table, stringize(
{
{cast(u8 *)"invalid node", gb_size_of("invalid node")},\n
{ cast(u8 *)"invalid node", gb_size_of("invalid node")},\n
<kinds>
}));
#pragma pop_macro("cast")
body.append(swapped_table);
body.append(fmt_newline);
@@ -339,7 +338,7 @@ int gen_main()
for (Odin_AstKind& kind : ast_kinds)
{
Code def = kind.def.duplicate();
def->Name = get_cached_string( String::fmt_buf(GlobalAllocator, "Ast%S", kind.def->Name));
def->Name = get_cached_string( StrBuilder::fmt_buf(GlobalAllocator, "Ast%S", kind.def->Name));
body.append( def );
body.append(fmt_newline);
}
@@ -349,15 +348,15 @@ int gen_main()
if (code->Name.starts_with(txt("ast_variant_sizes")))
{
// Swap with generated table
String generated_table = String::make_reserve(GlobalAllocator, kilobytes(32));
StrBuilder generated_table = StrBuilder::make_reserve(GlobalAllocator, kilobytes(32));
{
for (Odin_AstKind& kind : ast_kinds)
generated_table.append(token_fmt( "name", (StrC)kind.def->Name, stringize(
generated_table.append(token_fmt( "name", (Str)kind.def->Name, stringize(
gb_size_of(Ast<name>),\n
)));
}
CodeVar swapped_table = code.cast<CodeVar>().duplicate();
swapped_table->Value = code_fmt( "kinds", (StrC)generated_table, stringize(
CodeVar swapped_table = cast(CodeVar, code).duplicate();
swapped_table->Value = code_fmt( "kinds", (Str)generated_table, stringize(
{
0,\n
<kinds>
@@ -369,21 +368,21 @@ int gen_main()
}
continue;
case ECode::Struct:
case CT_Struct:
{
CodeStruct code_struct = code.cast<CodeStruct>();
CodeStruct code_struct = cast(CodeStruct, code);
if (code->Name.starts_with(txt("Ast")))
for (Code ast_code : code_struct->Body) switch (ast_code->Type)
{
case ECode::Union:
case CT_Union:
{
// Swap out the union's contents with the generated member definitions
CodeBody body_swap = def_body(ECode::Union_Body);
CodeBody body_swap = def_body(CT_Union_Body);
for (Odin_AstKind kind : ast_kinds)
body_swap.append( parse_variable( token_fmt( "name", (StrC)kind.def->Name, stringize(
body_swap.append( parse_variable( token_fmt( "name", (Str)kind.def->Name, stringize(
Ast<name> <name>;
))));
ast_code->Body = rcast(AST*, body_swap.ast);
ast_code->Body = body_swap;
}
break;
default:
@@ -412,7 +411,7 @@ int gen_main()
char const* path_types = path_src "types.cpp";
git_restore_file( path_types );
CodeBody src_types_cpp = parse_file( path_src "types.cpp" );
CodeBody body = def_body( ECode::Global_Body );
CodeBody body = def_body( CT_Global_Body );
body.append( def_comment(txt("NOTICE(github: Ed94): This is a generated variant of types.cpp using <repo_root>/codegen/gen_src.cpp")));
body.append(fmt_newline);
@@ -421,7 +420,7 @@ int gen_main()
for (Code code = src_types_cpp.begin(); code != src_types_cpp.end(); ++ code) switch (code->Type)
{
case ECode::Preprocess_Define:
case CT_Preprocess_Define:
{
if ( code->Name.starts_with( txt("TYPE_KINDS"))) {
// Skip, we don't want it.
@@ -437,19 +436,19 @@ int gen_main()
}
continue;
case ECode::Enum:
case CT_Enum:
{
if ( code->Name.starts_with( txt("TypeKind")))
{
CodeBody swap_body = def_body( ECode::Enum_Body);
CodeBody swap_body = def_body( CT_Enum_Body);
{
swap_body.append( code_str(Type_Invalid, ));
{
for (Code type : type_kinds)
swap_body.append( untyped_str( String::fmt_buf(GlobalAllocator, "Type_%S,", type->Name )));
swap_body.append( untyped_str( StrBuilder::fmt_buf(GlobalAllocator, "Type_%S,", type->Name )));
swap_body.append( code_str(Type_COUNT));
}
CodeEnum swapped_enum = code.cast<CodeEnum>().duplicate();
CodeEnum swapped_enum = cast(CodeEnum, code).duplicate();
swapped_enum->Body = swap_body;
body.append(swapped_enum);
}
@@ -459,24 +458,27 @@ int gen_main()
}
continue;
case ECode::Variable:
case CT_Variable:
{
if (code->Name.starts_with(txt("type_strings")))
{
#pragma push_macro("cast")
#undef cast
// Swap with generated table
String generated_table = String::make_reserve(GlobalAllocator, kilobytes(32));
StrBuilder generated_table = StrBuilder::make_reserve(GlobalAllocator, kilobytes(32));
{
for (Code type : type_kinds)
generated_table.append(token_fmt("type", (StrC)type->Name, stringize(
generated_table.append(token_fmt("type", (Str)type->Name, stringize(
{ cast(u8 *) "<type>", gb_size_of("<type>") -1 },
)));
}
CodeVar swapped_table = code.cast<CodeVar>().duplicate();
swapped_table->Value = code_fmt( "types", (StrC)generated_table, stringize(
CodeVar swapped_table = ((CodeVar)code).duplicate();
swapped_table->Value = code_fmt( "types", (Str)generated_table, stringize(
{
{cast(u8 *)"invalid node", gb_size_of("invalid node")},\n
{ cast(u8 *)"invalid node", gb_size_of("invalid node")},\n
<types>
}));
#pragma pop_macro("cast")
body.append(swapped_table);
body.append(fmt_newline);
@@ -486,7 +488,7 @@ int gen_main()
for (Code type : type_kinds)
{
Code def = type.duplicate();
def->Name = get_cached_string( String::fmt_buf(GlobalAllocator, "Type%S", type->Name));
def->Name = get_cached_string( StrBuilder::fmt_buf(GlobalAllocator, "Type%S", type->Name));
body.append( def );
body.append(fmt_newline);
}
@@ -497,21 +499,21 @@ int gen_main()
}
continue;
case ECode::Struct:
case CT_Struct:
{
CodeStruct code_struct = code.cast<CodeStruct>();
if ( String::are_equal(code->Name, txt("Type")))
CodeStruct code_struct = cast(CodeStruct, code);
if ( str_are_equal(code->Name, txt("Type")))
for (Code type_code : code_struct->Body) switch (type_code->Type)
{
case ECode::Union:
case CT_Union:
{
// Swap out the union's contents with the generated member definitions
CodeBody body_swap = def_body(ECode::Union_Body);
CodeBody body_swap = def_body(CT_Union_Body);
for (Code type : type_kinds)
body_swap.append( parse_variable( token_fmt( "name", (StrC)type->Name, stringize(
body_swap.append( parse_variable( token_fmt( "name", (Str)type->Name, stringize(
Type<name> <name>;
))));
type_code->Body = rcast(AST*, body_swap.ast);
type_code->Body = body_swap;
}
break;
default:
+58
View File
@@ -0,0 +1,58 @@
## Navigation
[Top](../Readme.md)
<- [docs - General](Readme.md)
## Current Design
`AST` is the actual managed node object for the library.
It's raw and really not meant to be used directly.
All user interaction must be with its pointer so the type they deal with is `AST*`.
In order to abstract away constant use of `AST*`, it's wrapped in a Code type which can be either:
When it's the [C generated variant of the library](../gen_c_library/)
```c
typedef AST* Code;
typedef AST_<name>* Code<name>;
...
```
**or**
For C++:
```cpp
struct Code {
AST* ast;
};
struct Code<name> {
...
AST_<name>* ast;
};
```
The full definitions of all asts are within:
* [`ast.hpp`](../base/components/ast.hpp)
* [`ast_types.hpp`](../base/components/ast_types.hpp)
* [`code_types.hpp`](../base/components/code_types.hpp)
The C/C++ interface procedures are located with `ast.hpp` (for the Code type), and `code_types.hpp` for all others.
## Serialization
All code types can either serialize using a function of the pattern:
```c
StrBuilder <prefix>_to_string(Code code);
// or
<prefix>_to_string(Code code, StrBuilder& result);
```
Where the first generates strings allocated using Allocator_StringArena and the other appends to an existing string using its backing allocator.
Serialization for the AST is defined for `Code` in [`ast.cpp`](../base/components/ast.cpp) with `code_to_strbuilder_ptr` & `code_to_string`.
Serialization for the rest of the code types is within [`code_serialization.cpp`](../base/components/code_serialization.cpp).
Gencpp's serialization does not provide coherent formatting of the code. The user should use a formatter after serializing.
+765
View File
@@ -0,0 +1,765 @@
## Navigation
[Top](../Readme.md)
<- [docs - General](Readme.md)
# AST Types Documentation
While the Readme for docs covers the data layout per AST, this will focus on the AST types available, and their nuances.
## Body
These are containers representing a scope body of a definition that can be of the following `CodeType` type:
* Class_Body
* Enum_Body
* Export_Body
* Extern_Linkage_Body
* Function_Body
* Global_Body
* Namespace_Body
* Struct_Body
* Union_Body
Fields:
```cpp
StringCached Name;
Code Front;
Code Back;
Token* Tok;
Code Parent;
CodeT Type;
s32 NumEntries;
```
The `Front` member represents the start of the link list and `Back` the end.
NumEntries is the number of entries in the body.
Parent should have a compatible CodeType type for the type of definition used.
Serialization:
Will output only the entries, the braces are handled by the parent.
```cpp
<Front>
...
<Back>
```
## Attributes
Represent standard or vendor specific C/C++ attributes.
Fields:
```cpp
StringCached Content;
StringCached Name;
Code Prev;
Code Next;
Token* Tok;
Code Parent;
CodeT Type;
```
Serialization:
```cpp
<Content>
```
While the parser supports the `__declspec` and `__attribute__` syntax, the upfront constructor ( def_attributes ) must have the user specify the entire attribute, including the `[[]]`, `__declspec` or `__attribute__` parts.
## Comment
Stores a comment.
Fields:
```cpp
StringCached Content;
StringCached Name;
Code Prev;
Code Next;
Token* Tok;
Code Parent;
CodeT Type;
```
Serialization:
```cpp
<Content>
```
The parser will preserve comments found if residing within a body or in accepted inline-to-definition locations.
Otherwise they will be skipped by the TokArray::__eat and TokArray::current( skip formatting enabled ) functions.
The upfront constructor: `def_comment` expects to receive a comment without the `//` or `/* */` parts. It will add them during construction.
## Class & Struct
Fields:
```cpp
CodeComment InlineCmt; // Only supported by forward declarations
CodeAttributes Attributes;
CodeType ParentType;
CodeBody Body;
StringCached Name;
CodeType Prev;
CodeType Next;
Token* Tok;
Code Parent;
CodeT Type;
ModuleFlag ModuleFlags;
AccessSpec ParentAccess;
```
Serialization:
```cpp
// Class_Fwd
<ModuleFlags> <class/struct> <Name>; <InlineCmt>
// Class
<ModuleFlags> <class/struct> <Attributes> <Name> : <ParentAccess> <ParentType>, public <ParentType->Next>, ... <InlineCmt>
{
<Body>
};
```
You'll notice that only the first parent type is given a parent access specifier. This library only supports single inheritance; the rest are assumed to be interfaces and are given public access specifiers.
## Constructor
Fields:
```cpp
CodeComment InlineCmt; // Only supported by forward declarations
Code InitializerList;
CodeParams Params;
Code Body;
StringCached Name;
Code Prev;
Code Next;
Token* Tok;
Code Parent;
CodeT Type;
```
Serialization:
```cpp
// Constructor_Fwd
<Specs> <Parent->Name>( <Params> ); <InlineCmt>
// Constructor
<Specs> <Parent->Name>( <Params> ) <InlineCmt>
: <InitializerList>
{
<Body>
}
// Constructor Source Implementation
<Specs> <Parent>::<Parent->Name>( <Params> ) <Specs>
{
<Body>
}
```
## Define
Represents a preprocessor define
Fields:
```cpp
StringCached Content;
StringCached Name;
Code Prev;
Code Next;
Token* Tok;
Code Parent;
CodeT Type;
```
Serialization:
```cpp
#define <Name> <Content>
```
## Destructor
Fields:
```cpp
CodeComment InlineCmt;
CodeSpecifiers Specs;
Code Body;
StringCached Name;
Code Prev;
Code Next;
Token* Tok;
Code Parent;
CodeT Type;
```
Serialization:
```cpp
// Destructor_Fwd
<Specs> ~<Parent->Name>( <Params> ) <Specs>; <InlineCmt>
// Destructor
<Specs> ~<Parent->Name>( <Params> ) <Specs>
{
<Body>
}
// Destructor Source Implementation
<Specs> <Parent>::~<Parent->Name>( <Params> ) <Specs>
{
<Body>
}
```
## Enum
Fields:
```cpp
CodeComment InlineCmt;
CodeAttributes Attributes;
CodeType UnderlyingType;
Code UnderlyingTypeMacro;
CodeBody Body;
Code Prev;
Code Next;
Token* Tok;
Code Parent;
StringCached Name;
CodeT Type;
ModuleFlag ModuleFlags;
```
UnderlyingTypeMacro is a macro the library natively supports: `enum_underlying(type)` that is meant to behave as a wrapper for underlying type assignment.
The `enum_underlying_sig` is a `Str` global var that can be set which will be defined within `PreprocessorDefines` and used in `parser_parse_enum` to identify a valid macro.
Serialization:
```cpp
// Enum_Fwd
<ModuleFlags> enum class <Name> : <UnderlyingType> or <UnderlyingTypeMacro> ; <InlineCmt>
// Enum
<ModuleFlags> <enum or enum class> <Name> : <UnderlyingType> or <UnderlyingTypeMacro>
{
<Body>
};
```
## Execution
Just represents an execution body. Equivalent to an untyped body.
Will be obsolete when function body parsing is implemented.
Fields:
```cpp
StringCached Content;
StringCached Name;
Code Prev;
Code Next;
Token* Tok;
Code Parent;
CodeT Type;
```
Serialization:
```cpp
<Content>
```
## External Linkage
Fields:
```cpp
CodeBody Body;
StringCached Name;
Code Prev;
Code Next;
Token* Tok;
Code Parent;
CodeT Type;
```
Serialization:
```cpp
extern "<Name>"
{
<Body>
}
```
## Include
Fields:
```cpp
StringCached Content;
StringCached Name;
Code Prev;
Code Next;
Code Parent;
Token* Tok;
CodeT Type;
```
Serialization:
```cpp
#include <Content>
```
## Friend
This library (until it becomes necessary because of some third-party library to do otherwise) does not support friend declarations with in-statement function definitions.
Fields:
```cpp
CodeComment InlineCmt;
Code Declaration;
StringCached Name;
Code Prev;
Code Next;
Token* Tok;
Code Parent;
CodeT Type;
```
Serialization:
```cpp
friend <Declaration>; <InlineCmt>
```
## Function
Fields:
```cpp
CodeComment InlineCmt;
CodeAttributes Attributes;
CodeSpecifiers Specs;
CodeType ReturnType;
CodeParams Params;
CodeBody Body;
StringCached Name;
Code Prev;
Code Next;
Token* Tok;
Code Parent;
CodeT Type;
ModuleFlag ModuleFlags;
```
Serialization:
```cpp
// Function_Fwd
<ModuleFlags> <Attributes> <Specs> <ReturnType> <Name>( <Params> ) <Specs>; <InlineCmt>
// Function
<ModuleFlags> <Attributes> <Specs> <ReturnType> <Name>( <Params> ) <Specs>
{
<Body>
}
```
## Module
Fields:
```cpp
StringCached Name;
Code Prev;
Code Next;
Token* Tok;
Code Parent;
CodeT Type;
ModuleFlag ModuleFlags;
```
Serialization:
```cpp
<ModuleFlags> module <Name>;
```
## Namespace
Fields:
```cpp
CodeBody Body;
StringCached Name;
Code Prev;
Code Next;
Token* Tok;
Code Parent;
CodeT Type;
ModuleFlag ModuleFlags;
```
Serialization:
```cpp
<ModuleFlags> namespace <Name>
{
<Body>
}
```
## Operator Overload (Operator)
Fields:
```cpp
CodeComment InlineCmt;
CodeAttributes Attributes;
CodeSpecifiers Specs;
CodeType ReturnType;
CodeParams Params;
CodeBody Body;
StringCached Name;
Code Prev;
Code Next;
Token* Tok;
Code Parent;
CodeT Type;
ModuleFlag ModuleFlags;
OperatorT Op;
```
Serialization:
```cpp
// Operator_Fwd
<ModuleFlags> <Attributes> <Specs> <ReturnType> operator <Op>( <Params> ) <Specs>; <InlineCmt>
// Operator
<ModuleFlags> <Attributes> <Specs> <ReturnType> <Name>operator <Op>( <Params> ) <Specs>
{
<Body>
}
```
## Operator Cast Overload ( User-Defined Type Conversion, OpCast )
Fields:
```cpp
CodeComment InlineCmt;
CodeSpecifiers Specs;
CodeType ValueType;
CodeBody Body;
StringCached Name;
Code Prev;
Code Next;
Token* Tok;
Code Parent;
CodeT Type;
```
Serialization:
```cpp
// Operator_Cast_Fwd
<Specs> operator <ValueType>() <Specs>; <InlineCmt>
// Operator_Cast
<Specs> <Name>operator <ValueType>() <Specs>
{
<Body>
}
```
## Parameters (AST_Params)
Fields:
```cpp
CodeType ValueType;
Code Macro;
Code Value;
Code PostNameMacro;
StringCached Name;
CodeParams Last;
CodeParams Next;
Token* Tok;
Code Parent;
CodeT Type;
s32 NumEntries;
```
Serialization:
```cpp
<Macro>, <Next> ... <Last>
<Macro> <ValueType> <Name> <PostNameMacro> = <Value>, <Next>... <Last>
```
## Pragma
Fields:
```cpp
StringCached Content;
StringCached Name;
Code Prev;
Code Next;
Token* Tok;
Code Parent;
CodeT Type;
```
Serialization:
```cpp
#pragma <Content>
```
## Preprocessor Conditional
Fields:
```cpp
StringCached Content;
StringCached Name;
Code Prev;
Code Next;
Token* Tok;
Code Parent;
CodeT Type;
```
Serialization:
```cpp
#<based off Type> <Content>
```
## Specifiers
Fields:
```cpp
SpecifierT ArrSpecs[ AST_ArrSpecs_Cap ];
CodeSpecifiers NextSpecs;
StringCached Name;
Code Prev;
Code Next;
Token* Tok;
Code Parent;
CodeT Type;
s32 NumEntries;
```
Serialization:
```cpp
<Spec>, ...
```
## Template
Fields:
```cpp
CodeParams Params;
Code Declaration;
StringCached Name;
Code Prev;
Code Next;
Token* Tok;
Code Parent;
CodeT Type;
ModuleFlag ModuleFlags;
```
Serialization:
```cpp
<ModuleFlags>
template< <Params> >
<Declaration>
```
## Typename
Typenames represent the type "symbol".
Fields:
```cpp
CodeAttributes Attributes;
CodeSpecifiers Specs;
CodeReturnType ReturnType;
CodeParams Params;
Code ArrExpr;
Code Prev;
Code Next;
Token* Tok;
Code Parent;
StringCached Name;
CodeT Type;
b32 IsParamPack;
ETypenameTag TypeTag;
```
Serialization:
```cpp
<Attributes> <TypeTag> <Name> <Specs> <IsParamPack ?: ...>
// Function
<Attributes> <ReturnType> <Name> <Params> <Specs>
```
`<Name>` currently has the full serialization of anything with
*Note: ArrExpr is not used in serialization by `typename_to_strbuilder_ref`; it's instead handled by a parent AST's serialization (variable, typedef, using).*
## Typedef
Behave as usual except function or macro typedefs.
Those (macros) don't use the underlying type field as everything was serialized under the Name field.
Fields:
```cpp
CodeComment InlineCmt;
Code UnderlyingType;
StringCached Name;
Code Prev;
Code Next;
Token* Tok;
Code Parent;
CodeT Type;
ModuleFlag ModuleFlags;
b32 IsFunction;
```
Serialization:
```cpp
// Regular
<ModuleFlags> typedef <UnderlyingType> <Name> <UnderlyingType-ArrExpr>; <InlineCmt>
// Functions
// Currently:
<ModuleFlags> typedef <UnderlyingType (Serialized expression)>; <InlineCmt>
// Desired: Not handled yet
<ModuleFlags> typedef <UnderlyingType->ReturnType> <UnderlyingType->Name> <UnderlyingType-ArrExpr> ( <UnderlyingType->Parameters> ); <InlineCmt>
<ModuleFlags> typedef <UnderlyingType->ReturnType> ( <Name->Namespace> for<Specs->has(Spec_Ptr) ?: *> <UnderlyingType->Name> <UnderlyingType-ArrExpr> ) ( <UnderlyingType->Parameters> ); <InlineCmt>
```
## Union
Fields:
```cpp
CodeAttributes Attributes;
CodeBody Body;
StringCached Name;
Code Prev;
Code Next;
Token* Tok;
Code Parent;
CodeT Type;
ModuleFlag ModuleFlags;
```
Serialization:
```cpp
<ModuleFlags> union <Attributes> <Name>
{
<Body>
}
```
## Using
Fields:
```cpp
CodeComment InlineCmt;
CodeAttributes Attributes;
CodeType UnderlyingType;
StringCached Name;
Code Prev;
Code Next;
Token* Tok;
Code Parent;
CodeT Type;
ModuleFlag ModuleFlags;
```
Serialization:
```cpp
// Regular
<ModuleFlags> using <Attributes> <Name> = <UnderlyingType>; <InlineCmt>
// Namespace
<ModuleFlags> using namespace <Name>; <InlineCmt>
```
## Variable
[Algo](./Parser_Algo.md)
Fields:
```cpp
CodeComment InlineCmt;
CodeAttributes Attributes;
CodeSpecifiers Specs;
CodeType ValueType;
Code BitfieldSize;
Code Value;
StringCached Name;
CodeVar NextVar;
Code Prev;
Code Next;
Token* Tok;
Code Parent;
CodeT Type;
ModuleFlag ModuleFlags;
s32 VarParenthesizedInit;
```
Serialization:
```cpp
// Regular
<ModuleFlags> <Attributes> <Specs> <ValueType> <Name> = <Value>, NextVar ...; <InlineCmt>
// Bitfield
<ModuleFlags> <Attributes> <Specs> <ValueType> <Name> : <BitfieldSize> = <Value>, NextVar ...; <InlineCmt>
// VarParenthesizedInit
<Attributes> <Specs> <ValueType> <Name>( <Value>, NextVar ... ); <InlineCmt>
```
+708
View File
@@ -0,0 +1,708 @@
## Navigation
[Top](../Readme.md)
<- [docs - General](Readme.md)
# Parser's Algorithm
gencpp uses a hand-written recursive descent parser. Both the lexer and parser currently handle a full C/C++ file in a single pass.
## Notable implementation background
### Lexer
The lex procedure does the lexical pass of content provided as a `Str` type.
The tokens are stored (for now) in `Lexer_Tokens`.
Fields:
```cpp
Array<Token> Arr;
s32 Idx;
```
What token types are supported can be found in [ETokType.csv](../base/enums/ETokType.csv). You can also find the token types in [ETokType.h](../base/components/gen/etoktype.cpp), which is the generated enum from the csv file.
Tokens are defined with the struct `gen::parser::Token`:
Fields:
```cpp
char const* Text;
sptr Length;
TokType Type;
s32 Line;
s32 Column;
u32 Flags;
```
Flags is a bitfield made up of TokFlags (Token Flags):
* `TF_Operator` : Any operator token used in expressions
* `TF_Assign`
* Using statement assignment
* Parameter argument default value assignment
* Variable declaration initialization assignment
* `TF_Preprocess` : Related to a preprocessing directive
* `TF_Preprocess_Cond` : A preprocess conditional
* `TF_Attribute` : An attribute token
* `TF_AccessSpecifier` : An accessor operation token
* `TF_Specifier` : One of the specifier tokens
* `TF_EndDefinition` : Can be interpreted as an end definition for a scope.
* `TF_Formatting` : Considered a part of the formatting
* `TF_Literal` : Anything considered a literal by C++.
I plan to replace IsAssign with a general flags field and properly keep track of all operator types instead of abstracting it away to `ETokType::Operator`.
Traversing the tokens is done with the following interface macros:
| Macro | Description |
| --- | --- |
| `currtok_noskip` | Get the current token without skipping whitespace |
| `currtok` | Get the current token, skip any whitespace tokens |
| `prevtok` | Get the previous token (does not skip whitespace) |
| `nexttok` | Get the next token (does not skip whitespace) |
| `eat( Token Type )` | Check to see if the current token is of the given type, if so, advance Token's index to the next token |
| `left` | Get the number of tokens left in the token array |
| `check_noskip` | Check to see if the current token is of the given type, without skipping whitespace |
| `check` | Check to see if the current token is of the given type, skip any whitespace tokens |
### Parser
The parser has a limited user interface; only specific types of definitions or statements are expected to be provided by the user directly when using it to construct an AST dynamically (See SOA for example). It does, however, attempt to provide the capability to parse a full C/C++ file from production codebases.
Each public user interface procedure has the following format:
```cpp
<code type> parse_<definition type>( Str def )
{
check_parse_args( def );
using namespace Parser;
TokArray toks = lex( def );
if ( toks.Arr == nullptr )
return CodeInvalid;
// Parse the tokens and return a constructed AST using internal procedures
...
}
```
The most top-level parsing procedure used for C/C++ file parsing is `parse_global_body`:
It uses a helper procedure called `parse_global_nspace`.
Each internal procedure will have the following format:
```cpp
internal
<code type> parse_<definition_type>( <empty or contextual params> )
{
push_scope();
...
<code type> result = (<code type>) make_code();
...
Context.pop();
return result;
}
```
Below is an outline of the general algorithm used for these internal procedures. The intention is to provide a basic briefing to aid the user in traversing the actual code definitions. These appear in the same order as they are in the `parser.cpp` file.
***NOTE: This is still heavily in an alpha state. A large swath of this can change; make sure these docs are up to date before considering them 1:1 with the repo commit you're considering.***
## `parse_array_decl`
1. Check if its an array declaration with no expression.
1. Consume and return empty array declaration
2. Opening square bracket
3. Consume expression
4. Closing square bracket
5. If adjacent opening bracket
1. Repeat array declaration parse until no brackets remain
## `parse_assignment_expression`
1. Eat the assignment operator
2. Make sure there is content or at least an end statement after.
3. Flatten the assignment expression to an untyped Code string.
## `parse_attributes`
1. Check for standard attribute
2. Check for GNU attribute
3. Check for MSVC attribute
4. Check for a token registered as an attribute
a. Check and grab the arguments of a token registered of an attribute if it has any.
5. Repeat for chained attributes. Flatten them to a single attribute AST node.
## `parse_class_struct`
1. Check for export module specifier
2. class or struct keyword
3. `parse_attributes`
4. If identifier : `parse_identifier`
5. Parse inherited parent or interfaces
6. If opening curly brace : `parse_class_struct_body`
7. If not an inplace definition
1. End statement
2. Check for inline comment
## `parse_class_struct_body`
1. Opening curly brace
2. Parse the body (Possible options):
1. Ignore dangling end statements
2. Newline : ast constant
3. Comment : `parse_comment`
4. Access_Public : ast constant
5. Access_Protected : ast constant
6. Access_Private : ast constant
7. Decl_Class : `parse_complicated_definition`
8. Decl_Enum : `parse_complicated_definition`
9. Decl_Friend : `parse_friend`
10. Decl_Operator : `parse_operator_cast`
11. Decl_Struct : `parse_complicated_definition`
12. Decl_Template : `parse_template`
13. Decl_Typedef : `parse_typedef`
14. Decl_Union : `parse_complicated_definition`
15. Decl_Using : `parse_using`
16. Operator == '~'
1. `parse_destructor`
17. Preprocess_Define : `parse_define`
18. Preprocess_Include : `parse_include`
19. Preprocess_Conditional (if, ifdef, ifndef, elif, else, endif) : `parse_preprocess_cond` or else/endif ast constant
20. Preprocess_Macro : `parse_simple_preprocess`
21. Preprocess_Pragma : `parse_pragma`
22. Preprocess_Unsupported : `parse_simple_preprocess`
23. StaticAssert : `parse_static_assert`
24. The following compound into a resolved definition or declaration:
1. Attributes (Standard, GNU, MSVC) : `parse_attributes`
2. Specifiers (consteval, constexpr, constinit, explicit, forceinline, inline, mutable, neverinline, static, volatile, virtual)
3. Possible Destructor : `parse_destructor`
4. Possible User defined operator cast : `parse_operator_cast`
5. Possible Constructor : `parse_constructor`
6. Something that has the following: (identifier, const, unsigned, signed, short, long, bool, char, int, double)
1. Possible Constructor `parse_constructor`
2. Possible Operator, Function, or variable : `parse_operator_function_or_variable`
25. Something completely unknown (will just make untyped...) : `parse_untyped`
## `parse_comment`
1. Just wrap the token into a cached string ( the lexer did the processing )
## `parse_complicated_definition`
This is a helper function used by the following functions to help resolve a declaration or definition:
* `parse_class_struct_body`
* `parse_global_nspace`
* `parse_union`
A portion of the code in `parse_typedef` is very similar to this as both have to resolve a similar issue.
1. Look ahead to the termination token (End statement)
2. Check to see if it fits the pattern for a forward declare
3. If the previous token was an identifier ( `token[-1]` ):
1. Look back one more token : `[-2]`
2. If the token has a closing brace its an inplace definition
3. If the `token[-2]` is an identifier & `token[-3]` is the declaration type, its a variable using a namespaced type.
4. If the `token[-2]` is an indirection, then its a variable using a namespaced/forwarded type.
5. If the `token[-2]` is an assign classifier, and the starting tokens were the which type with possible `class` token after, its an enum forward declaration.
6. If any of the above is the case, `parse_operator_function_or_variable`
4. If the `token[2]` is a vendor fundamental type (builtin) then it is an enum forward declaration.
5. If the previous token was a closing curly brace, its a definition : `parse_forward_or_definition`
6. If the previous token was a closing square brace, its an array definition : `parse_operator_function_or_variable`
## `parse_define`
1. Define directive
2. Get identifier
3. Get Content (Optional)
## `parse_forward_or_definition`
* Parse any of the following for either a forward declaration or definition:
1. Decl_Class : `parse_class`
2. Decl_Enum : `parse_enum`
3. Decl_Struct : `parse_struct`
4. Decl_Union : `parse_union`
## `parse_function_after_name`
This is needed as a function definition is not easily resolvable early on, as such this function handles resolving a function
after it has been made certain that the type of declaration or definition is indeed for a function signature.
By the point this function is called the following are known : export module flag, attributes, specifiers, return type, & name
1. `parse_parameters`
2. parse postfix specifiers (we do not check if the specifier here is correct or not to be here... yet)
3. If there is a body : `parse_body`
4. Otherwise :
1. Statement end
2. Check for inline comment
## `parse_function_body`
Currently there is no actual parsing of the function body. Any content with the braces is shoved into an execution AST node.
In the future statements and expressions will be parsed.
1. Open curly brace
2. Grab all tokens between the brace and the closing brace, shove them in a execution AST node.
3. Closing curly brace
## `parse_global_nspace`
1. Make sure this is being called for a valid type (namespace, global body, export body, linkage body)
2. If its not a global body, consume the opening curly brace
3. Parse the body (Possible options):
1. Ignore dangling end statements
2. NewLine : ast constant
3. Comment : `parse_comment`
4. Decl_Class : `parse_complicated_definition`
5. Decl_Enum : `parse_complicated_definition`
6. Decl_Extern_Linkage : `parse_extern_link`
7. Decl_Namespace : `parse_namespace`
8. Decl_Struct : `parse_complicated_definition`
9. Decl_Template : `parse_template`
10. Decl_Typedef : `parse_typedef`
11. Decl_Union : `parse_complicated_definition`
12. Decl_Using : `parse_using`
13. Preprocess_Define : `parse_define`
14. Preprocess_Include : `parse_include`
15. Preprocess_If, IfDef, IfNotDef, Elif : `parse_preprocess_cond`
16. Preprocess_Else : ast constant
17. Preprocess_Endif : ast constant
18. Preprocess_Macro : `parse_simple_preprocess`
19. Preprocess_Pragma : `parse_pragma`
20. Preprocess_Unsupported : `parse_simple_preprocess`
21. StaticAssert : `parse_static_assert`
22. Module_Export : `parse_export_body`
23. Module_Import : NOT_IMPLEMENTED
24. The following compound into a resolved definition or declaration:
1. Attributes ( Standard, GNU, MSVC, Macro ) : `parse_attributes`
2. Specifiers ( consteval, constexpr, constinit, extern, forceinline, global, inline, internal_linkage, neverinline, static )
3. Is either ( identifier, const specifier, long, short, signed, unsigned, bool, char, double, int)
1. Attempt to parse as constructor or destructor : `parse_global_nspace_constructor_destructor`
2. If its an operator cast (definition outside class) : `parse_operator_cast`
3. Its an operator, function, or variable : `parse_operator_function_or_variable`
4. If its not a global body, consume the closing curly brace
## `parse_global_nspace_constructor_destructor`
1. Look ahead for the start of the arguments for a possible constructor/destructor
2. Go back past the identifier
3. Check to see if its a destructor by checking for the `~`
4. Continue the next token should be a `::`
5. Determine if the next valid identifier (ignoring possible template parameters) is the same as the first identifier of the function.
6. If it is we have either a constructor or destructor so parse using their respective functions (`parse_constructor`, `parse_destructor`).
## `parse_identifier`
This is going to get heavily changed down the line to have a more broken down "identifier expression" so that the qualifier, template args, etc, can be distinguished between the targeted identifier.
The function can parse all of them, however the AST node compresses them all into a string.
1. Consume first identifier
2. `parse_template_args`
3. While there is a static symbol accessor ( `::` )
1. Consume `::`
2. Consume member identifier
3. `parse_template_args` (for member identifier)
4. If a `~` is encountered and the scope is for a destructor's identifier, do not consume it and return with what was parsed.
## `parse_include`
1. Consume include directive
2. Consume the path
## `parse_operator_after_ret_type`
This is needed as an operator definition is not easily resolvable early on, as such this function handles resolving an operator after it has been made certain that the type of declaration or definition is indeed for an operator signature.
By the point this function is called the following are known : export module flag, attributes, specifiers, return type
1. If there is any qualifiers for the operator, parse them
2. Consume operator keyword
3. Determine the operator type (This will be offloaded to the lexer moreso than how it is now) & consume
4. `parse_params`
5. If there are no parameters and the operator's symbol is a `*`, this operator is a member of pointer.
6. Parse postfix specifiers
7. If there is an opening curly brace, `parse_function_body`
8. Otherwise: consume end statement, check for inline comment.
## `parse_operator_function_or_variable`
When this function is called, attributes and specifiers may have been resolved, however what comes next can still be either an operator, function, or variable.
1. Check for preprocessor macro, if there is one : `parse_simple_preprocess`
2. `parse_type` (Does the bulk of the work)
3. Begin lookahead to see if we get qualifiers or we eventually find the operator declaration
4. If we find an operator keyword : `parse_operator_after_ret_type`
5. otherwise :
1. `parse_identifier`
2. If we see an opening parenthesis (capture start), its a function : `parse_function_after_name`
3. Its a variable : `parse_variable_after_name`
## `parse_pragma`
1. Consume pragma directive
2. Process the token content into cached string
## `parse_params`
1. Consume either a `(` or `<` based on `use_template_capture` arg
2. If we immediately find a closing token, consume it and finish.
3. If we encounter a variadic argument, consume it and return a `param_varadic` ast constant
4. `parse_type`
5. If we have a macro, parse it (Unreal has macros as tags to parameters and or as entire arguments).
6. So long as next token isn't a comma
a. If we have an identifier
1. Consume it
2. Check for assignment:
a. Consume assign operator
b. Parse the expression
7. While we continue to encounter commas
a. Consume them
b. Repeat steps 3 to 6.2.b
8. Consume the closing token
## `parse_preprocess_cond`
1. Parse conditional directive
2. Process directive's content expression
## `parse_simple_preprocess`
There is still decent room for improvement in this setup. Right now the entire macro's relevant tokens are shoved into an untyped AST. It would be better to store it instead in an `AST_Macro` node instead down the line.
1. Consume the macro token
2. Check for an opening curly brace
1. Consume opening curly brace
2. Until the closing curly is encountered consume all tokens.
3. If the parent context is a typedef
1. Check for end statement
1. Consume it
2. Consume potential inline comment
3. Otherwise do steps 3 to 3.1.2
4. Shove it all in an untyped string
## `parse_static_assert`
1. Consume static assert and opening curly brace
2. Consume all tokens until the closing brace is reached.
3. Consume curly brace and end statement
4. Place all tokens within braces into a content for the assert.
## `parse_template_args`
This will get changed heavily once we have better support for typename expressions
1. Consume opening angle bracket
2. Consume all tokens until closing angle bracket
3. Consume closing angle bracket
4. Return the currtok with the amended length.
## `parse_variable_after_name`
This is needed as a variable definition is not easily resolvable early on, it takes a long evaluation period before its known that the declaration or definition is a variable. As such this function handles resolving a variable.
By the point this function is called the following are known : export module flag, attributes, specifiers, value type, name
1. If its an assignment, parse the assignment expression (currently to an untyped string)
2. If its an opening curly brace, parse the expression within (currently to an untyped string).
1. Consume the closing curly brace
3. If its a `:`, we're dealing with bitfield definition:
1. Consume the assign classifier
2. Consume the expression (currently to an untyped string)
4. If a comma is encountered : `parse_variable_declaration_list`
5. Consume statement end
6. Check for inline comment
## `parse_variable_declaration_list`
1. Consume the comma
2. Parse specifiers
3. `parse_variable_after_name`
## `parse_class`
1. `parse_class_struct`
## `parse_constructor`
This currently doesn't support postfix specifiers (planning to in the future)
1. `parse_identifier`
2. `parse_parameters`
3. If currtok is a `:`
1. Consume `:`
2. Parse the initializer list
3. `parse_function_body`
4. If currtok is an opening curly brace
1. `parse_function_body`
5. Otherwise:
1. Consume statement end
2. Check for inline comment
## `parse_destructor`
1. Check for and consume virtual specifier
2. Check for the `~` operator
3. `parse_identifier`
4. Consume opening and closing parenthesis
5. Check for assignment operator:
1. Consume assignment op
2. Consume pure specifier `0`
6. If not pure virtual & currtok is opening curly brace:
1. `parse_function_body`
7. Otherwise:
1. Consume end statement
2. If currtok is comment : `parse_comment`
## `parse_enum`
1. Consume enum token
2. Check for and consume class token
3. `parse_attributes`
4. If there is an identifier consume it
5. Check for a `:`
1. Consume `:`
2. `parse_type`
6. If there is a body parse it (Consume `{`):
1. Newline : ast constant
2. Comment : `parse_comment`
3. Preprocess_Define : `parse_define`
4. Preprocess_Conditional (if, ifdef, ifndef, elif ) : `parse_preprocess_cond`
5. Preprocess_Else : ast constant
6. Preprocess_Endif : ast constant
7. Preprocess_Macro : `parse_simple_preprocess`
8. Preprocess_Pragma : `parse_pragma`
9. Preprocess_Unsupported : `parse_simple_preprocess`
10. An actual enum entry
1. Consume identifier
2. If there is an assignment operator:
1. Consume operator
2. Consume the expression (assigned to untyped string for now)
3. If a macro is encountered consume it (Unreal UMETA macro support)
3. If there is a comma, consume it
## `parse_export_body`
1. `parse_global_nspace`
## `parse_extern_link_body`
1. `parse_global_nspace`
## `parse_extern_link`
1. Consume Decl_Extern_Linkage
2. Consume the linkage identifier
3. `parse_extern_link_body`
## `parse_friend`
1. Consume `friend`
2. `parse_type`
3. If the currtok is an identifier its a function declaration or definition
1. `parse_function_after_name`
4. Consume end statement so long as its not a function definition
5. Check for inline comment, `parse_comment` if exists
## `parse_function`
1. Check and parse for `export`
2. `parse_attributes`
3. Parse specifiers
4. `parse_type`
5. `parse_identifier`
6. `parse_function_after_name`
## `parse_namespace`
1. Consume namespace declaration
2. Parse identifier
3. `parse_global_nspace`
## `parse_operator`
1. Check for and parse export declaration
2. `parse_attributes`
3. Parse specifiers
4. `parse_type`
5. `parse_operator_after_ret_type`
## `parse_operator_cast`
1. Look for and parse a qualifier namespace for the cast (in-case this is defined outside the class's scope)
2. Consume operator declaration
3. `parse_type`
4. Consume opening and closing parenthesis
5. Check for a const qualifying specifier
6. Check to see if this is a definition (`{`)
1. Consume `{`
2. Parse body to untyped string (parsing statement and expressions not supported yet)
3. Consume `}`
7. Otherwise:
1. Consume end statement
2. Check for and consume comment : `parse_comment`
## `parse_struct`
1. `parse_class_struct`
## `parse_template`
Note: This currently doesn't support templated operator casts (going to need to add support for it)
1. Check for and parse export declaration
2. Consume template declaration
3. `parse_params`
4. Parse for any of the following:
1. Decl_Class : `parse_class`
2. Decl_Struct : `parse_struct`
3. Decl_Union : `parse_union`
4. Decl_Using : `parse_using`
5. The following compound into a resolved definition or declaration:
1. `parse_attributes`
2. Parse specifiers
3. Attempt to parse as constructor or destructor: `parse_global_nspace_constructor_destructor`
4. Otherwise: `parse_operator_function_or_variable`
## `parse_type`
This function's implementation is awful and not done correctly. It will most likely be overhauled in the future as I plan to segment the AST_Type into several AST variants along with sub-types to help produce robust type expressions.
Hopefully I won't need to make authentic type expressions as I was hoping to avoid that...
### Current Algorithm
Anything that is in the qualifier capture of the function typename is treated as an expression abstracted as an untyped string
1. `parse_attributes`
2. Parse specifiers
3. If the `parse_type` was called from a template parse, check to see if class was used instead of typename and consume as name.
4. This is where things get ugly for each of these depend on what the next token is.
1. If its an in-place definition of a class, enum, struct, or union:
2. If its a decltype (Not supported yet but draft impl there)
3. If its a compound native type expression (unsigned, char, short, long, int, float, double, etc )
4. Ends up being a regular type alias of an identifier
5. Parse specifiers (postfix)
6. We need to now look ahead to see If we're dealing with a function typename
7. If we're dealing with a function typename:
1. Shove the specifiers, and identifier code we have so far into a return type typename's Name (untyped string)
1. Reset the specifiers code for the top-level typename
2. Check to see if the next token is an identifier:
1. `parse_identifier`
3. Check to see if the next token is capture start and is not the last capture ("qualifier capture"):
1. Consume `(`
2. Consume expression between capture
3. Consume `)`
4. `parse_params`
5. Parse postfix specifiers
8. Check for variadic argument (param pack) token:
1. Consume variadic argument token
### WIP - Alternative Algorithm
Currently wrapped up via macro: `GEN_USE_NEW_TYPENAME_PARSING`
Anything that is in the qualifier capture of the function typename is treated as an expression abstracted as an untyped string
1. `parse_attributes`
2. Parse specifiers (prefix)
3. This is where things get ugly for each of these depend on what the next token is.
1. If its an in-place definition of a class, enum, struct, or union:
2. If its a decltype (Not supported yet but draft impl there)
3. If its a compound native type expression (unsigned, char, short, long, int, float, double, etc )
4. Ends up being a regular type alias of an identifier
4. Parse specifiers (postfix)
1. If any specifiers are found populate specifiers code with them.
5. We need to now look ahead to see If we're dealing with a function typename
6. If we're dealing with a function typename:
1. Shove the specifiers, and identifier code we have so far into a return type typename's Name (untyped string)
1. Reset the specifiers code for the top-level typename
2. Check to see if the next token is an identifier:
1. `parse_identifier`
3. Check to see if the next token is capture start and is not the last capture ("qualifier capture"):
1. Consume `(`
2. Parse binding specifiers
3. `parse_identifier`
4. `parse_parameters` -> params_nested
5. Consume `)`
6. Construct a nested function typename definition for the qualifier `Name`
4. `parse_params` - > params
5. Parse postfix specifiers
7. Check for variadic argument (param pack) token:
1. Consume variadic argument token
### **Later: Algorithm based on typename expressions**
## `parse_typedef`
1. Check for export module specifier
2. typedef keyword
3. If its a preprocess macro: Get the macro name
4. Else:
1. Check to see if its a complicated definition (in-place enum, class, struct, union)
2. If its a complicated definition:
1. Perform the look ahead similar to `parse_complicated_definition`'s implementation
2. Check to see if its a forward declaration : `parse_forward_declaration`
3. If end[-1] is an identifier:
1. Its either an in-place, variable type qualified identifier, or indirection type:
1. `parse_forward_or_definition`
4. else if end[-1] is a closing curly brace
1. Its a definition: `parse_forward_or_definition`
5. else if end[-1] is a closing square brace
2. Its an array definition: `parse_type`
3. Else : `parse_type`
4. Check for identifier : Consume the token
5. `parse_array_decl`
5. Consume end statement
6. Check for inline comment : `parse_comment`
## `parse_union`
1. Check for export module specifier
2. union keyword
3. `parse_attributes`
4. Check for identifier
5. Parse the body (Possible options):
1. Newline
2. Comment
3. Decl_Class
4. Decl_Enum
5. Decl_Struct
6. Decl_Union
7. Preprocess_Define
8. Preprocess_Conditional (if, ifdef, ifndef, elif, else, endif)
9. Preprocess_Macro
10. Preprocess_Pragma
11. Unsupported preprocess directive
12. Variable
6. If its not an inplace definition: End Statement
## `parse_using`
1. Check for export module specifier
2. using keyword
3. Check to see if its a using namespace
4. Get the identifier
5. If its a regular using declaration:
1. `parse_attributes`
2. `parse_type`
3. `parse_array_decl`
6. End statement
7. Check for inline comment
## `parse_variable`
1. Check for export module specifier
2. `parse_attributes`
3. `parse specifiers`
4. `parse_type`
5. `parse_identifier`
6. `parse_variable_after_name`
+42 -25
View File
@@ -1,33 +1,39 @@
## Navigation
[Top](../Readme.md)
<- [docs - General](Readme.md)
# Parsing
The library features a naive parser tailored for only what the library needs to construct the supported syntax of C++ into its AST.
The library features a naive single-pass parser tailored for only what the library needs to construct the supported syntax of C++ into its AST for *"front-end"* meta-programming purposes.
This parser does not, and should not do the compiler's job. By only supporting this minimal set of features, the parser is kept (so far) around ~5600 loc. I hope to keep it under 10k loc worst case.
This parser does not, and should not do the compiler's job. By only supporting this minimal set of features, the parser is kept (so far) around ~7000 loc. I hope to keep it under 10k loc worst case.
You can think of this parser as a frontend parser vs a semantic parser. Its intuitively similar to WYSIWYG. What you perceive as the syntax from the user-side before the compiler gets a hold of it, is what you get.
You can think of this parser as a *frontend parser* vs a *semantic parser*. Its intuitively similar to WYSIWYG. What you ***perceive*** as the syntax from the user-side before the compiler gets a hold of it, is what you get.
User exposed interface:
```cpp
CodeClass parse_class ( StrC class_def );
CodeConstructor parse_constructor ( StrC constructor_def );
CodeDestructor parse_destructor ( StrC destructor_def );
CodeEnum parse_enum ( StrC enum_def );
CodeBody parse_export_body ( StrC export_def );
CodeExtern parse_extern_link ( StrC exten_link_def );
CodeFriend parse_friend ( StrC friend_def );
CodeFn parse_function ( StrC fn_def );
CodeBody parse_global_body ( StrC body_def );
CodeNS parse_namespace ( StrC namespace_def );
CodeOperator parse_operator ( StrC operator_def );
CodeOpCast parse_operator_cast( StrC operator_def );
CodeStruct parse_struct ( StrC struct_def );
CodeTemplate parse_template ( StrC template_def );
CodeType parse_type ( StrC type_def );
CodeTypedef parse_typedef ( StrC typedef_def );
CodeUnion parse_union ( StrC union_def );
CodeUsing parse_using ( StrC using_def );
CodeVar parse_variable ( StrC var_def );
CodeClass parse_class ( Str class_def );
CodeConstructor parse_constructor ( Str constructor_def );
CodeDestructor parse_destructor ( Str destructor_def );
CodeEnum parse_enum ( Str enum_def );
CodeBody parse_export_body ( Str export_def );
CodeExtern parse_extern_link ( Str exten_link_def );
CodeFriend parse_friend ( Str friend_def );
CodeFn parse_function ( Str fn_def );
CodeBody parse_global_body ( Str body_def );
CodeNS parse_namespace ( Str namespace_def );
CodeOperator parse_operator ( Str operator_def );
CodeOpCast parse_operator_cast( Str operator_def );
CodeStruct parse_struct ( Str struct_def );
CodeTemplate parse_template ( Str template_def );
CodeType parse_type ( Str type_def );
CodeTypedef parse_typedef ( Str typedef_def );
CodeUnion parse_union ( Str union_def );
CodeUsing parse_using ( Str using_def );
CodeVar parse_variable ( Str var_def );
```
To parse file buffers, use the `parse_global_body` function.
@@ -47,10 +53,11 @@ The keywords supported for the preprocessor are:
* endif
* pragma
Each directive `#` line is considered one preprocessor unit, and will be treated as one Preprocessor AST. *These ASTs will be considered members or entries of braced scope they reside within*.
Each directive `#` line is considered one preprocessor unit, and will be treated as one Preprocessor AST.
If a directive is used with an unsupported keyword it will be processed as an untyped AST.
The preprocessor lines are stored as members of their associated scope they are parsed within. ( Global, Namespace, Class/Struct )
The preprocessor lines are stored as members of their associated scope they are parsed within. ( Global, Namespace, Class/Struct )
***Again (Its not standard): These ASTs will be considered members or entries of braced scope they reside within***
Any preprocessor definition abuse that changes the syntax of the core language is unsupported and will fail to parse if not kept within an execution scope (function body, or expression assignment).
Exceptions:
@@ -59,6 +66,8 @@ Exceptions:
* Disable with: `#define GEN_PARSER_DISABLE_MACRO_FUNCTION_SIGNATURES`
* typedefs allow for a preprocessed macro: `typedef MACRO();`
* Disable with: `#define GEN_PARSER_DISABLE_MACRO_TYPEDEF`
* Macros can behave as typenames
* There is some macro support in parameters for functions or templates *(Specifically added to support parsing Unreal Engine source)*.
*(Exceptions are added on an on-demand basis)*
*(See functions `parse_operator_function_or_variable` and `parse_typedef` )*
@@ -67,15 +76,23 @@ Adding your own exceptions is possible by simply modifying the parser to allow f
*Note: You could interpret this strictness as a feature. This would allow the user to see if their codebase or a third-party's codebase has some egregious preprocessor abuse.*
If a macro is not defined within the scope of parsing a set of files, it can be defined beforehand by:
* Appending the [`PreprocessorDefines`](https://github.com/Ed94/gencpp/blob/a18b5b97aa5cfd20242065cbf53462a623cd18fa/base/components/header_end.hpp#L137) array.
* For functional macros a "(" just needs to be added after the name like: `<name>(` so that it will tokenize its arguments as part of the token during lexing.
* Defining a CodeDefine using `def_define`. The definition will be processed by the interface for user into `PreprocessorDefines`.
* This can be prevented by setting the optional parameter `dont_append_preprocess_defines`.
The lexing and parsing takes shortcuts from whats expected in the standard.
* Numeric literals are not checked for validity.
* The parse API treats any execution scope definitions with no validation and are turned into untyped Code ASTs.
* The parse API treats any execution scope definitions with no validation and are turned into untyped Code ASTs. (There is a [todo](https://github.com/Ed94/gencpp/issues/49) to add support)
* *This includes the assignment of variables.*
* Attributes ( `[[]]` (standard), `__declspec` (Microsoft), or `__attribute__` (GNU) )
* Assumed to *come before specifiers* (`const`, `constexpr`, `extern`, `static`, etc) for a function or right after the return type.
* Or in the usual spot for class, structs, (*right after the declaration keyword*)
* typedefs have attributes with the type (`parse_type`)
* Parsing attributes can be extended to support user defined macros by defining `GEN_DEFINE_ATTRIBUTE_TOKENS` (see `gen.hpp` for the formatting)
* This is useful for example: parsing Unreal `Module_API` macros.
Empty lines used throughout the file are preserved for formatting purposes during ast serialization.
+10 -127
View File
@@ -1,135 +1,18 @@
# gencpp
## Navigation
An attempt at simple staged metaprogramming for c/c++.
# base
The library API is a composition of code element constructors.
These build up a code AST to then serialize with a file builder.
[Top](../Readme.md)
This code base attempts to follow the [handmade philosophy](https://handmade.network/manifesto).
Its not meant to be a black box metaprogramming utility, it should be easy to integrate into a user's project domain.
* [docs](../docs/Readme.md)
## Notes
# Singleheader
**On Partial Hiatus: Working on handmade hero for now. Only fixes will be pushed as I come across them until I get what I want done from the series**
Creates a single header file version of the library using `singleheader.cpp`.
Follows the same convention seen in the gb, stb, and zpl libraries.
This project is still in development (very much an alpha state), so expect bugs and missing features.
See [issues](https://github.com/Ed94/gencpp/issues) for a list of known bugs or todos.
The library can already be used to generate code just fine, but the parser is where the most work is needed. If your C++ isn't "down to earth" expect issues.
A `natvis` and `natstepfilter` are provided in the scripts directory (its outdated, I'll update this readme when its not).
***The editor and scanner have not been implemented yet. The scanner will come first, then the editor.***
A C variant is hosted [here](https://github.com/Ed94/genc); I will complete it when this library is feature complete, it should be easier to make than this...
## Usage
A metaprogram is built to generate files before the main program is built. We'll term runtime for this program as `GEN_TIME`. The metaprogram's core implementation is within `gen.hpp` and `gen.cpp` in the project directory.
`gen.cpp` \`s `main()` is defined as `gen_main()` which the user will have to define once for their program. There they will dictate everything that should be generated.
In order to keep the locality of this code within the same files the following pattern may be used (although this pattern isn't required at all):
Within `program.cpp` :
```cpp
#ifdef GEN_TIME
#include "gen.hpp"
...
u32 gen_main()
{
...
}
#endif
// "Stage" agnostic code.
#ifndef GEN_TIME
#include "program.gen.cpp"
// Regular runtime dependent on the generated code here.
#endif
```
If using the library's provided build scripts:
```ps1
.\build.ps1 <compiler> <debug or omit> singleheader
```
The design uses a constructive builder API for the code to generate.
The user is provided `Code` objects that are used to build up the AST.
Example using each construction interface:
### Upfront
Validation and construction through a functional interface.
```cpp
Code t_uw = def_type( name(uw) );
Code t_allocator = def_type( name(allocator) );
Code t_string_const = def_type( name(char), def_specifiers( args( ESpecifier::Const, ESpecifier::Ptr ) ));
Code header;
{
Code num = def_variable( t_uw, name(Num) );
Code cap = def_variable( t_uw, name(Capacity) );
Code mem_alloc = def_variable( t_allocator, name(Allocator) );
Code body = def_struct_body( args( num, cap, mem_alloc ) );
header = def_struct( name(ArrayHeader), __, __, body );
}
```
### Parse
Validation through ast construction.
```cpp
Code header = parse_struct( code(
struct ArrayHeader
{
uw Num;
uw Capacity;
allocator Allocator;
};
));
```
### Untyped
No validation, just glorified text injection.
```cpp
Code header = code_str(
struct ArrayHeader
{
uw Num;
uw Capacity;
allocator Allocator;
};
);
```
`name` is a helper macro for providing a string literal with its size, intended for the name parameter of functions.
`code` is a helper macro for providing a string literal with its size, but intended for code string parameters.
`args` is a helper macro for providing the number of arguments to variadic constructors.
`code_str` is a helper macro for writing `untyped_str( code( <content> ))`
All three construction interfaces will generate the following C code:
```cpp
struct ArrayHeader
{
uw Num;
uw Capacity;
allocator Allocator;
};
```
**Note: The formatting shown here is not how it will look. For your desired formatting, it's recommended to run a pass through the files with an auto-formatter.**
*(The library currently uses clang-format for formatting; beware, it's pretty slow...)*
## Building
See the [scripts directory](scripts/).
+67 -137
View File
@@ -1,62 +1,42 @@
## Documentation
# General Docs
The project has no external dependencies beyond:
[Top](../Readme.md)
* `errno.h`
* `stat.h`
* `stdarg.h`
* `stddef.h`
* `stdio.h`
* `copyfile.h` (Mac)
* `types.h` (Linux)
* `unistd.h` (Linux/Mac)
* `intrin.h` (Windows)
* `io.h` (Windows with gcc)
* `windows.h` (Windows)
Contains:
Dependencies for the project are wrapped within `GEN_ROLL_OWN_DEPENDENCIES` (Defining it will disable them).
The majority of the dependency's implementation was derived from the [c-zpl library](https://github.com/zpl-c/zpl).
* [AST_Design](./AST_Design.md): Overview of ASTs
* [AST Types](./AST_Types.md): Listing of all AST types along with their Code type interface.
* [Parsing](./Parsing.md): Overview of the parsing interface.
* [Parser Algo](./Parser_Algo.md): In-depth breakdown of the parser's implementation.
This library was written in a subset of C++ where the following are not used at all:
* RAII (Constructors/Destructors), lifetimes are managed using named static or regular functions.
* Language-provided dynamic dispatch, RTTI
* Object-Oriented Inheritance
* Exceptions
Polymorphic & Member-functions are used as an ergonomic choice, along with a conservative use of operator overloads.
There are only 4 template definitions in the entire library. (`Array<Type>`, `Hashtable<Type>`, `swap<Type>`, and `AST/Code::cast<Type>`)
Two generic templated containers are used throughout the library:
* `template< class Type> struct Array`
* `template< class Type> struct HashTable`
Both Code and AST definitions have a `template< class Type> Code/AST :: cast()`. Its just an alternative way to explicitly cast to each other.
`template< class Type> swap( Type& a, Type& b)` is used over a macro.
Otherwise the library is free of any templates.
### *WHAT IS NOT PROVIDED*
### *CURRENTLY UNSUPPORTED*
**There is no support for validating expressions.**
Its difficult to parse without enough benefits (At the metaprogramming level).
I plan to add this only at the tail of the project parsing milestone.
Its a [todo](https://github.com/Ed94/gencpp/issues/49)
**Only trivial template support is provided.**
The intention is for only simple, non-recursive substitution.
The parameters of the template are treated like regular parameter AST entries.
**Only trivial template support is provided.**
The intention is for only simple, non-recursive substitution.
The parameters of the template are treated like regular parameter AST entries.
This means that the typename entry for the parameter AST would be either:
* `class`
* `typename`
* A fundamental type, function, or pointer type.
Anything beyond this usage is not supported by parse_template for arguments (at least not intentionally).
Use at your own mental peril.
***Concepts and Constraints are not supported***
Its a [todo](https://github.com/Ed94/gencpp/issues/21)
*Concepts and Constraints are not supported, its usage is non-trivial substitution.*
### Feature Macros:
* `GEN_DEFINE_ATTRIBUTE_TOKENS` : Allows user to define their own attribute macros for use in parsing.
* This can be generated using base.cpp.
* `GEN_DEFINE_LIBRARY_CODE_CONSTANTS` : Optional typename codes as they are non-standard to C/C++ and not necessary to library usage
* `GEN_DONT_ENFORCE_GEN_TIME_GUARD` : By default, the library ( gen.hpp/ gen.cpp ) expects the macro `GEN_TIME` to be defined, this disables that.
* `GEN_ENFORCE_STRONG_CODE_TYPES` : Enforces casts to filtered code types.
* `GEN_EXPOSE_BACKEND` : Will expose symbols meant for internal use only.
* `GEN_ROLL_OWN_DEPENDENCIES` : Optional override so that user may define the dependencies themselves.
* `GEN_DONT_ALLOW_INVALID_CODE` (Not implemented yet) : Will fail when an invalid code is constructed, parsed, or serialized.
* `GEN_C_LIKE_PP` : Setting to `<true or 1>` will prevent usage of function definitions using references and structs with member functions. Structs will still have user-defined operator conversions, for-range support, and other operator overloads
### The Data & Interface
@@ -65,87 +45,33 @@ As mentioned in root readme, the user is provided Code objects by calling the co
The AST is managed by the library and provided to the user via its interface.
However, the user may specify memory configuration.
Data layout of AST struct (Subject to heavily change with upcoming redesign):
[Data layout of AST struct (Subject to heavily change with upcoming todos)](../base/components/ast.hpp#L396-461)
```cpp
union {
struct
{
AST* InlineCmt; // Class, Constructor, Destructor, Enum, Friend, Functon, Operator, OpCast, Struct, Typedef, Using, Variable
AST* Attributes; // Class, Enum, Function, Struct, Typedef, Union, Using, Variable
AST* Specs; // Destructor, Function, Operator, Typename, Variable
union {
AST* InitializerList; // Constructor
AST* ParentType; // Class, Struct, ParentType->Next has a possible list of interfaces.
AST* ReturnType; // Function, Operator, Typename
AST* UnderlyingType; // Enum, Typedef
AST* ValueType; // Parameter, Variable
};
union {
AST* Macro; // Parameters
AST* BitfieldSize; // Variable (Class/Struct Data Member)
AST* Params; // Constructor, Function, Operator, Template, Typename
};
union {
AST* ArrExpr; // Typename
AST* Body; // Class, Constructr, Destructor, Enum, Function, Namespace, Struct, Union
AST* Declaration; // Friend, Template
AST* Value; // Parameter, Variable
};
union {
AST* NextVar; // Variable; Possible way to handle comma separated variables declarations. ( , NextVar->Specs NextVar->Name NextVar->ArrExpr = NextVar->Value )
AST* SpecsFuncSuffix; // Only used with typenames, to store the function suffix if typename is function signature.
};
};
StringCached Content; // Attributes, Comment, Execution, Include
struct {
SpecifierT ArrSpecs[AST::ArrSpecs_Cap]; // Specifiers
AST* NextSpecs; // Specifiers
};
};
union {
AST* Prev;
AST* Front;
AST* Last;
};
union {
AST* Next;
AST* Back;
};
AST* Parent;
StringCached Name;
CodeT Type;
ModuleFlag ModuleFlags;
union {
b32 IsFunction; // Used by typedef to not serialize the name field.
b32 IsParamPack; // Used by typename to know if type should be considered a parameter pack.
OperatorT Op;
AccessSpec ParentAccess;
s32 NumEntries;
};
s32 Token; // Handle to the token, stored in the CodeFile (Otherwise unretrivable)
```
https://github.com/Ed94/gencpp/blob/eea4ebf5c40d5d87baa465abfb1be30845b2377e/base/components/ast.hpp#L396-L461
*`CodeT` is a typedef for `ECode::Type` which has an underlying type of `u32`*
*`StringCached` is a typedef for `Str` (a string slice), to denote it is an interned string*
*`CodeType` is an enum tagging the type of code. Has an underlying type of `u32`*
*`OperatorT` is a typedef for `EOperator::Type` which has an underlying type of `u32`*
*`StringCahced` is a typedef for `String const`, to denote it is an interned string*
*`String` is the dynamically allocated string type for the library*
*`StrBuilder` is the dynamically allocated string type for the library*
AST widths are setup to be AST_POD_Size.
AST widths are setup to be AST_POD_Size.
The width dictates how much the static array can hold before it must give way to using an allocated array:
```cpp
constexpr static
uw ArrSpecs_Cap =
int AST_ArrSpecs_Cap =
(
AST_POD_Size
- sizeof(AST*) * 3
- sizeof(Code)
- sizeof(StringCached)
- sizeof(CodeT)
- sizeof(Code) * 2
- sizeof(Token*)
- sizeof(Code)
- sizeof(CodeType)
- sizeof(ModuleFlag)
- sizeof(u32)
)
/ sizeof(SpecifierT) -1; // -1 for 4 extra bytes (Odd num of AST*)
/ sizeof(Specifier) - 1;
```
*Ex: If the AST_POD_Size is 128 the capacity of the static array is 20.*
@@ -153,39 +79,39 @@ uw ArrSpecs_Cap =
Data Notes:
* The allocator definitions used are exposed to the user in case they want to dictate memory usage.
* You'll find the memory handling in `init`, `deinit`, `reset`, `gen_string_allocator`, `get_cached_string`, `make_code`.
* Allocators are defined with the `AllocatorInfo` structure found in `dependencies\memory.hpp`
* You'll find the memory handling in `init`, `deinit`, `reset`, `gen_strbuilder_allocator`, `get_cached_string`, `make_code`.
* Allocators are defined with the `AllocatorInfo` structure found in [`memory.hpp`](../base/dependencies/memory.hpp)
* Most of the work is just defining the allocation procedure:
```cpp
void* ( void* allocator_data, AllocType type, sw size, sw alignment, void* old_memory, sw old_size, u64 flags );
void* ( void* allocator_data, AllocType type, ssize size, ssize alignment, void* old_memory, ssize old_size, u64 flags );
```
* ASTs are wrapped for the user in a Code struct which is a wrapper for a AST* type.
* Both AST and Code have member symbols but their data layout is enforced to be POD types.
* Code types have member symbols but their data layout is enforced to be POD types.
* This library treats memory failures as fatal.
* Cached Strings are stored in their own set of arenas. AST constructors use cached strings for names, and content.
* `StringArenas`, `StringCache`, `Allocator_StringArena`, and `Allocator_StringTable` are the associated containers or allocators.
* Strings used for serialization and file buffers are not contained by those used for cached strings.
* They are currently using `GlobalAllocator`, which are tracked array of arenas that grows as needed (adds buckets when one runs out).
* Memory within the buckets is not reused, so its inherently wasteful.
* I will be augmenting the single arena with a simple slag allocator.
* Linked lists used children nodes on bodies, and parameters.
* I will be augmenting the default allocator with virtual memory & a slab allocator in the [future](https://github.com/Ed94/gencpp/issues/12)
* Intrusive linked lists used children nodes on bodies, and parameters.
* Its intended to generate the AST in one go and serialize after. The constructors and serializer are designed to be a "one pass, front to back" setup.
* Allocations can be tuned by defining the folloiwng macros:
* Allocations can be tuned by defining the following macros (will be moved to runtime configuration in the future):
* `GEN_GLOBAL_BUCKET_SIZE` : Size of each bucket area for the global allocator
* `GEN_CODEPOOL_NUM_BLOCKS` : Number of blocks per code pool in the code allocator
* `GEN_SIZE_PER_STRING_ARENA` : Size per arena used with string caching.
* `GEN_MAX_COMMENT_LINE_LENGTH` : Longest length a comment can have per line.
* `GEN_MAX_NAME_LENGTH` : Max length of any identifier.
* `GEN_MAX_UNTYPED_STR_LENGTH` : Max content length for any untyped code.
* `GEN_TOKEN_FMT_TOKEN_MAP_MEM_SIZE` : token_fmt_va uses local_persit memory of this size for the hashtable.
* `TokenMap_FixedArena` : token_fmt_va uses local_persist memory of this arena type for the hashtable.
* `GEN_LEX_ALLOCATOR_SIZE`
* `GEN_BUILDER_STR_BUFFER_RESERVE`
The following CodeTypes are used which the user may optionally use strong typing with if they enable: `GEN_ENFORCE_STRONG_CODE_TYPES`
* CodeBody : Has support for `for-range` iterating across Code objects.
* CodeBody : Has support for `for : range` iterating across Code objects.
* CodeAttributes
* CodeComment
* CodeClass
@@ -202,13 +128,13 @@ The following CodeTypes are used which the user may optionally use strong typing
* CodeNS
* CodeOperator
* CodeOpCast
* CodeParam : Has support for `for-range` iterating across parameters.
* CodeParams : Has support for `for : range` iterating across parameters.
* CodePreprocessCond
* CodePragma
* CodeSpecifiers : Has support for `for-range` iterating across specifiers.
* CodeSpecifiers : Has support for `for : range` iterating across specifiers.
* CodeStruct
* CodeTemplate
* CodeType
* CodeTypename
* CodeTypedef
* CodeUnion
* CodeUsing
@@ -293,6 +219,7 @@ Code <name>
```
When using the body functions, it's recommended to use the args macro to auto-determine the number of arguments for the variadic:
```cpp
def_global_body( args( ht_entry, array_ht_entry, hashtable ));
@@ -300,7 +227,7 @@ def_global_body( args( ht_entry, array_ht_entry, hashtable ));
def_global_body( 3, ht_entry, array_ht_entry, hashtable );
```
If a more incremental approach is desired for the body ASTs, `Code def_body( CodeT type )` can be used to create an empty body.
If a more incremental approach is desired for the body ASTs, `Code def_body( CodeT type )` can be used to create an empty body.
When the members have been populated use: `AST::validate_body` to verify that the members are valid entries for that type.
### Parse construction
@@ -352,7 +279,7 @@ Interface :
* untyped_fmt
* untyped_token_fmt
During serialization any untyped Code AST has its string value directly injected inline of whatever context the content existed as an entry within.
During serialization any untyped Code AST has its string value directly injected inline of whatever context the content existed as an entry within.
Even though these are not validated from somewhat correct c/c++ syntax or components, it doesn't mean that Untyped code can be added as any component of a Code AST:
* Untyped code cannot have children, thus there cannot be recursive injection this way.
@@ -373,6 +300,7 @@ Code <name> = untyped_str( code(
```
Optionally, `code_str`, and `code_fmt` macros can be used so that the code macro doesn't have to be used:
```cpp
Code <name> = code_str( <some code without "" quotes > )
```
@@ -380,7 +308,7 @@ Code <name> = code_str( <some code without "" quotes > )
Template metaprogramming in the traditional sense becomes possible with the use of `token_fmt` and parse constructors:
```cpp
StrC value = txt("Something");
Str value = txt("Something");
char const* template_str = txt(
Code with <key> to replace with token_values
@@ -402,8 +330,8 @@ The following are provided predefined by the library as they are commonly used:
* `module_global_fragment`
* `module_private_fragment`
* `fmt_newline`
* `param_varaidc` (Used for varadic definitions)
* `pragma_once`
* `param_varaidc` (Used for varadic definitions)
* `preprocess_else`
* `preprocess_endif`
* `spec_const`
@@ -412,13 +340,14 @@ The following are provided predefined by the library as they are commonly used:
* `spec_constinit`
* `spec_extern_linkage` (extern)
* `spec_final`
* `Spec_gb_inline`
* `spec_forceinline`
* `spec_global` (global macro)
* `spec_inline`
* `spec_internal_linkage` (internal macro)
* `spec_local_persist` (local_persist macro)
* `spec_mutable`
* `spec_neverinline`
* `spec_noexcept`
* `spec_override`
* `spec_ptr`
* `spec_pure`
@@ -450,8 +379,8 @@ Optionally the following may be defined if `GEN_DEFINE_LIBRARY_CODE_CONSTANTS` i
* `t_u16`
* `t_u32`
* `t_u64`
* `t_sw` (ssize_t)
* `t_uw` (size_t)
* `t_ssize` (ssize_t)
* `t_usize` (size_t)
* `t_f32`
* `t_f64`
@@ -469,15 +398,12 @@ and have the desired specifiers assigned to them beforehand.
## Code generation and modification
There are three provided auxillary interfaces:
There are two provided auxiliary interfaces:
* Builder
* Editor
* Scanner
Editor and Scanner are disabled by default, use `GEN_FEATURE_EDITOR` and `GEN_FEATURE_SCANNER` to enable them.
### Builder is a similar object to the jai language's string_builder
### Builder is a similar object to the jai language's string_builder
* The purpose of it is to generate a file.
* A file is specified and opened for writing using the open( file_path) function.
@@ -486,4 +412,8 @@ Editor and Scanner are disabled by default, use `GEN_FEATURE_EDITOR` and `GEN_FE
### Scanner Auxillary Interface
Provides *(eventually)* `scan_file` to automatically populate a CodeFile which contains a parsed AST (`Code`) of the file, with any contextual failures that are reported from the parser.
* The purpose is to scan or parse files
* Comes with two basic functions to convert a file to code: `scan_file` and `parse_file`
* `scan_file`: Merely grabs the file and stores it in an untyped Code.
* `parse_file`: Will parse the file using `parse_global_body` and return a `CodeBody`.
* Two basic functions for grabbing columns from a CSV: `parse_csv_one_column` and `parse_csv_two_columns`
-62
View File
@@ -1,62 +0,0 @@
// This file was generated automatically by gencpp's bootstrap.cpp (See: https://github.com/Ed94/gencpp)
#include "gen.builder.hpp"
GEN_NS_BEGIN
// Creates a Builder bound to the file at `path`, opened for writing.
// On success the builder's text buffer is reserved with Builder_StrBufferReserve
// capacity from GlobalAllocator. On failure the error is reported through
// log_failure and the builder is returned without a reserved buffer.
Builder Builder::open( char const* path )
{
	Builder builder;

	FileError open_status = file_open_mode( &builder.File, EFileMode_WRITE, path );
	if ( open_status == EFileError_NONE )
	{
		builder.Buffer = String::make_reserve( GlobalAllocator, Builder_StrBufferReserve );
	}
	else
	{
		log_failure( "gen::File::open - Could not open file: %s", path );
	}

	return builder;
}
// Appends `num` newline characters to the builder's buffer.
// A `num` of zero or less appends nothing.
// (Previously `num` was ignored and exactly one newline was always appended.)
void Builder::pad_lines( s32 num )
{
	for ( s32 line = 0; line < num; ++line )
	{
		Buffer.append( "\n" );
	}
}
// Serializes `code` with to_string() and appends the resulting text to the buffer.
void Builder::print( Code code )
{
	String serialized = code->to_string();
	Buffer.append( serialized );
}
void Builder::print_fmt( char const* fmt, ... )
{
sw res;
char buf[GEN_PRINTF_MAXLEN] = { 0 };
va_list va;
va_start( va, fmt );
res = str_fmt_va( buf, count_of( buf ) - 1, fmt, va ) - 1;
va_end( va );
// log_fmt( "$%s - print_fmt: %.*s\n", File.filename, res > 80 ? 80 : res, buf );
Buffer.append( buf, res );
}
// Writes the accumulated buffer to the builder's file, then closes the file
// and frees the buffer.
// "Generated: ..." is logged only when the write succeeded; a failed write is
// reported through log_failure instead (previously both messages were emitted
// on failure).
void Builder::write()
{
	bool written = file_write( &File, Buffer, Buffer.length() );

	if ( written == false )
	{
		log_failure( "gen::File::write - Failed to write to file: %s\n", file_name( &File ) );
	}
	else
	{
		log_fmt( "Generated: %s\n", File.filename );
	}

	// Release the file handle and the buffer regardless of the write outcome.
	file_close( &File );
	Buffer.free();
}
GEN_NS_END
-24
View File
@@ -1,24 +0,0 @@
// This file was generated automatically by gencpp's bootstrap.cpp (See: https://github.com/Ed94/gencpp)
#pragma once
#include "gen.hpp"
GEN_NS_BEGIN
// Accumulates generated text in a string buffer that is tied to one output file.
struct Builder
{
// File the buffer is written to.
FileInfo File;
// Text accumulated by the print functions.
String Buffer;
// Creates a builder for the file at `path`.
static Builder open( char const* path );
// Appends newline padding to the buffer.
void pad_lines( s32 num );
// Appends the serialized form of a Code object to the buffer.
void print( Code );
// Appends printf-style formatted text to the buffer.
void print_fmt( char const* fmt, ... );
// Writes the buffer out to File.
void write();
};
GEN_NS_END
-12844
View File
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+24268 -4443
View File
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
-593
View File
@@ -1,593 +0,0 @@
// This file was generated automatically by gencpp's bootstrap.cpp (See: https://github.com/Ed94/gencpp)
#pragma once
#include "gen.hpp"
GEN_NS_BEGIN
#pragma region ADT
// Kind tag of an ADT_Node (stored in ADT_Node::type).
enum ADT_Type : u32
{
EADT_TYPE_UNINITIALISED, /* node was not initialised, this is a programming error! */
EADT_TYPE_ARRAY,
EADT_TYPE_OBJECT,
EADT_TYPE_STRING,
EADT_TYPE_MULTISTRING,
EADT_TYPE_INTEGER,
EADT_TYPE_REAL,
};
// Property values for an ADT node.
// NOTE(review): presumably these are the values held by ADT_Node::props — confirm against the implementation.
enum ADT_Props : u32
{
EADT_PROPS_NONE,
EADT_PROPS_NAN,
EADT_PROPS_NAN_NEG,
EADT_PROPS_INFINITY,
EADT_PROPS_INFINITY_NEG,
EADT_PROPS_FALSE,
EADT_PROPS_TRUE,
EADT_PROPS_NULL,
EADT_PROPS_IS_EXP,
EADT_PROPS_IS_HEX,
// Used internally so that people can fill in real numbers they plan to write.
EADT_PROPS_IS_PARSED_REAL,
};
// Quoting styles for a node's name.
// NOTE(review): presumably the values held by ADT_Node::name_style — confirm.
enum ADT_NamingStyle : u32
{
EADT_NAME_STYLE_DOUBLE_QUOTE,
EADT_NAME_STYLE_SINGLE_QUOTE,
EADT_NAME_STYLE_NO_QUOTES,
};
// Assignment styles for a node.
// NOTE(review): presumably the values held by ADT_Node::assign_style — confirm.
enum ADT_AssignStyle : u32
{
EADT_ASSIGN_STYLE_COLON,
EADT_ASSIGN_STYLE_EQUALS,
EADT_ASSIGN_STYLE_LINE,
};
// Delimiter styles for a node.
// NOTE(review): presumably the values held by ADT_Node::delim_style — confirm.
enum ADT_DelimStyle : u32
{
EADT_DELIM_STYLE_COMMA,
EADT_DELIM_STYLE_LINE,
EADT_DELIM_STYLE_NEWLINE,
};
// Error codes returned by ADT operations such as adt_str_to_number and adt_print_number.
enum ADT_Error : u32
{
EADT_ERROR_NONE,
EADT_ERROR_INTERNAL,
EADT_ERROR_ALREADY_CONVERTED,
EADT_ERROR_INVALID_TYPE,
EADT_ERROR_OUT_OF_MEMORY,
};
// A single node of the ADT tree: a named value that is either a string,
// a list of child nodes, or a number (real or integer), selected by `type`.
struct ADT_Node
{
char const* name;
struct ADT_Node* parent;
/* properties */
ADT_Type type : 4;
u8 props : 4;
// Formatting details; compiled out when GEN_PARSER_DISABLE_ANALYSIS is defined.
#ifndef GEN_PARSER_DISABLE_ANALYSIS
u8 cfg_mode : 1;
u8 name_style : 2;
u8 assign_style : 2;
u8 delim_style : 2;
// NOTE(review): these two widths are 4-bit fields, while the limits comment after
// this struct mentions 128 whitespace symbols — confirm which is intended.
u8 delim_line_width : 4;
u8 assign_line_width : 4;
#endif
/* adt data */
union
{
char const* string;
Array<ADT_Node> nodes; ///< zpl_array
struct
{
// Numeric payload: the same storage viewed as either a real or an integer.
union
{
f64 real;
s64 integer;
};
#ifndef GEN_PARSER_DISABLE_ANALYSIS
/* number analysis */
s32 base;
s32 base2;
u8 base2_offset : 4;
s8 exp : 4;
u8 neg_zero : 1;
u8 lead_digit : 1;
#endif
};
};
};
/* ADT NODE LIMITS
* delimiter and assignment segment width is limited to 128 whitespace symbols each.
* real number limits decimal position to 128 places.
* real number exponent is limited to 64 digits.
*/
/**
* @brief Initialise an ADT object or array
*
* @param node
* @param backing Memory allocator used for descendants
* @param name Node's name
* @param is_array
* @return error code
*/
u8 adt_make_branch( ADT_Node* node, AllocatorInfo backing, char const* name, b32 is_array );
/**
* @brief Destroy an ADT branch and its descendants
*
* @param node
* @return error code
*/
u8 adt_destroy_branch( ADT_Node* node );
/**
* @brief Initialise an ADT leaf
*
* @param node
* @param name Node's name
* @param type Node's type (use zpl_adt_make_branch for container nodes)
* @return error code
*/
u8 adt_make_leaf( ADT_Node* node, char const* name, ADT_Type type );
/**
* @brief Fetch a node using provided URI string.
*
* This method uses a basic syntax to fetch a node from the ADT. The following features are available
* to retrieve the data:
*
* - "a/b/c" navigates through objects "a" and "b" to get to "c"
* - "arr/[foo=123]/bar" iterates over "arr" to find any object with param "foo" that matches the value "123", then gets its field called "bar"
* - "arr/3" retrieves the 4th element in "arr"
* - "arr/[apple]" retrieves the first element of value "apple" in "arr"
*
* @param node ADT node
* @param uri Locator string as described above
* @return zpl_adt_node*
*
* @see code/apps/examples/json_get.c
*/
ADT_Node* adt_query( ADT_Node* node, char const* uri );
/**
* @brief Find a field node within an object by the given name.
*
* @param node
* @param name
* @param deep_search Perform search recursively
* @return zpl_adt_node * node
*/
ADT_Node* adt_find( ADT_Node* node, char const* name, b32 deep_search );
/**
* @brief Allocate an uninitialised node within a container at a specified index.
*
* @param parent
* @param index
* @return zpl_adt_node * node
*/
ADT_Node* adt_alloc_at( ADT_Node* parent, sw index );
/**
* @brief Allocate an uninitialised node within a container.
*
* @param parent
* @return zpl_adt_node * node
*/
ADT_Node* adt_alloc( ADT_Node* parent );
/**
* @brief Move an existing node to a new container at a specified index.
*
* @param node
* @param new_parent
* @param index
* @return zpl_adt_node * node
*/
ADT_Node* adt_move_node_at( ADT_Node* node, ADT_Node* new_parent, sw index );
/**
* @brief Move an existing node to a new container.
*
* @param node
* @param new_parent
* @return zpl_adt_node * node
*/
ADT_Node* adt_move_node( ADT_Node* node, ADT_Node* new_parent );
/**
* @brief Swap two nodes.
*
* @param node
* @param other_node
* @return
*/
void adt_swap_nodes( ADT_Node* node, ADT_Node* other_node );
/**
* @brief Remove node from container.
*
* @param node
* @return
*/
void adt_remove_node( ADT_Node* node );
/**
* @brief Initialise a node as an object
*
* @param obj
* @param name
* @param backing
* @return
*/
b8 adt_set_obj( ADT_Node* obj, char const* name, AllocatorInfo backing );
/**
* @brief Initialise a node as an array
*
* @param obj
* @param name
* @param backing
* @return
*/
b8 adt_set_arr( ADT_Node* obj, char const* name, AllocatorInfo backing );
/**
* @brief Initialise a node as a string
*
* @param obj
* @param name
* @param value
* @return
*/
b8 adt_set_str( ADT_Node* obj, char const* name, char const* value );
/**
* @brief Initialise a node as a float
*
* @param obj
* @param name
* @param value
* @return
*/
b8 adt_set_flt( ADT_Node* obj, char const* name, f64 value );
/**
* @brief Initialise a node as a signed integer
*
* @param obj
* @param name
* @param value
* @return
*/
b8 adt_set_int( ADT_Node* obj, char const* name, s64 value );
/**
* @brief Append a new node to a container as an object
*
* @param parent
* @param name
* @return*
*/
ADT_Node* adt_append_obj( ADT_Node* parent, char const* name );
/**
* @brief Append a new node to a container as an array
*
* @param parent
* @param name
* @return*
*/
ADT_Node* adt_append_arr( ADT_Node* parent, char const* name );
/**
* @brief Append a new node to a container as a string
*
* @param parent
* @param name
* @param value
* @return*
*/
ADT_Node* adt_append_str( ADT_Node* parent, char const* name, char const* value );
/**
* @brief Append a new node to a container as a float
*
* @param parent
* @param name
* @param value
* @return*
*/
ADT_Node* adt_append_flt( ADT_Node* parent, char const* name, f64 value );
/**
* @brief Append a new node to a container as a signed integer
*
* @param parent
* @param name
* @param value
* @return*
*/
ADT_Node* adt_append_int( ADT_Node* parent, char const* name, s64 value );
/* parser helpers */
/**
* @brief Parses a text and stores the result into an uninitialised node.
*
* @param node
* @param base
* @return*
*/
char* adt_parse_number( ADT_Node* node, char* base );
/**
* @brief Parses a text and stores the result into an uninitialised node.
* This function expects the entire input to be a number.
*
* @param node
* @param base
* @return*
*/
char* adt_parse_number_strict( ADT_Node* node, char* base_str );
/**
* @brief Parses and converts an existing string node into a number.
*
* @param node
* @return
*/
ADT_Error adt_str_to_number( ADT_Node* node );
/**
* @brief Parses and converts an existing string node into a number.
* This function expects the entire input to be a number.
*
* @param node
* @return
*/
ADT_Error adt_str_to_number_strict( ADT_Node* node );
/**
* @brief Prints a number into a file stream.
*
* The provided file handle can also be a memory mapped stream.
*
* @see zpl_file_stream_new
* @param file
* @param node
* @return
*/
ADT_Error adt_print_number( FileInfo* file, ADT_Node* node );
/**
* @brief Prints a string into a file stream.
*
* The provided file handle can also be a memory mapped stream.
*
* @see zpl_file_stream_new
* @param file
* @param node
* @param escaped_chars
* @param escape_symbol
* @return
*/
ADT_Error adt_print_string( FileInfo* file, ADT_Node* node, char const* escaped_chars, char const* escape_symbol );
#pragma endregion ADT
#pragma region CSV
// Error codes for the CSV interface.
// NOTE(review): presumably these are the values returned (as u8) by csv_parse / csv_parse_delimiter — confirm.
enum CSV_Error : u32
{
ECSV_Error__NONE,
ECSV_Error__INTERNAL,
ECSV_Error__UNEXPECTED_END_OF_INPUT,
ECSV_Error__MISMATCHED_ROWS,
};
// A CSV document is represented with the generic ADT node type.
typedef ADT_Node CSV_Object;
// Parses `text` using ',' as the delimiter (forwards to csv_parse_delimiter).
GEN_DEF_INLINE u8 csv_parse( CSV_Object* root, char* text, AllocatorInfo allocator, b32 has_header );
// Parses `text` into `root` using `delim` as the column delimiter.
u8 csv_parse_delimiter( CSV_Object* root, char* text, AllocatorInfo allocator, b32 has_header, char delim );
// Releases a CSV object.
void csv_free( CSV_Object* obj );
// Writes `obj` to `file` using ',' as the delimiter (forwards to csv_write_delimiter).
GEN_DEF_INLINE void csv_write( FileInfo* file, CSV_Object* obj );
// Returns `obj` as a string using ',' as the delimiter (forwards to csv_write_string_delimiter).
GEN_DEF_INLINE String csv_write_string( AllocatorInfo a, CSV_Object* obj );
// Writes `obj` to `file` using `delim` as the column delimiter.
void csv_write_delimiter( FileInfo* file, CSV_Object* obj, char delim );
// Returns `obj` as a string using `delim` as the column delimiter.
String csv_write_string_delimiter( AllocatorInfo a, CSV_Object* obj, char delim );
/* inline */
// Comma-delimited convenience wrapper around csv_parse_delimiter.
GEN_IMPL_INLINE u8 csv_parse( CSV_Object* root, char* text, AllocatorInfo allocator, b32 has_header )
{
	char const comma = ',';
	u8 status = csv_parse_delimiter( root, text, allocator, has_header, comma );
	return status;
}
// Comma-delimited convenience wrapper around csv_write_delimiter.
GEN_IMPL_INLINE void csv_write( FileInfo* file, CSV_Object* obj )
{
	char const comma = ',';
	csv_write_delimiter( file, obj, comma );
}
// Comma-delimited convenience wrapper around csv_write_string_delimiter.
GEN_IMPL_INLINE String csv_write_string( AllocatorInfo a, CSV_Object* obj )
{
	char const comma = ',';
	return csv_write_string_delimiter( a, obj, comma );
}
#pragma endregion CSV
// Reads the entire file at `path` into memory and returns it as untyped Code.
//
// If the file contains an `#ifdef GEN_INTELLISENSE_DIRECTIVES` ... `#endif` block, everything up to
// and including the line holding that `#endif` is dropped from the returned content.
// This is done so that includes can be kept in dependency and component files so that intellisense works.
//
// Failure to open the file, or an empty file, is reported through GEN_FATAL.
Code scan_file( char const* path )
{
	FileInfo file;

	FileError error = file_open_mode( &file, EFileMode_READ, path );
	if ( error != EFileError_NONE )
	{
		GEN_FATAL( "scan_file: Could not open: %s", path );
	}

	sw fsize = file_size( &file );
	if ( fsize <= 0 )
	{
		GEN_FATAL( "scan_file: %s is empty", path );
	}

	String str = String::make_reserve( GlobalAllocator, fsize );
	file_read( &file, str, fsize );
	str.get_header().Length = fsize;

	// Skip GEN_INTELLISENSE_DIRECTIVES preprocessor blocks
	// Its designed so that the directive should be the first thing in the file.
	// Anything that comes before it will also be omitted.
	{
#define current ( *scanner )
#define matched 0
#define move_fwd() \
	do             \
	{              \
		++scanner; \
		--left;    \
	} while ( 0 )
// Advances past the rest of the current line and its line ending.
// Every step is guarded by `left` so the scanner can never run past the end of the
// buffer (an unguarded step at end of input used to drive `left` negative, which kept
// the outer `while ( left )` loop running on memory beyond the file content).
#define skip_past_line_end()                                 \
	do                                                       \
	{                                                        \
		while ( left && current != '\r' && current != '\n' ) \
			move_fwd();                                      \
		if ( left )                                          \
			move_fwd();                                      \
		if ( left && current == '\n' )                       \
			move_fwd();                                      \
	} while ( 0 )

		const StrC directive_start  = txt( "ifdef" );
		const StrC directive_end    = txt( "endif" );
		const StrC def_intellisense = txt( "GEN_INTELLISENSE_DIRECTIVES" );

		bool        found_directive = false;
		char const* scanner         = str.Data;
		s32         left            = fsize;

		while ( left )
		{
			// Processing directive.
			if ( current == '#' )
			{
				move_fwd();
				while ( left && char_is_space( current ) )
					move_fwd();

				if ( ! found_directive )
				{
					// Only compare when enough characters remain for a full match.
					if ( left >= directive_start.Len && str_compare( scanner, directive_start.Ptr, directive_start.Len ) == matched )
					{
						scanner += directive_start.Len;
						left    -= directive_start.Len;

						while ( left && char_is_space( current ) )
							move_fwd();

						if ( left >= def_intellisense.Len && str_compare( scanner, def_intellisense.Ptr, def_intellisense.Len ) == matched )
						{
							scanner         += def_intellisense.Len;
							left            -= def_intellisense.Len;
							found_directive  = true;
						}
					}

					skip_past_line_end();
					continue;
				}

				if ( left >= directive_end.Len && str_compare( scanner, directive_end.Ptr, directive_end.Len ) == matched )
				{
					scanner += directive_end.Len;
					left    -= directive_end.Len;

					skip_past_line_end();

					// Shift whatever follows the directive block to the front of the string
					// and shrink the recorded length to match.
					mem_move( str, scanner, left );
					str.get_header().Length = left;
					break;
				}
			}

			// The directive handling above may already have consumed the last character.
			if ( left )
				move_fwd();
		}
#undef skip_past_line_end
#undef move_fwd
#undef matched
#undef current
	}

	file_close( &file );
	return untyped_str( str );
}
#if 0
struct CodeFile
{
using namespace Parser;
String FilePath;
TokArray Tokens;
Array<ParseFailure> ParseFailures;
Code CodeRoot;
};
namespace Parser
{
// Record of a single parse failure, stored in CodeFile::ParseFailures.
// Part of the `#if 0` region, so it is currently compiled out.
struct ParseFailure
{
// Presumably a human-readable explanation of the failure — TODO confirm with the producer.
String Reason;
// Presumably the node the failure is associated with — TODO confirm with the producer.
Code Node;
};
}
// Scans the file at `path` and packages the outcome into a CodeFile:
// the path itself, the root code node, and the tokens / failures taken from
// the parser's last context.
// Part of the `#if 0` region, so it is currently compiled out.
// NOTE(review): the inner scan_file( path ) call has the same name and argument
// type as this function. Overloads cannot differ only by return type, so this
// wrapper presumably needs a distinct name before it is re-enabled — confirm.
CodeFile scan_file( char const* path )
{
	using namespace Parser;

	CodeFile file_info = {};

	// Keep a copy of the path, allocated from the global allocator.
	file_info.FilePath = String::make( GlobalAllocator, path );

	// Run the scan first; the parser context is queried only afterwards.
	file_info.CodeRoot = scan_file( path );

	ParseContext last_ctx   = parser_get_last_context();
	file_info.Tokens        = last_ctx.Tokens;
	file_info.ParseFailures = last_ctx.Failures;

	return file_info;
}
#endif
GEN_NS_END
-3
View File
@@ -14,9 +14,6 @@ Import-Module $format_cpp
$vendor = $null
$release = $null
$verbose = $false
[bool] $bootstrap = $false
[bool] $singleheader = $false
[bool] $test = $false
[array] $vendors = @( "clang", "msvc" )
+9 -8
View File
@@ -195,11 +195,12 @@ if ( $vendor -match "clang" )
$compiler_args += $flag_no_optimization
}
if ( $debug ) {
$compiler_args += ( $flag_define + 'Build_Debug=1' )
$compiler_args += ( $flag_define + 'GEN_BUILD_DEBUG=1' )
$compiler_args += $flag_debug, $flag_debug_codeview, $flag_profiling_debug
}
else {
$compiler_args += ( $flag_define + 'Build_Debug=0' )
$compiler_args += ( $flag_define + 'GEN_BUILD_DEBUG=0' )
$compiler_args += ( $flag_define + 'GEN_BUILD_DEBUG=0' )
}
$warning_ignores | ForEach-Object {
@@ -275,11 +276,11 @@ if ( $vendor -match "clang" )
$compiler_args += $flag_no_optimization
}
if ( $debug ) {
$compiler_args += ( $flag_define + 'Build_Debug=1' )
$compiler_args += ( $flag_define + 'GEN_BUILD_DEBUG=1' )
$compiler_args += $flag_debug, $flag_debug_codeview, $flag_profiling_debug
}
else {
$compiler_args += ( $flag_define + 'Build_Debug=0' )
$compiler_args += ( $flag_define + 'GEN_BUILD_DEBUG=0' )
}
$warning_ignores | ForEach-Object {
@@ -399,7 +400,7 @@ if ( $vendor -match "msvc" )
if ( $debug )
{
$compiler_args += $flag_debug
$compiler_args += ( $flag_define + 'Build_Debug=1' )
$compiler_args += ( $flag_define + 'GEN_BUILD_DEBUG=1' )
$compiler_args += ( $flag_path_debug + $path_output + '\' )
$compiler_args += $flag_link_win_rt_static_debug
@@ -408,7 +409,7 @@ if ( $vendor -match "msvc" )
}
}
else {
$compiler_args += ( $flag_define + 'Build_Debug=0' )
$compiler_args += ( $flag_define + 'GEN_BUILD_DEBUG=0' )
$compiler_args += $flag_link_win_rt_static
}
$compiler_args += $includes | ForEach-Object { $flag_include + $_ }
@@ -485,7 +486,7 @@ if ( $vendor -match "msvc" )
if ( $debug )
{
$compiler_args += $flag_debug
$compiler_args += ( $flag_define + 'Build_Debug=1' )
$compiler_args += ( $flag_define + 'GEN_BUILD_DEBUG=1' )
$compiler_args += ( $flag_path_debug + $path_output + '\' )
$compiler_args += $flag_link_win_rt_static_debug
@@ -494,7 +495,7 @@ if ( $vendor -match "msvc" )
}
}
else {
$compiler_args += ( $flag_define + 'Build_Debug=0' )
$compiler_args += ( $flag_define + 'GEN_BUILD_DEBUG=0' )
$compiler_args += $flag_link_win_rt_static
}
$compiler_args += $includes | ForEach-Object { $flag_include + $_ }