diff --git a/docs/AST_Types.md b/docs/AST_Types.md
index 9c1fc46..3f96ceb 100644
--- a/docs/AST_Types.md
+++ b/docs/AST_Types.md
@@ -140,6 +140,7 @@ Code Prev;
Code Next;
parser::Token* Tok;
Code Parent;
+StringCached Name;
CodeT Type;
```
@@ -155,6 +156,12 @@ Serialization:
{
}
+
+// Constructor Source Implementation
+<Specs> <Parent>::<Parent->Name>( <Params> ) <Specs>
+{
+    <Body>
+}
```
## Define
@@ -191,6 +198,7 @@ Code Prev;
Code Next;
parser::Token* Tok;
Code Parent;
+StringCached Name;
CodeT Type;
```
@@ -205,6 +213,12 @@ Serialization:
{
}
+
+// Destructor Source Implementation
+<Specs> <Parent>::~<Parent->Name>( ) <Specs>
+{
+    <Body>
+}
```
## Enum
@@ -468,12 +482,13 @@ Serialization:
}
```
-## Parameters
+## Parameters (AST_Param)
Fields:
```cpp
CodeType ValueType;
+Code Macro;
Code Value;
CodeParam Last;
CodeParam Next;
@@ -487,7 +502,9 @@ s32 NumEntries;
Serialization:
```cpp
- , ...
+, ...
+
+ , ...
```
## Pragma
diff --git a/docs/Parser_Algo.md b/docs/Parser_Algo.md
index 9fb6c93..d085f3b 100644
--- a/docs/Parser_Algo.md
+++ b/docs/Parser_Algo.md
@@ -119,12 +119,20 @@ Below is an outline of the general alogirithim used for these internal procedure
5. If adjacent opening bracket
1. Repeat array declaration parse until no brackets remain
+## `parse_assignment_expression`
+
+1. Eat the assignment operator
+2. Make sure there is content or at least an end statement after.
+3. Flatten the assignment expression to an untyped Code string.
+
## `parse_attributes`
1. Check for standard attribute
2. Check for GNU attribute
3. Check for MSVC attribute
4. Check for a token registered as an attribute
+ a. Check for and grab the arguments of a token registered as an attribute, if it has any.
+5. Repeat for chained attributes. Flatten them to a single attribute AST node.
## `parse_class_struct`
@@ -142,39 +150,40 @@ Below is an outline of the general alogirithim used for these internal procedure
1. Opening curly brace
2. Parse the body (Possible options):
- 1. Newline : ast constant
- 2. Comment : `parse_comment`
- 3. Access_Public : ast constant
- 4. Access_Protected : ast constant
- 5. Access_Private : ast constant
- 6. Decl_Class : `parse_complicated_definition`
- 7. Decl_Enum : `parse_complicated_definition`
- 8. Decl_Friend : `parse_friend`
- 9. Decl_Operator : `parse_operator_cast`
- 10. Decl_Struct : `parse_complicated_definition`
- 11. Decl_Template : `parse_template`
- 12. Decl_Typedef : `parse_typedef`
- 13. Decl_Union : `parse_complicated_definition`
- 14. Decl_Using : `parse_using`
- 15. Operator == '~'
+ 1. Ignore dangling end statements
+ 2. Newline : ast constant
+ 3. Comment : `parse_comment`
+ 4. Access_Public : ast constant
+ 5. Access_Protected : ast constant
+ 6. Access_Private : ast constant
+ 7. Decl_Class : `parse_complicated_definition`
+ 8. Decl_Enum : `parse_complicated_definition`
+ 9. Decl_Friend : `parse_friend`
+ 10. Decl_Operator : `parse_operator_cast`
+ 11. Decl_Struct : `parse_complicated_definition`
+ 12. Decl_Template : `parse_template`
+ 13. Decl_Typedef : `parse_typedef`
+ 14. Decl_Union : `parse_complicated_definition`
+ 15. Decl_Using : `parse_using`
+ 16. Operator == '~'
1. `parse_destructor`
- 16. Preprocess_Define : `parse_define`
- 17. Preprocess_Include : `parse_include`
- 18. Preprocess_Conditional (if, ifdef, ifndef, elif, else, endif) : `parse_preprocess_cond` or else/endif ast constant
- 19. Preprocess_Macro : `parse_simple_preprocess`
- 20. Preprocess_Pragma : `parse_pragma`
- 21. Preprocess_Unsupported : `parse_simple_preprocess`
- 22. StaticAssert : `parse_static_assert`
- 23. The following compound into a resolved definition or declaration:
+ 17. Preprocess_Define : `parse_define`
+ 18. Preprocess_Include : `parse_include`
+ 19. Preprocess_Conditional (if, ifdef, ifndef, elif, else, endif) : `parse_preprocess_cond` or else/endif ast constant
+ 20. Preprocess_Macro : `parse_simple_preprocess`
+ 21. Preprocess_Pragma : `parse_pragma`
+ 22. Preprocess_Unsupported : `parse_simple_preprocess`
+ 23. StaticAssert : `parse_static_assert`
+ 24. The following compound into a resolved definition or declaration:
1. Attributes (Standard, GNU, MSVC) : `parse_attributes`
- 2. Specifiers (consteval, constexpr, constinit, forceinline, inline, mutable, neverinline, static, volatile)
+ 2. Specifiers (consteval, constexpr, constinit, explicit, forceinline, inline, mutable, neverinline, static, volatile, virtual)
3. Possible Destructor : `parse_destructor`
4. Possible User defined operator cast : `parse_operator_cast`
5. Possible Constructor : `parse_constructor`
6. Something that has the following: (identifier, const, unsigned, signed, short, long, bool, char, int, double)
1. Possible Constructor `parse_constructor`
2. Possible Operator, Function, or varaible : `parse_operator_function_or_variable`
- 24. Something completely unknown (will just make untyped...) : `parse_untyped`
+ 25. Something completely unknown (will just make untyped...) : `parse_untyped`
## `parse_comment`
@@ -197,15 +206,17 @@ A portion of the code in `parse_typedef` is very similar to this as both have to
2. If the token has a closing brace its an inplace definition
3. If the `token[-2]` is an identifier & `token[-3]` is the declaration type, its a variable using a namespaced type.
4. If the `token[-2]` is an indirection, then its a variable using a namespaced/forwarded type.
- 5. If any of the above is the case, `parse_operator_function_or_variable`
-4. If the previous token was a closing curly brace, its a definition : `parse_forward_or_definition`
-5. If the previous token was a closing square brace, its an array definition : `parse_operator_function_or_variable`
+ 5. If the `token[-2]` is an assign classifier, and the starting tokens were the `which` type with a possible `class` token after, it's an enum forward declaration.
+ 6. If any of the above is the case, `parse_operator_function_or_variable`
+4. If the `token[2]` is a vendor fundamental type (builtin) then it is an enum forward declaration.
+5. If the previous token was a closing curly brace, its a definition : `parse_forward_or_definition`
+6. If the previous token was a closing square brace, its an array definition : `parse_operator_function_or_variable`
## `parse_define`
1. Define directive
2. Get identifier
-3. Get Content
+3. Get Content (Optional)
## `parse_forward_or_definition`
@@ -243,36 +254,47 @@ In the future statements and expressions will be parsed.
1. Make sure this is being called for a valid type (namespace, global body, export body, linkage body)
2. If its not a global body, consume the opening curly brace
3. Parse the body (Possible options):
- 1. NewLine : ast constant
- 2. Comment : `parse_comment`
- 3. Decl_Cass : `parse_complicated_definition`
- 4. Decl_Enum : `parse_complicated_definition`
- 5. Decl_Extern_Linkage : `parse_extern_link`
- 6. Decl_Namespace : `parse_namespace`
- 7. Decl_Struct : `parse_complicated_definition`
- 8. Decl_Template : `parse_template`
- 9. Decl_Typedef : `parse_typedef`
- 10. Decl_Union : `parse_complicated_definition`
- 11. Decl_Using : `parse_using`
- 12. Preprocess_Define : `parse_define`
- 13. Preprocess_Include : `parse_include`
- 14. Preprocess_If, IfDef, IfNotDef, Elif : `parse_preprocess_cond`
- 15. Preprocess_Else : ast constant
- 16. Preprocess_Endif : ast constant
- 17. Preprocess_Macro : `parse_simple_preprocess`
- 18. Preprocess_Pragma : `parse_pragma`
- 19. Preprocess_Unsupported : `parse_simple_preprocess`
- 20. StaticAssert : `parse_static_assert`
- 21. Module_Export : `parse_export_body`
- 22. Module_Import : NOT_IMPLEMENTED
- 23. The following compound into a resolved definition or declaration:
+ 1. Ignore dangling end statements
+ 2. NewLine : ast constant
+ 3. Comment : `parse_comment`
+ 4. Decl_Class : `parse_complicated_definition`
+ 5. Decl_Enum : `parse_complicated_definition`
+ 6. Decl_Extern_Linkage : `parse_extern_link`
+ 7. Decl_Namespace : `parse_namespace`
+ 8. Decl_Struct : `parse_complicated_definition`
+ 9. Decl_Template : `parse_template`
+ 10. Decl_Typedef : `parse_typedef`
+ 11. Decl_Union : `parse_complicated_definition`
+ 12. Decl_Using : `parse_using`
+ 13. Preprocess_Define : `parse_define`
+ 14. Preprocess_Include : `parse_include`
+ 15. Preprocess_If, IfDef, IfNotDef, Elif : `parse_preprocess_cond`
+ 16. Preprocess_Else : ast constant
+ 17. Preprocess_Endif : ast constant
+ 18. Preprocess_Macro : `parse_simple_preprocess`
+ 19. Preprocess_Pragma : `parse_pragma`
+ 20. Preprocess_Unsupported : `parse_simple_preprocess`
+ 21. StaticAssert : `parse_static_assert`
+ 22. Module_Export : `parse_export_body`
+ 23. Module_Import : NOT_IMPLEMENTED
+ 24. The following compound into a resolved definition or declaration:
1. Attributes ( Standard, GNU, MSVC, Macro ) : `parse_attributes`
2. Specifiers ( consteval, constexpr, constinit, extern, forceinline, global, inline, internal_linkage, neverinline, static )
3. Is either ( identifier, const specifier, long, short, signed, unsigned, bool, char, double, int)
- 1. If its an operator cast (definition outside class) : `parse_operator_cast`
- 2. Its an operator, function, or varaible : `parse_operator_function_or_varaible`
+ 1. Attempt to parse as constructor or destructor : `parse_global_nspace_constructor_destructor`
+ 2. If its an operator cast (definition outside class) : `parse_operator_cast`
+ 3. Its an operator, function, or variable : `parse_operator_function_or_variable`
4. If its not a global body, consume the closing curly brace
+## `parse_global_nspace_constructor_destructor`
+
+1. Look ahead for the start of the arguments for a possible constructor/destructor
+2. Go back past the identifier
+3. Check to see if its a destructor by checking for the `~`
+4. Continue; the next token should be a `::`
+5. Determine if the next valid identifier (ignoring possible template parameters) is the same as the first identifier of the function.
+6. If it is, we have either a constructor or destructor, so parse using their respective functions (`parse_constructor`, `parse_destructor`).
+
## `parse_identifier`
This is going to get heavily changed down the line to have a more broken down "identifier expression" so that the qualifier, template args, etc, can be distinguished between the targeted identifier.
@@ -284,6 +306,7 @@ The function can parse all of them, however the AST node compresses them all int
1. Consume `::`
2. Consume member identifier
3. `parse_template args` (for member identifier)
+ 4. If a `~` is encountered and the scope is for a destructor's identifier, do not consume it and return with what was parsed.
## `parse_include`
@@ -329,15 +352,17 @@ When this function is called, attribute and specifiers may have been resolved, h
2. If the we immdiately find a closing token, consume it and finish.
3. If we encounter a varadic argument, consume it and return a `param_varadic` ast constant
4. `parse_type`
-5. If we have an identifier
+5. If we have a macro, parse it (Unreal has macros as tags to parameters and/or as entire arguments).
+6. So long as next token isn't a comma
+ a. If we have an identifier
1. Consume it
2. Check for assignment:
- 1. Consume assign operator
- 2. Parse the expression
-6. While we continue to encounter commas
- 1. Consume them
- 2. Repeat steps 3 to 5.2.2
-7. Consume the closing token
+ a. Consume assign operator
+ b. Parse the expression
+7. While we continue to encounter commas
+ a. Consume them
+ b. Repeat steps 3 to 6.2.b
+8. Consume the closing token
## `parse_preprocess_cond`
@@ -456,6 +481,7 @@ This currently doesn't support postfix specifiers (planning to in the future)
2. If there is an assignment operator:
1. Consume operator
2. Consume the expression (assigned to untyped string for now)
+ 3. If a macro is encountered consume it (Unreal UMETA macro support)
3. If there is a comma, consume it
## `parse_export_body`
@@ -476,10 +502,9 @@ This currently doesn't support postfix specifiers (planning to in the future)
1. Consume `friend`
2. `parse_type`
-3. If the currok is an identifier its a function declaration (there is no support for inline definitions yet)
- 1. `parse_identifier`
- 2. `parse_params`
-4. Consume end statement
+3. If the currtok is an identifier its a function declaration or definition
+ 1. `parse_function_after_name`
+4. Consume end statement so long as its not a function definition
5. Check for inline comment, `parse_comment` if exists
## `parse_function`
@@ -540,7 +565,8 @@ Note: This currently doesn't support templated operator casts (going to need to
5. The following compound into a resolved definition or declaration:
1. `parse_attributes`
2. Parse specifiers
- 3. `parse_operator_function_or_variable`
+ 3. Attempt to parse as constructor or destructor: `parse_global_nspace_constructor_destructor`
+ 4. Otherwise: `parse_operator_function_or_variable`
## `parse_type`
@@ -553,14 +579,15 @@ Anything that is in the qualifier capture of the function typename is treated as
1. `parse_attributes`
2. Parse specifiers
-3. This is where things get ugly for each of these depend on what the next token is.
+3. If the `parse_type` was called from a template parse, check to see if class was used instead of typename and consume as name.
+4. This is where things get ugly for each of these depend on what the next token is.
1. If its an in-place definition of a class, enum, struct, or union:
2. If its a decltype (Not supported yet but draft impl there)
3. If its a compound native type expression (unsigned, char, short, long, int, float, dobule, etc )
4. Ends up being a regular type alias of an identifier
-4. Parse specifiers (postfix)
-5. We need to now look ahead to see If we're dealing with a function typename
-6. If wer're dealing with a function typename:
+5. Parse specifiers (postfix)
+6. We need to now look ahead to see If we're dealing with a function typename
+7. If we're dealing with a function typename:
1. Shove the specifiers, and identifier code we have so far into a return type typename's Name (untyped string)
1. Reset the specifiers code for the top-level typeanme
2. Check to see if the next token is an identifier:
@@ -571,7 +598,7 @@ Anything that is in the qualifier capture of the function typename is treated as
3. Consume `)`
4. `parse_params`
5. Parse postfix specifiers
-7. Check for varaidic argument (param pack) token:
+8. Check for variadic argument (param pack) token:
1. Consume varadic argument token
### WIP - Alternative Algorithim
diff --git a/docs/Parsing.md b/docs/Parsing.md
index 91deb93..3c2348a 100644
--- a/docs/Parsing.md
+++ b/docs/Parsing.md
@@ -2,7 +2,7 @@
The library features a naive parser tailored for only what the library needs to construct the supported syntax of C++ into its AST.
-This parser does not, and should not do the compiler's job. By only supporting this minimal set of features, the parser is kept (so far) around 5500 loc. I hope to keep it under 10k loc worst case.
+This parser does not, and should not do the compiler's job. By only supporting this minimal set of features, the parser is kept (so far) around 5600 loc. I hope to keep it under 10k loc worst case.
You can think of this parser of a frontend parser vs a semantic parser. Its intuitively similar to WYSIWYG. What you precerive as the syntax from the user-side before the compiler gets a hold of it, is what you get.
@@ -73,7 +73,7 @@ The lexing and parsing takes shortcuts from whats expected in the standard.
* The parse API treats any execution scope definitions with no validation and are turned into untyped Code ASTs.
* *This includes the assignment of variables.*
* Attributes ( `[[]]` (standard), `__declspec` (Microsoft), or `__attribute__` (GNU) )
- * Assumed to *come before specifiers* (`const`, `constexpr`, `extern`, `static`, etc) for a function
+ * Assumed to *come before specifiers* (`const`, `constexpr`, `extern`, `static`, etc) for a function or right after the return type.
* Or in the usual spot for class, structs, (*right after the declaration keyword*)
* typedefs have attributes with the type (`parse_type`)
* Parsing attributes can be extended to support user defined macros by defining `GEN_DEFINE_ATTRIBUTE_TOKENS` (see `gen.hpp` for the formatting)
diff --git a/docs/Readme.md b/docs/Readme.md
index d1f2bea..9c0f6a9 100644
--- a/docs/Readme.md
+++ b/docs/Readme.md
@@ -82,6 +82,7 @@ union {
AST* ValueType; // Parameter, Variable
};
union {
+ AST* Macro; // Parameters
AST* BitfieldSize; // Variable (Class/Struct Data Member)
AST* Params; // Constructor, Function, Operator, Template, Typename
};
@@ -461,6 +462,7 @@ The AST and constructors will be able to validate that the arguments provided fo
* If return type must match a parameter
* If number of parameters is correct
* If added as a member symbol to a class or struct, that operator matches the requirements for the class (types match up)
+* There is no support for validating new & delete operations (yet)
The user is responsible for making sure the code types provided are correct
and have the desired specifiers assigned to them beforehand.
diff --git a/project/Readme.md b/project/Readme.md
index 04ec394..d1f2030 100644
--- a/project/Readme.md
+++ b/project/Readme.md
@@ -10,8 +10,7 @@ Just like the `gen.` they include their components: `dependencies/.`. These are optional extensions or tools for the library.
-**TODO : Right now the library is not finished, as such the first self-hosting iteration is still WIP**
-Both libraries use *pre-generated* (self-hosting I guess) version of the library to then generate the latest version of itself.
+Both libraries use *pre-generated* (self-hosting I guess) version of the library to then generate the latest version of itself.
The default `gen.bootstrap.cpp` located in the project folder is meant to be produce a standard segmented library, where the components of the library
have relatively dedicated header and source files. Dependencies included at the top of the file and each header starting with a pragma once.
@@ -52,7 +51,7 @@ Names or Content fields are interned strings and thus showed be cached using `ge
The library has its code segmented into component files, use it to help create a derived version without needing to have to rewrite a generated file directly or build on top of the header via composition or inheritance.
-The parser is documented under `docs/Parsing.md` and `docs/Parser_Algo.md`.
+The parser is documented under `docs/Parsing.md` and `docs/Parser_Algo.md`.
## A note on compilation and runtime generation speed
diff --git a/project/components/parser.cpp b/project/components/parser.cpp
index 6f131b9..632c510 100644
--- a/project/components/parser.cpp
+++ b/project/components/parser.cpp
@@ -554,6 +554,40 @@ Code parse_array_decl()
return { nullptr };
}
+internal inline
+Code parse_assignment_expression() // Eats the assignment operator, then flattens every token up to the terminating end statement / top-level comma into a single untyped Code string.
+{
+ Code expr = { nullptr };
+
+ eat( TokType::Operator ); // Consume the assignment operator itself.
+ // =
+
+ Token expr_tok = currtok; // Copy of the first token after the operator; its Length is widened below to span the whole expression.
+
+ if ( currtok.Type == TokType::Statement_End && currtok.Type != TokType::Comma ) // NOTE(review): the `!= Comma` test is always true once Type == Statement_End, so only an immediate end statement is rejected here - confirm whether an immediate comma was meant to be rejected too.
+ {
+ log_failure( "Expected expression after assignment operator\n%s", Context.to_string() );
+ Context.pop(); // NOTE(review): no matching scope push is visible inside this function - presumably this pops the caller's scope; verify.
+ return CodeInvalid;
+ }
+
+ s32 level = 0; // Nesting depth of Capture_Start / Capture_End pairs seen so far.
+ while ( left && currtok.Type != TokType::Statement_End && (currtok.Type != TokType::Comma || level > 0) ) // Stop at an end statement, or at a comma that is not nested inside a capture.
+ {
+ if (currtok.Type == TokType::Capture_Start)
+ level++;
+ else if (currtok.Type == TokType::Capture_End)
+ level--;
+
+ eat( currtok.Type ); // Accept whatever the current token is; the expression is not validated.
+ }
+
+ expr_tok.Length = ( ( sptr )currtok.Text + currtok.Length ) - ( sptr )expr_tok.Text - 1; // Byte span from the first expression token to the end of the token the loop stopped on, minus one - presumably trimming a one-character terminator; TODO confirm.
+ expr = untyped_str( expr_tok ); // Store the raw text as an untyped Code node.
+ // =
+ return expr;
+}
+
internal inline
CodeAttributes parse_attributes()
{
@@ -1311,14 +1345,17 @@ CodeDefine parse_define()
eat( TokType::Identifier );
// #define
+ // Defines don't necessarily need content.
+#if 0
if ( ! check( TokType::Preprocess_Content ))
{
log_failure( "Error, expected content after #define %s\n%s", define->Name, Context.to_string() );
Context.pop();
return CodeInvalid;
}
+#endif
- if ( currtok.Length == 0 )
+ if ( check(TokType::Preprocess_Content) && currtok.Length != 0 )
{
define->Content = get_cached_string( currtok );
eat( TokType::Preprocess_Content );
@@ -1336,40 +1373,6 @@ CodeDefine parse_define()
return define;
}
-internal inline
-Code parse_assignment_expression()
-{
- Code expr = { nullptr };
-
- eat( TokType::Operator );
- // =
-
- Token expr_tok = currtok;
-
- if ( currtok.Type == TokType::Statement_End && currtok.Type != TokType::Comma )
- {
- log_failure( "Expected expression after assignment operator\n%s", Context.to_string() );
- Context.pop();
- return CodeInvalid;
- }
-
- s32 level = 0;
- while ( left && currtok.Type != TokType::Statement_End && (currtok.Type != TokType::Comma || level > 0) )
- {
- if (currtok.Type == TokType::Capture_Start)
- level++;
- else if (currtok.Type == TokType::Capture_End)
- level--;
-
- eat( currtok.Type );
- }
-
- expr_tok.Length = ( ( sptr )currtok.Text + currtok.Length ) - ( sptr )expr_tok.Text - 1;
- expr = untyped_str( expr_tok );
- // =
- return expr;
-}
-
internal inline
Code parse_forward_or_definition( TokType which, bool is_inplace )
{
@@ -3192,7 +3195,8 @@ CodeVar parse_variable_after_name(
Note(Ed): This does not support the following:
* Function Pointers
*/
-internal CodeVar parse_variable_declaration_list()
+internal
+CodeVar parse_variable_declaration_list()
{
push_scope();
@@ -4328,7 +4332,8 @@ CodeTemplate parse_template()
The excess whitespace cannot be stripped however, because there is no semantic awareness within the first capture group.
*/
-internal CodeType parse_type( bool from_template, bool* typedef_is_function )
+internal
+CodeType parse_type( bool from_template, bool* typedef_is_function )
{
push_scope();
diff --git a/scripts/gencpp.natvis b/scripts/gencpp.natvis
index 00222f9..7b94b25 100644
--- a/scripts/gencpp.natvis
+++ b/scripts/gencpp.natvis
@@ -121,6 +121,20 @@
+
+ {Name} Type: {Type}
+
+ - InlineCmt
+ - Specs
+ - InitializerList
+ - Params
+ - Body
+ - Parent
+ - Prev
+ - Next
+
+
+
{Name} Type: {Type}