mirror of
https://github.com/Ed94/gencpp.git
synced 2024-12-22 07:44:45 -08:00
Progress on parser documentation
This commit is contained in:
parent
15847f10b9
commit
adbcb2a83b
@ -4133,13 +4133,6 @@ CodeFn parser_parse_function()
|
|||||||
}
|
}
|
||||||
// <export> <Attributes> <Specifiers>
|
// <export> <Attributes> <Specifiers>
|
||||||
|
|
||||||
// Note(Ed): We're enforcing that using this codepath requires non-macro jank.
|
|
||||||
// Code macro_stmt = parse_macro_as_definiton(attributes, specifiers);
|
|
||||||
// if (macro_stmt) {
|
|
||||||
// parser_pop(& _ctx->parser);
|
|
||||||
// return macro_stmt;
|
|
||||||
// }
|
|
||||||
|
|
||||||
CodeTypename ret_type = parser_parse_type(parser_not_from_template, nullptr);
|
CodeTypename ret_type = parser_parse_type(parser_not_from_template, nullptr);
|
||||||
if ( cast(Code, ret_type) == Code_Invalid ) {
|
if ( cast(Code, ret_type) == Code_Invalid ) {
|
||||||
parser_pop(& _ctx->parser);
|
parser_pop(& _ctx->parser);
|
||||||
|
@ -132,8 +132,6 @@ enum MacroType : u16
|
|||||||
MT_Expression, // A macro is assumed to be a expression if not resolved.
|
MT_Expression, // A macro is assumed to be a expression if not resolved.
|
||||||
MT_Statement,
|
MT_Statement,
|
||||||
MT_Typename,
|
MT_Typename,
|
||||||
MT_Attribute, // More of a note to the parser than anythign else (attributes should be defined in the user attribues def).
|
|
||||||
MT_Specifier, // More of a note to the parser than anythign else (specifiers should be defined in the user attribues def).
|
|
||||||
MT_Block_Start, // Not Supported yet
|
MT_Block_Start, // Not Supported yet
|
||||||
MT_Block_End, // Not Supported yet
|
MT_Block_End, // Not Supported yet
|
||||||
MT_Case_Statement, // Not Supported yet
|
MT_Case_Statement, // Not Supported yet
|
||||||
@ -160,8 +158,6 @@ Str macrotype_to_str( MacroType type )
|
|||||||
{ "Statement", sizeof("Statement") - 1 },
|
{ "Statement", sizeof("Statement") - 1 },
|
||||||
{ "Expression", sizeof("Expression") - 1 },
|
{ "Expression", sizeof("Expression") - 1 },
|
||||||
{ "Typename", sizeof("Typename") - 1 },
|
{ "Typename", sizeof("Typename") - 1 },
|
||||||
{ "Attribute(Macro)", sizeof("Attribute(Macro)") - 1 },
|
|
||||||
{ "Specifier(Macro)", sizeof("Specifier(Macro)") - 1 },
|
|
||||||
{ "Block_Start", sizeof("Block_Start") - 1 },
|
{ "Block_Start", sizeof("Block_Start") - 1 },
|
||||||
{ "Block_End", sizeof("Block_End") - 1 },
|
{ "Block_End", sizeof("Block_End") - 1 },
|
||||||
{ "Case_Statement", sizeof("Case_Statement") - 1 },
|
{ "Case_Statement", sizeof("Case_Statement") - 1 },
|
||||||
|
@ -12,7 +12,9 @@ gencpp uses a hand-written recursive descent parser. Both the lexer and parser c
|
|||||||
|
|
||||||
### Lexer
|
### Lexer
|
||||||
|
|
||||||
The lex procedure does the lexical pass of content provided as a `Str` type.
|
File: [lexer.cpp](../base/components/lexer.cpp)
|
||||||
|
|
||||||
|
The `lex` procedure does the lexical pass of content provided as a `Str` type.
|
||||||
The tokens are stored (for now) in `Lexer_Tokens`.
|
The tokens are stored (for now) in `Lexer_Tokens`.
|
||||||
|
|
||||||
Fields:
|
Fields:
|
||||||
@ -24,17 +26,15 @@ s32 Idx;
|
|||||||
|
|
||||||
The supported token types can be found in [ETokType.csv](../base/enums/ETokType.csv). You can also find the token types in [ETokType.h](../base/components/gen/etoktype.cpp), which is the generated enum from the csv file.
|
The supported token types can be found in [ETokType.csv](../base/enums/ETokType.csv). You can also find the token types in [ETokType.h](../base/components/gen/etoktype.cpp), which is the generated enum from the csv file.
|
||||||
|
|
||||||
Tokens are defined with the struct `gen::parser::Token`:
|
|
||||||
|
|
||||||
Fields:
|
|
||||||
|
|
||||||
```cpp
|
```cpp
|
||||||
char const* Text;
|
struct Token
|
||||||
sptr Length;
|
{
|
||||||
|
Str Text;
|
||||||
TokType Type;
|
TokType Type;
|
||||||
s32 Line;
|
s32 Line;
|
||||||
s32 Column;
|
s32 Column;
|
||||||
u32 Flags;
|
u32 Flags;
|
||||||
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Flags is a bitfield made up of TokFlags (Token Flags):
|
Flags is a bitfield made up of TokFlags (Token Flags):
|
||||||
@ -52,25 +52,17 @@ Flags is a bitfield made up of TokFlags (Token Flags):
|
|||||||
* `TF_EndDefinition` : Can be interpreted as an end definition for a scope.
|
* `TF_EndDefinition` : Can be interpreted as an end definition for a scope.
|
||||||
* `TF_Formatting` : Considered a part of the formatting
|
* `TF_Formatting` : Considered a part of the formatting
|
||||||
* `TF_Literal` : Anything considered a literal by C++.
|
* `TF_Literal` : Anything considered a literal by C++.
|
||||||
|
* `TF_Macro_Functional` : Used to indicate macro token is flagged as `MF_Functional`.
|
||||||
I plan to replace IsAssign with a general flags field and properly keep track of all operator types instead of abstracting it away to `ETokType::Operator`.
|
* `TF_Macro_Expects_Body` : Used to indicate macro token is flagged as `MF_Expects_Body`.
|
||||||
|
|
||||||
Traversing the tokens is done with the following interface macros:
|
|
||||||
|
|
||||||
| Macro | Description |
|
|
||||||
| --- | --- |
|
|
||||||
| `currtok_noskip` | Get the current token without skipping whitespace |
|
|
||||||
| `currtok` | Get the current token, skip any whitespace tokens |
|
|
||||||
| `prevtok` | Get the previous token (does not skip whitespace) |
|
|
||||||
| `nexttok` | Get the next token (does not skip whitespace) |
|
|
||||||
| `eat( Token Type )` | Check to see if the current token is of the given type, if so, advance Token's index to the next token |
|
|
||||||
| `left` | Get the number of tokens left in the token array |
|
|
||||||
| `check_noskip` | Check to see if the current token is of the given type, without skipping whitespace |
|
|
||||||
| `check` | Check to see if the current token is of the given type, skip any whitespace tokens |
|
|
||||||
|
|
||||||
### Parser
|
### Parser
|
||||||
|
|
||||||
The parser has a limited user interface, only specific types of definitions or statements are expected to be provided by the user directly when using to construct an AST dynamically (See SOA for example). It however does attempt to provide capability to parse a full C/C++ from production codebases.
|
Files:
|
||||||
|
|
||||||
|
* [interface.parsing.cpp](../base/components/interface.parsing.cpp)
|
||||||
|
* [parser.cpp](../base/components/parser.cpp)
|
||||||
|
|
||||||
|
The parser has a limited user interface; only specific types of definitions or statements are expected to be provided by the user directly when using it to construct an AST dynamically. It does, however, attempt to provide the capability to parse full C/C++ files from production codebases.
|
||||||
|
|
||||||
Each public user interface procedure has the following format:
|
Each public user interface procedure has the following format:
|
||||||
|
|
||||||
@ -89,8 +81,7 @@ Each public user interface procedure has the following format:
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
The most top-level parsing procedure used for C/C++ file parsing is `parse_global_body`:
|
The most top-level parsing procedure used for C/C++ file parsing is `parse_global_body`.
|
||||||
|
|
||||||
It uses a helper procedure called `parse_global_nspace`.
|
It uses a helper procedure called `parse_global_nspace`.
|
||||||
|
|
||||||
Each internal procedure will have the following format:
|
Each internal procedure will have the following format:
|
||||||
@ -111,126 +102,300 @@ internal
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
The parsing implementation contains comments throughout the codepaths to indicate how far their contextual AST node has been resolved.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
```c
|
||||||
|
internal
|
||||||
|
CodeFn parser_parse_function()
|
||||||
|
{
|
||||||
|
push_scope();
|
||||||
|
|
||||||
|
Specifier specs_found[16] = { Spec_NumSpecifiers };
|
||||||
|
s32 NumSpecifiers = 0;
|
||||||
|
|
||||||
|
CodeAttributes attributes = { nullptr };
|
||||||
|
CodeSpecifiers specifiers = { nullptr };
|
||||||
|
ModuleFlag mflags = ModuleFlag_None;
|
||||||
|
|
||||||
|
if ( check(Tok_Module_Export) ) {
|
||||||
|
mflags = ModuleFlag_Export;
|
||||||
|
eat( Tok_Module_Export );
|
||||||
|
}
|
||||||
|
// <export>
|
||||||
|
|
||||||
|
attributes = parse_attributes();
|
||||||
|
// <export> <Attributes>
|
||||||
|
|
||||||
|
while ( left && tok_is_specifier(currtok) )
|
||||||
|
{
|
||||||
|
Specifier spec = str_to_specifier( tok_to_str(currtok) );
|
||||||
|
|
||||||
|
switch ( spec )
|
||||||
|
{
|
||||||
|
GEN_PARSER_FUNCTION_ALLOWED_SPECIFIERS_CASES:
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
log_failure( "Invalid specifier %S for functon\n%SB", spec_to_str(spec), parser_to_strbuilder(_ctx->parser) );
|
||||||
|
parser_pop(& _ctx->parser);
|
||||||
|
return InvalidCode;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( spec == Spec_Const )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
specs_found[NumSpecifiers] = spec;
|
||||||
|
NumSpecifiers++;
|
||||||
|
eat( currtok.Type );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( NumSpecifiers ) {
|
||||||
|
specifiers = def_specifiers_arr( NumSpecifiers, specs_found );
|
||||||
|
}
|
||||||
|
// <export> <Attributes> <Specifiers>
|
||||||
|
|
||||||
|
CodeTypename ret_type = parser_parse_type(parser_not_from_template, nullptr);
|
||||||
|
if ( cast(Code, ret_type) == Code_Invalid ) {
|
||||||
|
parser_pop(& _ctx->parser);
|
||||||
|
return InvalidCode;
|
||||||
|
}
|
||||||
|
// <export> <Attributes> <Specifiers> <ReturnType>
|
||||||
|
|
||||||
|
Token name = parse_identifier(nullptr);
|
||||||
|
_ctx->parser.Scope->Name = name.Text;
|
||||||
|
if ( ! tok_is_valid(name) ) {
|
||||||
|
parser_pop(& _ctx->parser);
|
||||||
|
return InvalidCode;
|
||||||
|
}
|
||||||
|
// <export> <Attributes> <Specifiers> <ReturnType> <Name>
|
||||||
|
|
||||||
|
CodeFn result = parse_function_after_name( mflags, attributes, specifiers, ret_type, name );
|
||||||
|
// <export> <Attributes> <Specifiers> <ReturnType> <Name> ...
|
||||||
|
|
||||||
|
parser_pop(& _ctx->parser);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
In the above `parse_function` implementation:
|
||||||
|
|
||||||
|
`// <intuitive expression of AST component> ...`
|
||||||
|
|
||||||
|
This comment form is used by convention wherever, by that point in the codepath, `<intuitive expression of AST component>` should be resolved for the AST.
|
||||||
|
|
||||||
|
## Outline of parsing codepaths
|
||||||
|
|
||||||
Below is an outline of the general algorithm used for these internal procedures. The intention is to provide a basic briefing to aid the user in traversing the actual code definitions. These appear in the same order as they are in the `parser.cpp` file.
|
Below is an outline of the general algorithm used for these internal procedures. The intention is to provide a basic briefing to aid the user in traversing the actual code definitions. These appear in the same order as they are in the `parser.cpp` file.
|
||||||
|
|
||||||
***NOTE: This is still heavily in an alpha state. A large swath of this can change; make sure these docs are up to date before considering them 1:1 with the repo commit you're considering.***
|
***NOTE: This is still heavily in an alpha state. A large swath of this can change; make sure these docs are up to date before considering them 1:1 with the repo commit you're considering.***
|
||||||
|
|
||||||
## `parse_array_decl`
|
## `parse_array_decl`
|
||||||
|
|
||||||
1. Check if its an array declaration with no expression.
|
1. Push parser scope
|
||||||
1. Consume and return empty array declaration
|
2. Check for empty array `[]`
|
||||||
2. Opening square bracket
|
1. Return untyped string with single space if found
|
||||||
3. Consume expression
|
3. Check for opening bracket `[`
|
||||||
4. Closing square bracket
|
1. Validate parser not at EOF
|
||||||
5. If adjacent opening bracket
|
2. Reject empty array expression
|
||||||
1. Repeat array declaration parse until no brackets remain
|
3. Capture expression tokens until closing bracket
|
||||||
|
4. Calculate expression span length
|
||||||
## `parse_assignment_expression`
|
5. Convert to untyped string
|
||||||
|
4. Validate and consume closing bracket `]`
|
||||||
1. Eat the assignment operator
|
5. Handle multi-dimensional case
|
||||||
2. Make sure there is content or at least an end statement after.
|
1. If adjacent `[` detected, recursively parse
|
||||||
3. Flatten the assignment expression to an untyped Code string.
|
2. Link array expressions via Next pointer
|
||||||
|
6. Pop parser scope
|
||||||
|
7. Return array expression or NullCode on failure
|
||||||
|
|
||||||
## `parse_attributes`
|
## `parse_attributes`
|
||||||
|
|
||||||
1. Check for standard attribute
|
1. Push parser scope and initialize tracking
|
||||||
2. Check for GNU attribute
|
1. Store initial token position
|
||||||
3. Check for MSVC attribute
|
2. Initialize length counter
|
||||||
4. Check for a token registered as an attribute
|
2. Process attributes while available
|
||||||
a. Check and grab the arguments of a token registered of an attribute if it has any.
|
1. Handle C++ style attributes `[[...]]`
|
||||||
5. Repeat for chained attributes. Flatten them to a single attribute AST node.
|
1. Consume opening `[[`
|
||||||
|
2. Capture content until closing `]]`
|
||||||
|
3. Calculate attribute span length
|
||||||
|
2. Handle GNU style `__attribute__((...))`
|
||||||
|
1. Consume `__attribute__` keyword and opening `((`
|
||||||
|
2. Capture content until closing `))`
|
||||||
|
3. Calculate attribute span length
|
||||||
|
3. Handle MSVC style `__declspec(...)`
|
||||||
|
1. Consume `__declspec` and opening `(`
|
||||||
|
2. Capture content until closing `)`
|
||||||
|
3. Calculate attribute span length
|
||||||
|
4. Handle macro-style attributes
|
||||||
|
1. Consume attribute token
|
||||||
|
2. If followed by parentheses
|
||||||
|
1. Handle nested parentheses tracking
|
||||||
|
2. Capture content maintaining paren balance
|
||||||
|
3. Calculate attribute span length
|
||||||
|
3. Generate attribute code if content captured
|
||||||
|
1. Create attribute text span from start position and length
|
||||||
|
2. Strip formatting from attribute text
|
||||||
|
3. Construct Code node
|
||||||
|
1. Set type to `CT_PlatformAttributes`
|
||||||
|
2. Cache and set Name and Content fields
|
||||||
|
4. Return as CodeAttributes
|
||||||
|
4. Pop parser scope
|
||||||
|
5. Return NullCode if no attributes found
|
||||||
|
|
||||||
## `parse_class_struct`
|
## `parse_class_struct`
|
||||||
|
|
||||||
1. Check for export module specifier
|
1. Validate token type is class or struct
|
||||||
2. class or struct keyword
|
1. Return InvalidCode if validation fails
|
||||||
3. `parse_attributes`
|
2. Initialize class/struct metadata
|
||||||
4. If identifier : `parse_identifier`
|
1. Access specifier (default)
|
||||||
5. Parse inherited parent or interfaces
|
2. Parent class/struct reference
|
||||||
6. If opening curly brace : `parse_class_struct_body`
|
3. Class/struct body
|
||||||
7. If not an inplace definition
|
4. Attributes
|
||||||
1. End statement
|
5. Module flags
|
||||||
2. Check for inline comment
|
3. Parse module export flag if present
|
||||||
|
1. Set ModuleFlag_Export
|
||||||
|
2. Consume export token
|
||||||
|
4. Consume class/struct token
|
||||||
|
5. Parse attributes via `parse_attributes()`
|
||||||
|
6. Parse class/struct identifier
|
||||||
|
1. Update parser scope name
|
||||||
|
7. Initialize interface array (4KB arena)
|
||||||
|
8. Parse inheritance/implementation
|
||||||
|
1. If classifier token (`:`) present
|
||||||
|
1. Parse access specifier if exists
|
||||||
|
2. Parse parent class/struct name
|
||||||
|
3. Parse additional interfaces
|
||||||
|
1. Separated by commas
|
||||||
|
2. Optional access specifiers
|
||||||
|
3. Store in interface array
|
||||||
|
9. Parse class body if present
|
||||||
|
1. Triggered by opening brace
|
||||||
|
2. Parse via `parse_class_struct_body`
|
||||||
|
10. Handle statement termination
|
||||||
|
1. Skip for inplace definitions
|
||||||
|
2. Consume semicolon
|
||||||
|
3. Parse inline comment if present
|
||||||
|
11. Construct result node
|
||||||
|
1. Create class definition if Tok_Decl_Class
|
||||||
|
2. Create struct definition if Tok_Decl_Struct
|
||||||
|
3. Attach inline comment if exists
|
||||||
|
12. Cleanup interface array and return result
|
||||||
|
|
||||||
## `parse_class_struct_body`
|
## `parse_class_struct_body`
|
||||||
|
|
||||||
1. Opening curly brace
|
1. Initialize scope and body structure
|
||||||
2. Parse the body (Possible options):
|
1. Push parser scope
|
||||||
1. Ignore dangling end statements
|
2. Consume opening brace
|
||||||
2. Newline : ast constant
|
3. Create code node with `CT_Class_Body` or `CT_Struct_Body` type
|
||||||
3. Comment : `parse_comment`
|
2. Parse body members while not at closing brace
|
||||||
4. Access_Public : ast constant
|
1. Initialize member parsing state
|
||||||
5. Access_Protected : ast constant
|
1. Code member (InvalidCode)
|
||||||
6. Access_Private : ast constant
|
2. Attributes (null)
|
||||||
7. Decl_Class : `parse_complicated_definition`
|
3. Specifiers (null)
|
||||||
8. Decl_Enum : `parse_complicated_definition`
|
4. Function expectation flag
|
||||||
9. Decl_Friend : `parse_friend`
|
2. Handle preprocessor hash if present
|
||||||
10. Decl_Operator : `parse_operator_cast`
|
3. Process member by token type in switch statement
|
||||||
11. Decl_Struct : `parse_complicated_definition`
|
1. Statement termination - warn and skip
|
||||||
12. Decl_Template : `parse_template`
|
2. Newline - format member
|
||||||
13. Decl_Typedef : `parse_typedef`
|
3. Comments - parse comment
|
||||||
14. Decl_Union : `parse_complicated_definition`
|
4. Access specifiers - `public/protected/private`
|
||||||
15. Decl_Using : `parse_using`
|
5. Declarations - `class/enum/struct/union/typedef/using`
|
||||||
16. Operator == '~'
|
6. Operators - `destructors/casts`
|
||||||
1. `parse_destructor`
|
7. Preprocessor directives - `define/include/conditionals/pragmas`
|
||||||
17. Preprocess_Define : `parse_define`
|
8. Preprocessor statement macros
|
||||||
18. Preprocess_Include : `parse_include`
|
9. Report naked preprocessor expression macros detected as an error.
|
||||||
19. Preprocess_Conditional (if, ifdef, ifndef, elif, else, endif) : `parse_preprocess_cond` or else/endif ast constant
|
10. Static assertions
|
||||||
20. Preprocess_Macro : `parse_simple_preprocess`
|
11. Attributes and specifiers
|
||||||
21. Preprocess_Pragma : `parse_pragma`
|
1. Parse attributes
|
||||||
22. Preprocess_Unsupported : `parse_simple_preprocess`
|
2. Parse valid member specifiers
|
||||||
23. StaticAssert : `parse_static_assert`
|
3. Handle `attribute-specifier-attribute` case
|
||||||
24. The following compound into a resolved definition or declaration:
|
12. Identifiers and types
|
||||||
1. Attributes (Standard, GNU, MSVC) : `parse_attributes`
|
1. Check for constructors
|
||||||
2. Specifiers (consteval, constexpr, constinit, explicit, forceinline, inline, mutable, neverinline, static, volatile, virtual)
|
2. Parse `operator/function/variable`
|
||||||
3. Possible Destructor : `parse_destructor`
|
13. Default - capture unknown content until closing brace
|
||||||
4. Possible User defined operator cast : `parse_operator_cast`
|
4. Validate member parsing
|
||||||
5. Possible Constructor : `parse_constructor`
|
1. Return InvalidCode if member invalid
|
||||||
6. Something that has the following: (identifier, const, unsigned, signed, short, long, bool, char, int, double)
|
2. Append valid member to body
|
||||||
1. Possible Constructor `parse_constructor`
|
3. Finalize body
|
||||||
2. Possible Operator, Function, or varaible : `parse_operator_function_or_variable`
|
1. Consume closing brace
|
||||||
25. Something completely unknown (will just make untyped...) : `parse_untyped`
|
2. Pop parser scope
|
||||||
|
3. Return completed CodeBody
|
||||||
|
|
||||||
## `parse_comment`
|
## `parse_comment`
|
||||||
|
|
||||||
1. Just wrap the token into a cached string ( the lexer did the processing )
|
1. Just wrap the token into a cached string ( the lexer did the processing )
|
||||||
|
|
||||||
## `parse_compilcated_definition`
|
## `parse_complicated_definition`
|
||||||
|
|
||||||
This is a helper function used by the following functions to help resolve a declaration or definition:
|
1. Initialize parsing context
|
||||||
|
1. Push scope
|
||||||
|
2. Set inplace flag false
|
||||||
|
3. Get token array reference
|
||||||
|
2. Scan ahead for statement termination
|
||||||
|
1. Track brace nesting level
|
||||||
|
2. Find first semicolon at level 0
|
||||||
|
3. Handle declaration variants
|
||||||
|
1. Forward declaration case
|
||||||
|
1. Check if only 2 tokens before semicolon
|
||||||
|
2. Parse via `parse_forward_or_definition`
|
||||||
|
2. Function with trailing specifiers
|
||||||
|
1. Identify trailing specifiers
|
||||||
|
2. Check for function pattern
|
||||||
|
3. Parse as `operator/function/variable`
|
||||||
|
4. Return `InvalidCode` if pattern invalid
|
||||||
|
3. Identifier-based declarations
|
||||||
|
1. Check identifier patterns
|
||||||
|
1. Inplace definition `{...} id;`
|
||||||
|
2. Namespace type variable `which id id;`
|
||||||
|
3. Enum with class qualifier
|
||||||
|
4. `Pointer/reference` types
|
||||||
|
2. Parse as `operator/function/variable` if valid
|
||||||
|
3. Return `InvalidCode` if pattern invalid
|
||||||
|
4. Basic type declarations
|
||||||
|
1. Validate enum class pattern
|
||||||
|
2. Parse via `parser_parse_enum`
|
||||||
|
3. Return `InvalidCode` if invalid
|
||||||
|
5. Direct definitions
|
||||||
|
1. Handle closing brace - `parse_forward_or_definition`
|
||||||
|
2. Handle array definitions - `parse_operator_function_or_variable`
|
||||||
|
3. Return InvalidCode for unknown patterns
|
||||||
|
|
||||||
* `parse_class_struct_body`
|
## `parse_assignment_expression`
|
||||||
* `parse_global_nspace`
|
|
||||||
* `parse_union`
|
|
||||||
|
|
||||||
A portion of the code in `parse_typedef` is very similar to this as both have to resolve a similar issue.
|
1. Initialize expression parsing
|
||||||
|
1. Null expression pointer
|
||||||
1. Look ahead to the termination token (End statement)
|
2. Consume assignment operator token
|
||||||
2. Check to see if it fits the pattern for a forward declare
|
3. Capture initial expression token
|
||||||
3. If the previous token was an identifier ( `token[-1]` ):
|
2. Validate expression presence
|
||||||
1. Look back one more token : `[-2]`
|
1. Check for immediate termination
|
||||||
2. If the token has a closing brace its an inplace definition
|
2. Return `InvalidCode` if missing expression
|
||||||
3. If the `token[-2]` is an identifier & `token[-3]` is the declaration type, its a variable using a namespaced type.
|
3. Parse balanced expression
|
||||||
4. If the `token[-2]` is an indirection, then its a variable using a namespaced/forwarded type.
|
1. Track nesting level for
|
||||||
5. If the `token[-2]` is an assign classifier, and the starting tokens were the which type with possible `class` token after, its an enum forward declaration.
|
1. Curly braces
|
||||||
6. If any of the above is the case, `parse_operator_function_or_variable`
|
2. Parentheses
|
||||||
4. If the `token[2]` is a vendor fundamental type (builtin) then it is an enum forward declaration.
|
2. Continue until
|
||||||
5. If the previous token was a closing curly brace, its a definition : `parse_forward_or_definition`
|
1. End of input, or
|
||||||
6. If the previous token was a closing square brace, its an array definition : `parse_operator_function_or_variable`
|
2. Statement terminator, or
|
||||||
|
3. Unnested comma
|
||||||
## `parse_define`
|
3. Consume tokens sequentially
|
||||||
|
4. Generate expression code
|
||||||
1. Define directive
|
1. Calculate expression span length
|
||||||
2. Get identifier
|
2. Convert to untyped string
|
||||||
3. Get Content (Optional)
|
3. Return expression node
|
||||||
|
|
||||||
## `parse_forward_or_definition`
|
## `parse_forward_or_definition`
|
||||||
|
|
||||||
* Parse any of the following for either a forward declaration or definition:
|
1. Declaration type routing
|
||||||
1. Decl_Class : `parse_class`
|
1. Class (`Tok_Decl_Class`) -> `parser_parse_class`
|
||||||
2. Decl_Enum : `parse_enum`
|
2. Enum (`Tok_Decl_Enum`) -> `parser_parse_enum`
|
||||||
3. Decl_Struct : `parse_struct`
|
3. Struct (`Tok_Decl_Struct`) -> `parser_parse_struct`
|
||||||
4. Decl_Union : `parse_union`
|
4. Union (`Tok_Decl_Union`) -> `parser_parse_union`
|
||||||
|
2. Error handling
|
||||||
|
1. Return `InvalidCode` for unsupported token types
|
||||||
|
2. Log failure with parser context
|
||||||
|
|
||||||
|
`is_inplace` flag propagates to specialized codepaths to maintain parsing context.
|
||||||
|
|
||||||
## `parse_function_after_name`
|
## `parse_function_after_name`
|
||||||
|
|
||||||
@ -239,80 +404,191 @@ after its been made ceratin that the type of declaration or definition is indeed
|
|||||||
|
|
||||||
By the point this function is called the following are known : export module flag, attributes, specifiers, return type, & name
|
By the point this function is called the following are known : export module flag, attributes, specifiers, return type, & name
|
||||||
|
|
||||||
1. `parse_parameters`
|
1. Parameter parsing
|
||||||
2. parse postfix specifiers (we do not check if the specifier here is correct or not to be here... yet)
|
1. Push scope
|
||||||
3. If there is a body : `parse_body`
|
2. Parse parameter list with parentheses
|
||||||
4. Otherwise :
|
2. Post-parameter specifier processing
|
||||||
1. Statment end
|
1. Collect trailing specifiers
|
||||||
2. Check for inline comment
|
2. Initialize or append to existing specifiers
|
||||||
|
3. Parse function termination
|
||||||
|
1. Function body case
|
||||||
|
1. Parse body if open brace found
|
||||||
|
2. Validate body type (`CT_Function_Body` or `CT_Untyped`)
|
||||||
|
2. Pure virtual case
|
||||||
|
1. Handle "`= 0`" syntax
|
||||||
|
2. Append pure specifier
|
||||||
|
3. Forward declaration case
|
||||||
|
1. Consume statement terminator
|
||||||
|
4. Handle inline comments for all cases
|
||||||
|
4. Construct function node
|
||||||
|
1. Strip whitespace from name
|
||||||
|
2. Initialize `CodeFn` with base properties
|
||||||
|
1. Name (cached, stripped)
|
||||||
|
2. Module flags
|
||||||
|
3. Set node type
|
||||||
|
1. `CT_Function` if body present
|
||||||
|
2. `CT_Function_Fwd` if declaration only
|
||||||
|
4. Attach components
|
||||||
|
1. Attributes if present
|
||||||
|
2. Specifiers if present
|
||||||
|
3. Return type
|
||||||
|
4. Parameters if present
|
||||||
|
5. Inline comment if present
|
||||||
|
5. Cleanup and return
|
||||||
|
1. Pop scope
|
||||||
|
2. Return completed function node
|
||||||
|
|
||||||
## `parse_function_body`
|
## `parse_function_body`
|
||||||
|
|
||||||
Currently there is no actual parsing of the function body. Any content within the braces is shoved into an execution AST node.
|
Currently there is no actual parsing of the function body. Any content within the braces is shoved into an execution AST node.
|
||||||
In the future statements and expressions will be parsed.
|
In the future statements and expressions will be parsed.
|
||||||
|
|
||||||
1. Open curly brace
|
1. Initialize body parsing
|
||||||
2. Grab all tokens between the brace and the closing brace, shove them in a execution AST node.
|
1. Push scope
|
||||||
3. Closing curly brace
|
2. Consume opening brace
|
||||||
|
3. Create CodeBody with CT_Function_Body type
|
||||||
|
2. Capture function content
|
||||||
|
1. Record start token position
|
||||||
|
2. Track brace nesting level
|
||||||
|
3. Consume tokens while
|
||||||
|
1. Input remains AND
|
||||||
|
2. Not at unmatched closing brace
|
||||||
|
4. Update level counters
|
||||||
|
1. Increment on open brace
|
||||||
|
2. Decrement on closing brace when level > 0
|
||||||
|
3. Process captured content
|
||||||
|
1. Calculate content length via pointer arithmetic
|
||||||
|
2. Create execution block if content exists
|
||||||
|
1. Construct string span from start position and length
|
||||||
|
2. Wrap in execution node
|
||||||
|
3. Append to body
|
||||||
|
4. Finalize
|
||||||
|
1. Consume closing brace
|
||||||
|
2. Pop scope
|
||||||
|
3. Return cast body node
|
||||||
|
|
||||||
## `parse_global_nspace`
|
## `parse_global_nspace`
|
||||||
|
|
||||||
1. Make sure this is being called for a valid type (namespace, global body, export body, linkage body)
|
1. State initialization
|
||||||
2. If its not a global body, consume the opening curly brace
|
1. Push parser scope
|
||||||
3. Parse the body (Possible options):
|
2. Validate namespace type (Global, Namespace, Export, Extern Linkage)
|
||||||
1. Ignore dangling end statements
|
3. Consume opening brace for non-global scopes
|
||||||
2. NewLine : ast constant
|
4. Initialize `CodeBody` with specified body type: `which`
|
||||||
3. Comment : `parse_comment`
|
2. Member parsing loop (while not at closing brace)
|
||||||
4. Decl_Cass : `parse_complicated_definition`
|
1. Reset parse state
|
||||||
5. Decl_Enum : `parse_complicated_definition`
|
* Member code
|
||||||
6. Decl_Extern_Linkage : `parse_extern_link`
|
* Attributes
|
||||||
7. Decl_Namespace : `parse_namespace`
|
* Specifiers
|
||||||
8. Decl_Struct : `parse_complicated_definition`
|
* Function expectation flag
|
||||||
9. Decl_Template : `parse_template`
|
2. Member type handling
|
||||||
10. Decl_Typedef : `parse_typedef`
|
1. Declarations
|
||||||
11. Decl_Union : `parse_complicated_definition`
|
* `Class/Struct/Union/Enum` via `parse_complicated_definition`
|
||||||
12. Decl_Using : `parse_using`
|
* `Template/Typedef/Using` via dedicated parsers
|
||||||
13. Preprocess_Define : `parse_define`
|
* `Namespace/Export/Extern` declarations
|
||||||
14. Preprocess_Include : `parse_include`
|
2. Preprocessor directives
|
||||||
15. Preprocess_If, IfDef, IfNotDef, Elif : `parse_preprocess_cond`
|
* Include/Define
|
||||||
16. Preprocess_Else : ast constant
|
* Conditionals `(if / ifdef / ifndef / elif / else / endif)`
|
||||||
17. Preprocess_Endif : ast constant
|
* Pragmas
|
||||||
18. Preprocess_Macro : `parse_simple_preprocess`
|
* Preprocessor statement macros
|
||||||
19. Preprocess_Pragma : `parse_pragma`
|
* Report naked preprocessor expression macros detected as an error.
|
||||||
20. Preprocess_Unsupported : `parse_simple_preprocess`
|
3. Comments/Formatting
|
||||||
21. StaticAssert : `parse_static_assert`
|
* Newlines
|
||||||
22. Module_Export : `parse_export_body`
|
* Comments
|
||||||
23. Module_Import : NOT_IMPLEMENTED
|
4. Static assertions
|
||||||
24. The following compound into a resolved definition or declaration:
|
3. Attributes and specifiers
|
||||||
1. Attributes ( Standard, GNU, MSVC, Macro ) : `parse_attributes`
|
1. Parse attributes if present
|
||||||
2. Specifiers ( consteval, constexpr, constinit, extern, forceinline, global, inline, internal_linkage, neverinline, static )
|
2. Collect valid specifiers (max 16)
|
||||||
3. Is either ( identifier, const specifier, long, short, signed, unsigned, bool, char, double, int)
|
3. Handle `consteval` for function expectation
|
||||||
1. Attempt to parse as construtor or destructor : `parse_global_nspace_constructor_destructor`
|
4. Identifier resolution
|
||||||
2. If its an operator cast (definition outside class) : `parse_operator_cast`
|
1. Check `constructor/destructor` implementation
|
||||||
3. Its an operator, function, or varaible : `parse_operator_function_or_varaible`
|
2. Look ahead for user defined operator implementation outside of class
|
||||||
4. If its not a global body, consume the closing curly brace
|
3. Default to `operator/function/variable` parse
|
||||||
|
3. Member validation/storage
|
||||||
|
1. Validate parsed member
|
||||||
|
2. Append to body if valid
|
||||||
|
3. Return `InvalidCode` on parse failure
|
||||||
|
4. Scope finalization
|
||||||
|
1. Consume closing brace for non-global scopes
|
||||||
|
2. Pop parser scope
|
||||||
|
3. Return completed body
|
||||||
|
|
||||||
## `parse_global_nspace_constructor_destructor`
|
## `parse_global_nspace_constructor_destructor`
|
||||||
|
|
||||||
1. Look ahead for the start of the arguments for a possible constructor/destructor
|
1. Forward Token Analysis
|
||||||
2. Go back past the identifier
|
1. Scan for parameter list opening parenthesis
|
||||||
3. Check to see if its a destructor by checking for the `~`
|
2. Template Expression Handling
|
||||||
4. Continue the next token should be a `::`
|
* Track template nesting depth
|
||||||
5. Determine if the next valid identifier (ignoring possible template parameters) is the same as the first identifier of the function.
|
* Account for nested parentheses within templates
|
||||||
6. If it is we have either a constructor or destructor so parse using their respective functions (`parse_constructor`, `parse_destructor`).
|
* Skip until template closure or parameter start
|
||||||
|
|
||||||
|
```cpp
|
||||||
|
// Valid patterns:
|
||||||
|
ClassName :: ClassName(...)
|
||||||
|
ClassName :: ~ClassName(...)
|
||||||
|
ClassName< T ... > :: ClassName(...)
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Constructor/Destructor Identification
|
||||||
|
1. Token Validation Sequence
|
||||||
|
* Verify identifier preceding parameters
|
||||||
|
* Check for destructor indicator (`~`)
|
||||||
|
* Validate scope resolution operator (`::`)
|
||||||
|
2. Left-side Token Analysis
|
||||||
|
* Process nested template expressions
|
||||||
|
* Maintain template/capture level tracking
|
||||||
|
* Locate matching identifier token
|
||||||
|
3. Parser Resolution
|
||||||
|
1. Name Pattern Validation
|
||||||
|
* Compare identifier tokens for exact match
|
||||||
|
2. Specialized Parsing
|
||||||
|
* Route to `parser_parse_destructor` for '~' prefix
|
||||||
|
* Route to `parser_parse_constructor` for direct match
|
||||||
|
3. Apply specifiers to resulting node
|
||||||
|
4. Return result (`NullCode` on pattern mismatch)
|
||||||
|
|
||||||
|
### Implementation Constraints
|
||||||
|
|
||||||
|
* Cannot definitively distinguish nested namespaces with identical names
|
||||||
|
* Return type detection requires parser enhancement
|
||||||
|
* Template parameter validation is syntax-based only
|
||||||
|
* Future enhancement: Implement type parsing with rollback capability
|
||||||
|
|
||||||
## `parse_identifier`
|
## `parse_identifier`
|
||||||
|
|
||||||
This is going to get heavily changed down the line to have a more broken down "identifier expression" so that the qualifier, template args, etc, can be distinguished from the targeted identifier.
|
This is going to get heavily changed down the line to have a more broken down "identifier expression" so that the qualifier, template args, etc, can be distinguished from the targeted identifier.
|
||||||
The function can parse all of them, however the AST node compresses them all into a string.
|
The function can parse all of them, however the AST node compresses them all into a string.
|
||||||
|
|
||||||
1. Consume first identifier
|
1. Initialize identifier context
|
||||||
2. `parse_template_args`
|
1. Push parser scope
|
||||||
3. While there is a static symbol accessor ( `::` )
|
2. Capture initial token as name
|
||||||
1. Consume `::`
|
3. Set scope name from token text
|
||||||
2. Consume member identifier
|
2. Process initial identifier component
|
||||||
3. `parse_template args` (for member identifier)
|
1. Consume identifier token
|
||||||
4. If a `~` is encounted and the scope is for a destructor's identifier, do not consume it and return with what parsed.
|
2. Parse template arguments if present
|
||||||
|
3. Handle qualified identifiers (loop while `::` found)
|
||||||
|
1. Consume static access operator
|
||||||
|
2. Validate token sequence:
|
||||||
|
1. Handle destructor operator (`~`)
|
||||||
|
* Validate destructor parsing context
|
||||||
|
* Update name span if valid
|
||||||
|
* Return invalid on context mismatch
|
||||||
|
2. Process member function pointer (`*`)
|
||||||
|
* Set possible_member_function flag if context allows
|
||||||
|
* Return invalid if pointer unexpected
|
||||||
|
3. Verify identifier token follows
|
||||||
|
3. Update identifier span
|
||||||
|
1. Extend name length to include new qualifier
|
||||||
|
2. Consume identifier token
|
||||||
|
3. Parse additional template arguments
|
||||||
|
4. Return completed identifier token
|
||||||
|
|
||||||
|
Technical notes:
|
||||||
|
|
||||||
|
* Current implementation treats identifier as single token span
|
||||||
|
* TODO: Refactor to AST-based identifier representation for:
|
||||||
|
* Distinct qualifier/symbol tracking
|
||||||
|
* Improved semantic analysis capabilities
|
||||||
|
* Better support for nested symbol resolution
|
||||||
|
|
||||||
## `parse_include`
|
## `parse_include`
|
||||||
|
|
||||||
@ -323,16 +599,45 @@ The function can parse all of them, however the AST node compresses them all int
|
|||||||
|
|
||||||
This is needed as an operator definition is not easily resolvable early on, as such this function handles resolving an operator after it's been made certain that the type of declaration or definition is indeed for an operator signature.
|
This is needed as an operator definition is not easily resolvable early on, as such this function handles resolving an operator after it's been made certain that the type of declaration or definition is indeed for an operator signature.
|
||||||
|
|
||||||
By the point this function is called the following are known : export module flag, attributes, specifiers, return type
|
By the point this function is called the following are known : export module flag, attributes, specifiers, and return type
|
||||||
|
|
||||||
1. If there is any qualifiers for the operator, parse them
|
1. Initialize operator context
|
||||||
2. Consume operator keyword
|
1. Push scope
|
||||||
3. Determine the operator type (This will be offloaded to the lexer moreso than how it is now) & consume
|
2. Parse qualified namespace identifier
|
||||||
4. `parse_params`
|
3. Consume `operator` keyword
|
||||||
5. If there is no parameters this is operator is a member of pointer if its symbols is a *.
|
2. Operator identification
|
||||||
6. Parse postfix specifiers
|
1. Validate operator token presence
|
||||||
7. If there is a opening curly brace, `parse function_body`
|
2. Set scope name from operator token
|
||||||
8. Otherwise: consume end statement, check for inline comment.
|
3. Map operator token to internal operator enum:
|
||||||
|
* Arithmetic: `+, -, *, /, %`
|
||||||
|
* Assignment: `+=, -=, *=, /=, %=, =`
|
||||||
|
* Bitwise: `&, |, ^, ~, >>`
|
||||||
|
* Logical: `&&, ||, !, ==`
|
||||||
|
* Comparison: `<, >, <=, >=`
|
||||||
|
* Member access: `->, ->*`
|
||||||
|
* Special: `(), [], new, delete`
|
||||||
|
4. Handle array variants for new/delete
|
||||||
|
3. Parameter and specifier processing
|
||||||
|
1. Parse parameter list
|
||||||
|
2. Handle multiply/member-pointer ambiguity
|
||||||
|
3. Collect trailing specifiers
|
||||||
|
4. Merge with existing specifiers
|
||||||
|
4. Function body handling
|
||||||
|
1. Parse implementation if present
|
||||||
|
2. Otherwise consume statement terminator
|
||||||
|
3. Capture inline comments
|
||||||
|
5. Result construction
|
||||||
|
1. Create operator node with:
|
||||||
|
* Operator type
|
||||||
|
* Namespace qualification
|
||||||
|
* Parameters
|
||||||
|
* Return type
|
||||||
|
* Implementation body
|
||||||
|
* Specifiers
|
||||||
|
* Attributes
|
||||||
|
* Module flags
|
||||||
|
2. Attach inline comments
|
||||||
|
6. Pop scope
|
||||||
|
|
||||||
## `parse_operator_function_or_variable`
|
## `parse_operator_function_or_variable`
|
||||||
|
|
||||||
|
@ -6,9 +6,9 @@
|
|||||||
|
|
||||||
# Parsing
|
# Parsing
|
||||||
|
|
||||||
The library features a naive single-pass parser tailored for only what the library needs to construct the supported syntax of C++ into its AST for *"front-end"* meta-programming purposes.
|
The library features a naive single-pass parser, tailored for only what the library needs; for construction of C++ code into gencpp's AST for *"front-end"* meta-programming purposes.
|
||||||
|
|
||||||
This parser does not, and should not do the compiler's job. By only supporting this minimal set of features, the parser is kept (so far) around ~7000 loc. I hope to keep it under 10k loc worst case.
|
This parser does not, and should not do the compiler's job. By only supporting this minimal set of features, the parser is kept (so far) around ~7000 loc. I hope to keep it under 10-15k loc worst case.
|
||||||
|
|
||||||
You can think of this parser as a *frontend parser* vs a *semantic parser*. It's intuitively similar to WYSIWYG. What you ***perceive*** as the syntax from the user-side before the compiler gets a hold of it, is what you get.
|
You can think of this parser as a *frontend parser* vs a *semantic parser*. It's intuitively similar to WYSIWYG. What you ***perceive*** as the syntax from the user-side before the compiler gets a hold of it, is what you get.
|
||||||
|
|
||||||
@ -17,6 +17,7 @@ User exposed interface:
|
|||||||
```cpp
|
```cpp
|
||||||
CodeClass parse_class ( Str class_def );
|
CodeClass parse_class ( Str class_def );
|
||||||
CodeConstructor parse_constructor ( Str constructor_def );
|
CodeConstructor parse_constructor ( Str constructor_def );
|
||||||
|
CodeDefine parse_define ( Str define_def );
|
||||||
CodeDestructor parse_destructor ( Str destructor_def );
|
CodeDestructor parse_destructor ( Str destructor_def );
|
||||||
CodeEnum parse_enum ( Str enum_def );
|
CodeEnum parse_enum ( Str enum_def );
|
||||||
CodeBody parse_export_body ( Str export_def );
|
CodeBody parse_export_body ( Str export_def );
|
||||||
@ -53,7 +54,7 @@ The keywords supported for the preprocessor are:
|
|||||||
* endif
|
* endif
|
||||||
* pragma
|
* pragma
|
||||||
|
|
||||||
Each directive `#` line is considered one preproecessor unit, and will be treated as one Preprocessor AST.
|
Each directive `#` line is considered one preprocessor unit, and will be treated as one Preprocessor AST node.
|
||||||
If a directive is used with an unsupported keyword it will be processed as an untyped AST.
|
If a directive is used with an unsupported keyword it will be processed as an untyped AST.
|
||||||
|
|
||||||
The preprocessor lines are stored as members of their associated scope they are parsed within. ( Global, Namespace, Class/Struct )
|
The preprocessor lines are stored as members of their associated scope they are parsed within. ( Global, Namespace, Class/Struct )
|
||||||
@ -62,29 +63,89 @@ The preprocessor lines are stored as members of their associated scope they are
|
|||||||
Any preprocessor definition abuse that changes the syntax of the core language is unsupported and will fail to parse if not kept within an execution scope (function body, or expression assignment).
|
Any preprocessor definition abuse that changes the syntax of the core language is unsupported and will fail to parse if not kept within an execution scope (function body, or expression assignment).
|
||||||
Exceptions:
|
Exceptions:
|
||||||
|
|
||||||
* function signatures are allowed for a preprocessed macro: `neverinline MACRO() { ... }`
|
* variable definitions are allowed for a preprocessed macro: `extern MACRO();`
|
||||||
|
* function definitions are allowed for a preprocessed macro: `neverinline MACRO() { ... }`
|
||||||
* Disable with: `#define GEN_PARSER_DISABLE_MACRO_FUNCTION_SIGNATURES`
|
* Disable with: `#define GEN_PARSER_DISABLE_MACRO_FUNCTION_SIGNATURES`
|
||||||
* typedefs allow for a preprocessed macro: `typedef MACRO();`
|
* typedefs allow for a preprocessed macro: `typedef MACRO();`
|
||||||
* Disable with: `#define GEN_PARSER_DISABLE_MACRO_TYPEDEF`
|
* Disable with: `#define GEN_PARSER_DISABLE_MACRO_TYPEDEF`
|
||||||
* Macros can behave as typenames
|
* Macros can behave as typenames
|
||||||
* There is some macro support in paramters for functions or templates *(Specifically added to support parsing Unreal Engine source)*.
|
* There is some macro support in parameters for functions or templates *(Specifically added to support parsing Unreal Engine source)*.
|
||||||
|
|
||||||
*(Exceptions are added on an on-demand basis)*
|
*(Exceptions are added on an on-demand basis)*
|
||||||
*(See functions `parse_operator_function_or_variable` and `parse_typedef` )*
|
*(See functions `parse_operator_function_or_variable` and `parse_typedef` )*
|
||||||
|
|
||||||
Adding your own exceptions is possible by simply modifying the parser to allow for the syntax you need.
|
Adding your own exceptions is possible by simply modifying the parser to allow for the syntax you need.
|
||||||
|
|
||||||
*Note: You could interpret this strictness as a feature. This would allow the user to see if their codebase or a third-party's codebase some some egregious preprocessor abuse.*
|
*Note: You could interpret this strictness as a feature. This would allow the user to see if their codebase or a third-party's codebase contains some egregious preprocessor abuse.*
|
||||||
|
|
||||||
If a macro is not defined withint e scope of parsing a set of files, it can be defined beforehand by:
|
Macros used within a file should be registered by the user before parsing. This can be done two ways:
|
||||||
|
|
||||||
* Appending the [`PreprocessorDefines`](https://github.com/Ed94/gencpp/blob/a18b5b97aa5cfd20242065cbf53462a623cd18fa/base/components/header_end.hpp#L137) array.
|
1. The register macro interface within [interface.hpp](../base/components/interface.hpp).
|
||||||
* For functional macros a "(" just needs to be added after the name like: `<name>(` so that it will tokenize its arguments as part of the token during lexing.
|
2. Using `def_define` to create a CodeDefine and making sure to not set `opts.dont_register_to_preprocess_macros` to `true`.
|
||||||
* Defining a CodeDefine using `def_define`. The definition will be processed by the interface for user into `PreprocessorDefines`.
|
|
||||||
* This can be prevented by setting the optional prameter `dont_append_preprocess_defines`.
|
|
||||||
|
|
||||||
The lexing and parsing takes shortcuts from whats expected in the standard.
|
## Registering macros
|
||||||
|
|
||||||
|
While the registration of macros on the meta-program's side for parsing can be considered tedious, it's necessary for the parser to accurately resolve the macro's intent in one pass, and it provides, in a sense, hygiene by verifying that they are used as intended.
|
||||||
|
|
||||||
|
The following can be used to register a macro:
|
||||||
|
|
||||||
|
```c
|
||||||
|
GEN_API void register_macro( Macro macro );
|
||||||
|
GEN_API void register_macros( s32 num, ... );
|
||||||
|
GEN_API void register_macros_arr( s32 num, Macro* macros );
|
||||||
|
```
|
||||||
|
|
||||||
|
The Macro typename is defined with the following in [parser_types.hpp](../base/components/parser_types.hpp):
|
||||||
|
|
||||||
|
```c
|
||||||
|
struct Macro
|
||||||
|
{
|
||||||
|
StrCached Name;
|
||||||
|
MacroType Type;
|
||||||
|
MacroFlags Flags;
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
The macro can be designated one of the following types:
|
||||||
|
|
||||||
|
* `MT_Expression`: Intended to resolve to an expression expansion.
|
||||||
|
* `MT_Statement`: Intended to resolve to a statement expansion.
|
||||||
|
* `MT_Typename`: Intended to resolve to a typename.
|
||||||
|
|
||||||
|
Additionally, the following flags may be set:
|
||||||
|
|
||||||
|
* `MF_Functional`: The macro is intended to be passed arguments, or at least to have the calling `()` as part of its usage.
|
||||||
|
* `MF_Expects_Body`: The parser should expect a braced-body `{ ... }` after the macro signature `<name> <params>`
|
||||||
|
* `MF_Allow_As_Identifier`: Will allow the macro to be an acceptable token/s when a `Tok_Identifier` is expected.
|
||||||
|
* `MF_Allow_As_Attribute`: Will allow the macro to be an acceptable token/s when an attribute token/s is expected.
|
||||||
|
* `MF_Allow_As_Definition`: Will allow the macro to be an acceptable token/s when the parser expects a declaration or definition to resolve after attributes or specifiers have been identified beforehand.
|
||||||
|
* This flag requires that the macro is of type `MT_Statement` to make any sense of usage.
|
||||||
|
|
||||||
|
If a macro is not registered, the following warning will be issued if `GEN_BUILD_DEBUG=1` during lexing within [lexer.cpp](../base/components/lexer.cpp) - `lex_preprocessor_define`:
|
||||||
|
|
||||||
|
```c
|
||||||
|
log_fmt("Warning: '%S' was not registered before the lexer processed its #define directive, it will be registered as a expression macro\n"
|
||||||
|
, name.Text
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
Further within the same scope, the lexer will issue a warning if it detects a macro was not flagged as functional but has an open parenthesis `(` token right after its name with no whitespace:
|
||||||
|
|
||||||
|
```c
|
||||||
|
log_fmt("Warning: %S registered macro is not flagged as functional yet the definition detects opening parenthesis '(' for arguments\n"
|
||||||
|
, name.Text
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
Macros are tracked using a `MacroTable Macros;` defined as a member of the library's `Context`.
|
||||||
|
|
||||||
|
```c
|
||||||
|
typedef HashTable(Macro) MacroTable;
|
||||||
|
```
|
||||||
|
|
||||||
|
## Notes
|
||||||
|
|
||||||
|
* Empty lines used throughout the file are preserved for formatting purposes during ast serialization (they have a dedicated Token: `Tok_NewLine`).
|
||||||
* Numeric literals are not checked for validity.
|
* Numeric literals are not checked for validity.
|
||||||
* The parse API treats any execution scope definitions with no validation and are turned into untyped Code ASTs. (There is a [todo](https://github.com/Ed94/gencpp/issues/49) to add support)
|
* The parse API treats any execution scope definitions with no validation and are turned into untyped Code ASTs. (There is a [todo](https://github.com/Ed94/gencpp/issues/49) to add support)
|
||||||
* *This includes the assignment of variables.*
|
* *This includes the assignment of variables.*
|
||||||
@ -95,4 +156,4 @@ The lexing and parsing takes shortcuts from whats expected in the standard.
|
|||||||
* Parsing attributes can be extended to support user defined macros by defining `GEN_DEFINE_ATTRIBUTE_TOKENS` (see `gen.hpp` for the formatting)
|
* Parsing attributes can be extended to support user defined macros by defining `GEN_DEFINE_ATTRIBUTE_TOKENS` (see `gen.hpp` for the formatting)
|
||||||
* This is useful for example: parsing Unreal `Module_API` macros.
|
* This is useful for example: parsing Unreal `Module_API` macros.
|
||||||
|
|
||||||
Empty lines used throughout the file are preserved for formatting purposes during ast serialization.
|
**The lexer & parser do not gracefully attempt to continue when they come across incorrect code, and don't properly track errors into a listing (yet).**
|
||||||
|
Loading…
Reference in New Issue
Block a user