From 904dabe6a1358df81372f4177f26cec7b0e67cc8 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Tue, 5 May 2026 20:40:21 -0400 Subject: [PATCH] feat(mcp): Validate C++ tools against real-world gencpp components and improve enum support --- extract_samples.py | 47 + src/file_cache.py | 109 +- .../base_component.h | 0 .../complex_template.h | 0 .../component_registry.cpp | 0 .../component_registry.h | 0 .../verify_samples.py | 0 tests/assets/gencpp_samples/ast.hpp | 457 ++ tests/assets/gencpp_samples/parser.cpp | 5784 +++++++++++++++++ tests/assets/gencpp_samples/types.hpp | 181 + tests/repro_bug.py | 31 + tests/test_ts_cpp_tools.py | 87 +- 12 files changed, 6643 insertions(+), 53 deletions(-) create mode 100644 extract_samples.py rename tests/assets/{gencpp_samples => cpp_samples}/base_component.h (100%) rename tests/assets/{gencpp_samples => cpp_samples}/complex_template.h (100%) rename tests/assets/{gencpp_samples => cpp_samples}/component_registry.cpp (100%) rename tests/assets/{gencpp_samples => cpp_samples}/component_registry.h (100%) rename tests/assets/{gencpp_samples => cpp_samples}/verify_samples.py (100%) create mode 100644 tests/assets/gencpp_samples/ast.hpp create mode 100644 tests/assets/gencpp_samples/parser.cpp create mode 100644 tests/assets/gencpp_samples/types.hpp create mode 100644 tests/repro_bug.py diff --git a/extract_samples.py b/extract_samples.py new file mode 100644 index 0000000..dc05aa9 --- /dev/null +++ b/extract_samples.py @@ -0,0 +1,47 @@ +import os + +log_path = 'logs/20260325_103727/outputs/output_0013.txt' +with open(log_path, 'r', encoding='utf-8') as f: + content = f.read() + +def extract(start_pat, end_pat, out_name): + start_marker = f'### `{start_pat}`' + end_marker = f'### `{end_pat}`' + + start_pos = content.find(start_marker) + if start_pos == -1: + print(f"Start marker not found for {out_name}") + return + + end_pos = content.find(end_marker, start_pos) + if end_pos == -1: + print(f"End marker not found for {out_name}") + return + + 
block = content[start_pos:end_pos] + code_start = block.find('```cpp\r\n') + if code_start == -1: + code_start = block.find('```hpp\r\n') + if code_start == -1: + code_start = block.find('```cpp\n') + if code_start == -1: + code_start = block.find('```hpp\n') + + if code_start == -1: + print(f"Code start marker not found for {out_name}") + return + + code_start_offset = block.find('\n', code_start) + 1 + code_end = block.rfind('```') + + final_code = block[code_start_offset:code_end].strip() + + out_path = os.path.join('tests/assets/gencpp_samples', out_name) + os.makedirs(os.path.dirname(out_path), exist_ok=True) + with open(out_path, 'w', encoding='utf-8', newline='\r\n') as f_out: + f_out.write(final_code) + print(f"Extracted {out_name}") + +extract('C:/projects/gencpp/base/components/parser.cpp', 'C:/projects/gencpp/base/components/lexer.cpp', 'parser.cpp') +extract('C:/projects/gencpp/base/components/ast.hpp', 'C:/projects/gencpp/base/components/interface.parsing.cpp', 'ast.hpp') +extract('C:/projects/gencpp/base/components/types.hpp', 'C:/projects/gencpp/base/components/interface.hpp', 'types.hpp') diff --git a/src/file_cache.py b/src/file_cache.py index e6c7c47..ad73d7f 100644 --- a/src/file_cache.py +++ b/src/file_cache.py @@ -94,15 +94,15 @@ class ASTParser: _ast_cache[path] = (mtime, tree) return tree - def _get_name(self, node: tree_sitter.Node, code: str) -> str: + def _get_name(self, node: tree_sitter.Node, code_bytes: bytes) -> str: name_node = node.child_by_field_name("name") if name_node: - return code[name_node.start_byte:name_node.end_byte] + return code_bytes[name_node.start_byte:name_node.end_byte].decode("utf8", errors="replace") if node.type in ("function_definition", "field_declaration"): def find_id(n: tree_sitter.Node) -> str: if n.type in ("identifier", "field_identifier", "qualified_identifier", "destructor_name"): - return code[n.start_byte:n.end_byte] + return code_bytes[n.start_byte:n.end_byte].decode("utf8", errors="replace") # Try 
field name 'declarator' first d = n.child_by_field_name("declarator") if d: @@ -118,19 +118,20 @@ class ASTParser: if node.type == "template_declaration": for child in node.children: - if child.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier", "field_declaration"): - return self._get_name(child, code) + if child.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier", "enum_specifier", "enum_definition", "field_declaration"): + return self._get_name(child, code_bytes) - if node.type in ("struct_specifier", "class_specifier", "class_definition", "namespace_definition"): + if node.type in ("struct_specifier", "class_specifier", "class_definition", "enum_specifier", "enum_definition", "namespace_definition"): for child in node.children: if child.type in ("type_identifier", "identifier", "namespace_identifier"): - return code[child.start_byte:child.end_byte] + return code_bytes[child.start_byte:child.end_byte].decode("utf8", errors="replace") return "" def get_skeleton(self, code: str, path: Optional[str] = None) -> str: """ Returns a skeleton of a Python file (preserving docstrings, stripping function bodies). """ + code_bytes = code.encode("utf8") tree = self.get_cached_tree(path, code) edits: List[Tuple[int, int, str]] = [] @@ -170,10 +171,10 @@ class ASTParser: walk(tree.root_node) # Apply edits in reverse to maintain byte offsets edits.sort(key=lambda x: x[0], reverse=True) - code_bytes = bytearray(code, "utf8") + code_bytearray = bytearray(code_bytes) for start, end, replacement in edits: - code_bytes[start:end] = bytes(replacement, "utf8") - return code_bytes.decode("utf8") + code_bytearray[start:end] = bytes(replacement, "utf8") + return code_bytearray.decode("utf8") def get_curated_view(self, code: str, path: Optional[str] = None) -> str: """ @@ -181,6 +182,7 @@ class ASTParser: Preserves function bodies if they have @core_logic decorator or # [HOT] comment. 
Otherwise strips bodies but preserves docstrings. """ + code_bytes = code.encode("utf8") tree = self.get_cached_tree(path, code) edits: List[Tuple[int, int, str]] = [] @@ -197,7 +199,7 @@ class ASTParser: for child in parent.children: if child.type == "decorator": # decorator -> ( '@', identifier ) or ( '@', call ) - if "@core_logic" in code[child.start_byte:child.end_byte]: + if b"@core_logic" in code_bytes[child.start_byte:child.end_byte]: return True return False @@ -207,8 +209,8 @@ class ASTParser: while stack: curr = stack.pop() if curr.type == "comment": - comment_text = code[curr.start_byte:curr.end_byte] - if "[HOT]" in comment_text: + comment_bytes = code_bytes[curr.start_byte:curr.end_byte] + if b"[HOT]" in comment_bytes: return True for child in curr.children: stack.append(child) @@ -241,16 +243,17 @@ class ASTParser: walk(tree.root_node) # Apply edits in reverse to maintain byte offsets edits.sort(key=lambda x: x[0], reverse=True) - code_bytes = bytearray(code, "utf8") + code_bytearray = bytearray(code_bytes) for start, end, replacement in edits: - code_bytes[start:end] = bytes(replacement, "utf8") - return code_bytes.decode("utf8") + code_bytearray[start:end] = bytes(replacement, "utf8") + return code_bytearray.decode("utf8") def get_targeted_view(self, code: str, function_names: List[str], path: Optional[str] = None) -> str: """ Returns a targeted view of the code including only the specified functions and their dependencies up to depth 2. 
""" + code_bytes = code.encode("utf8") tree = self.get_cached_tree(path, code) all_functions = {} @@ -258,13 +261,13 @@ class ASTParser: if node.type == "function_definition": name_node = node.child_by_field_name("name") if name_node: - func_name = code[name_node.start_byte:name_node.end_byte] + func_name = code_bytes[name_node.start_byte:name_node.end_byte].decode("utf8", errors="replace") full_name = f"{class_name}.{func_name}" if class_name else func_name all_functions[full_name] = node elif node.type == "class_definition": name_node = node.child_by_field_name("name") if name_node: - cname = code[name_node.start_byte:name_node.end_byte] + cname = code_bytes[name_node.start_byte:name_node.end_byte].decode("utf8", errors="replace") full_cname = f"{class_name}.{cname}" if class_name else cname body = node.child_by_field_name("body") if body: @@ -282,11 +285,11 @@ class ASTParser: func_node = n.child_by_field_name("function") if func_node: if func_node.type == "identifier": - calls.add(code[func_node.start_byte:func_node.end_byte]) + calls.add(code_bytes[func_node.start_byte:func_node.end_byte].decode("utf8", errors="replace")) elif func_node.type == "attribute": attr_node = func_node.child_by_field_name("attribute") if attr_node: - calls.add(code[attr_node.start_byte:attr_node.end_byte]) + calls.add(code_bytes[attr_node.start_byte:attr_node.end_byte].decode("utf8", errors="replace")) for child in n.children: walk_calls(child) walk_calls(node) @@ -329,12 +332,12 @@ class ASTParser: def check_for_targeted(node, parent_class=None): if node.type == "function_definition": name_node = node.child_by_field_name("name") - fname = code[name_node.start_byte:name_node.end_byte] if name_node else "" + fname = code_bytes[name_node.start_byte:name_node.end_byte].decode("utf8", errors="replace") if name_node else "" fullname = f"{parent_class}.{fname}" if parent_class else fname return fullname in all_found if node.type == "class_definition": name_node = 
node.child_by_field_name("name") - cname = code[name_node.start_byte:name_node.end_byte] if name_node else "" + cname = code_bytes[name_node.start_byte:name_node.end_byte].decode("utf8", errors="replace") if name_node else "" full_cname = f"{parent_class}.{cname}" if parent_class else cname body = node.child_by_field_name("body") if body: @@ -350,7 +353,7 @@ class ASTParser: def walk_edits(node, parent_class=None): if node.type == "function_definition": name_node = node.child_by_field_name("name") - fname = code[name_node.start_byte:name_node.end_byte] if name_node else "" + fname = code_bytes[name_node.start_byte:name_node.end_byte].decode("utf8", errors="replace") if name_node else "" fullname = f"{parent_class}.{fname}" if parent_class else fname if fullname in all_found: body = node.child_by_field_name("body") @@ -376,7 +379,7 @@ class ASTParser: if node.type == "class_definition": if check_for_targeted(node, parent_class): name_node = node.child_by_field_name("name") - cname = code[name_node.start_byte:name_node.end_byte] if name_node else "" + cname = code_bytes[name_node.start_byte:name_node.end_byte].decode("utf8", errors="replace") if name_node else "" full_cname = f"{parent_class}.{cname}" if parent_class else cname body = node.child_by_field_name("body") if body: @@ -400,10 +403,10 @@ class ASTParser: walk_edits(tree.root_node) edits.sort(key=lambda x: x[0], reverse=True) - code_bytes = bytearray(code, "utf8") + code_bytearray = bytearray(code_bytes) for start, end, replacement in edits: - code_bytes[start:end] = bytes(replacement, "utf8") - result = code_bytes.decode("utf8") + code_bytearray[start:end] = bytes(replacement, "utf8") + result = code_bytearray.decode("utf8") result = re.sub(r'\n\s*\n\s*\n+', '\n\n', result) return result.strip() + "\n" @@ -412,6 +415,7 @@ class ASTParser: Returns the full source code for a specific definition by name. Supports 'ClassName::method' or 'method' for C++. 
""" + code_bytes = code.encode("utf8") tree = self.get_cached_tree(path, code) parts = re.split(r'::|\.', name) @@ -429,9 +433,9 @@ class ASTParser: check_node = sub break - is_interesting = check_node.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier", "namespace_definition", "template_declaration", "field_declaration") + is_interesting = check_node.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier", "enum_specifier", "enum_definition", "namespace_definition", "template_declaration", "field_declaration") if is_interesting: - node_name = self._get_name(check_node, code) + node_name = self._get_name(check_node, code_bytes) if node_name == target: if len(target_parts) == 1: return check_node if child.type != "field_declaration" else child @@ -442,14 +446,14 @@ class ASTParser: body = check_node.child_by_field_name("body") if not body and check_node.type == "template_declaration": for sub in check_node.children: - if sub.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier"): + if sub.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier", "enum_specifier", "enum_definition"): body = sub.child_by_field_name("body") break if body: found = walk(body, next_parts) if found: return found for sub in check_node.children: - if sub.type in ("field_declaration_list", "class_body", "declaration_list"): + if sub.type in ("field_declaration_list", "class_body", "declaration_list", "enum_body"): found = walk(sub, next_parts) if found: return found elif child.type in ("module", "translation_unit", "namespace_definition", "declaration_list", "field_declaration_list", "class_body"): @@ -458,8 +462,8 @@ class ASTParser: return None def deep_search(node: tree_sitter.Node, target: str) -> Optional[tree_sitter.Node]: - if node.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier", "namespace_definition", 
"template_declaration"): - if self._get_name(node, code) == target: + if node.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier", "enum_specifier", "enum_definition", "namespace_definition", "template_declaration"): + if self._get_name(node, code_bytes) == target: return node for child in node.children: res = deep_search(child, target) @@ -471,7 +475,7 @@ class ASTParser: found_node = deep_search(tree.root_node, name) if found_node: - return code[found_node.start_byte:found_node.end_byte] + return code_bytes[found_node.start_byte:found_node.end_byte].decode("utf8", errors="replace") return f"ERROR: definition '{name}' not found" def get_signature(self, code: str, name: str, path: Optional[str] = None) -> str: @@ -479,6 +483,7 @@ class ASTParser: Returns only the signature part of a function or method. For C/C++, this is the code from the start of the definition until the block start '{'. """ + code_bytes = code.encode("utf8") tree = self.get_cached_tree(path, code) parts = re.split(r'::|\.', name) @@ -496,9 +501,9 @@ class ASTParser: check_node = sub break - is_interesting = check_node.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier", "namespace_definition", "template_declaration", "field_declaration") + is_interesting = check_node.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier", "enum_specifier", "enum_definition", "namespace_definition", "template_declaration", "field_declaration") if is_interesting: - node_name = self._get_name(check_node, code) + node_name = self._get_name(check_node, code_bytes) if node_name == target: if len(target_parts) == 1: return check_node if child.type != "field_declaration" else child @@ -509,14 +514,14 @@ class ASTParser: body = check_node.child_by_field_name("body") if not body and check_node.type == "template_declaration": for sub in check_node.children: - if sub.type in ("function_definition", 
"class_definition", "class_specifier", "struct_specifier"): + if sub.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier", "enum_specifier", "enum_definition"): body = sub.child_by_field_name("body") break if body: found = walk(body, next_parts) if found: return found for sub in check_node.children: - if sub.type in ("field_declaration_list", "class_body", "declaration_list"): + if sub.type in ("field_declaration_list", "class_body", "declaration_list", "enum_body"): found = walk(sub, next_parts) if found: return found elif child.type in ("module", "translation_unit", "namespace_definition", "declaration_list", "field_declaration_list", "class_body"): @@ -526,7 +531,7 @@ class ASTParser: def deep_search(node: tree_sitter.Node, target: str) -> Optional[tree_sitter.Node]: if node.type in ("function_definition", "template_declaration"): - if self._get_name(node, code) == target: + if self._get_name(node, code_bytes) == target: return node for child in node.children: res = deep_search(child, target) @@ -547,8 +552,8 @@ class ASTParser: body = target_node.child_by_field_name("body") if body: - return code[found_node.start_byte:body.start_byte].strip() - return code[found_node.start_byte:found_node.end_byte].strip() + return code_bytes[found_node.start_byte:body.start_byte].decode("utf8", errors="replace").strip() + return code_bytes[found_node.start_byte:found_node.end_byte].decode("utf8", errors="replace").strip() return f"ERROR: signature for '{name}' not found" @@ -556,6 +561,7 @@ class ASTParser: """ Returns a hierarchical outline of the code (classes, structs, functions, methods). 
""" + code_bytes = code.encode("utf8") tree = self.get_cached_tree(path, code) output = [] @@ -570,7 +576,7 @@ class ASTParser: label = "[Method]" if indent > 0 else "[Func]" if label: - name = self._get_name(node, code) + name = self._get_name(node, code_bytes) if name: start = node.start_point.row + 1 end = node.end_point.row + 1 @@ -591,6 +597,7 @@ class ASTParser: """ Surgically replace the definition of a class or function by name. """ + code_bytes = code.encode("utf8") tree = self.get_cached_tree(path, code) parts = re.split(r'::|\.', name) @@ -608,9 +615,9 @@ class ASTParser: check_node = sub break - is_interesting = check_node.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier", "namespace_definition", "template_declaration", "field_declaration") + is_interesting = check_node.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier", "enum_specifier", "enum_definition", "namespace_definition", "template_declaration", "field_declaration") if is_interesting: - node_name = self._get_name(check_node, code) + node_name = self._get_name(check_node, code_bytes) if node_name == target: if len(target_parts) == 1: return check_node if child.type != "field_declaration" else child @@ -621,14 +628,14 @@ class ASTParser: body = check_node.child_by_field_name("body") if not body and check_node.type == "template_declaration": for sub in check_node.children: - if sub.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier"): + if sub.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier", "enum_specifier", "enum_definition"): body = sub.child_by_field_name("body") break if body: found = walk(body, next_parts) if found: return found for sub in check_node.children: - if sub.type in ("field_declaration_list", "class_body", "declaration_list"): + if sub.type in ("field_declaration_list", "class_body", "declaration_list", "enum_body"): found = 
walk(sub, next_parts) if found: return found elif child.type in ("module", "translation_unit", "namespace_definition", "declaration_list", "field_declaration_list", "class_body"): @@ -637,8 +644,8 @@ class ASTParser: return None def deep_search(node: tree_sitter.Node, target: str) -> Optional[tree_sitter.Node]: - if node.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier", "namespace_definition", "template_declaration"): - if self._get_name(node, code) == target: + if node.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier", "enum_specifier", "enum_definition", "namespace_definition", "template_declaration"): + if self._get_name(node, code_bytes) == target: return node for child in node.children: res = deep_search(child, target) @@ -650,9 +657,9 @@ class ASTParser: found_node = deep_search(tree.root_node, name) if found_node: - code_bytes = bytearray(code, "utf8") - code_bytes[found_node.start_byte:found_node.end_byte] = bytes(new_content, "utf8") - return code_bytes.decode("utf8") + code_bytearray = bytearray(code_bytes) + code_bytearray[found_node.start_byte:found_node.end_byte] = bytes(new_content, "utf8") + return code_bytearray.decode("utf8") return f"ERROR: definition '{name}' not found" def reset_client() -> None: diff --git a/tests/assets/gencpp_samples/base_component.h b/tests/assets/cpp_samples/base_component.h similarity index 100% rename from tests/assets/gencpp_samples/base_component.h rename to tests/assets/cpp_samples/base_component.h diff --git a/tests/assets/gencpp_samples/complex_template.h b/tests/assets/cpp_samples/complex_template.h similarity index 100% rename from tests/assets/gencpp_samples/complex_template.h rename to tests/assets/cpp_samples/complex_template.h diff --git a/tests/assets/gencpp_samples/component_registry.cpp b/tests/assets/cpp_samples/component_registry.cpp similarity index 100% rename from tests/assets/gencpp_samples/component_registry.cpp rename to 
tests/assets/cpp_samples/component_registry.cpp diff --git a/tests/assets/gencpp_samples/component_registry.h b/tests/assets/cpp_samples/component_registry.h similarity index 100% rename from tests/assets/gencpp_samples/component_registry.h rename to tests/assets/cpp_samples/component_registry.h diff --git a/tests/assets/gencpp_samples/verify_samples.py b/tests/assets/cpp_samples/verify_samples.py similarity index 100% rename from tests/assets/gencpp_samples/verify_samples.py rename to tests/assets/cpp_samples/verify_samples.py diff --git a/tests/assets/gencpp_samples/ast.hpp b/tests/assets/gencpp_samples/ast.hpp new file mode 100644 index 0000000..24006b7 --- /dev/null +++ b/tests/assets/gencpp_samples/ast.hpp @@ -0,0 +1,457 @@ +#ifdef INTELLISENSE_DIRECTIVES +#pragma once +#include "parser_types.hpp" +#endif + +/* + ______ ______ ________ __ __ ______ __ + / \ / \| \ | \ | \ / \ | \ +| ▓▓▓▓▓▓\ ▓▓▓▓▓▓\\▓▓▓▓▓▓▓▓ | ▓▓\ | ▓▓ | ▓▓▓▓▓▓\ ______ ____| ▓▓ ______ +| ▓▓__| ▓▓ ▓▓___\▓▓ | ▓▓ | ▓▓▓\| ▓▓ | ▓▓ \▓▓/ \ / ▓▓/ \ +| ▓▓ ▓▓\▓▓ \ | ▓▓ | ▓▓▓▓\ ▓▓ | ▓▓ | ▓▓▓▓▓▓\ ▓▓▓▓▓▓▓ ▓▓▓▓▓▓\ +| ▓▓▓▓▓▓▓▓_\▓▓▓▓▓▓\ | ▓▓ | ▓▓\▓▓ ▓▓ | ▓▓ __| ▓▓ | ▓▓ ▓▓ | ▓▓ ▓▓ ▓▓ +| ▓▓ | ▓▓ \__| ▓▓ | ▓▓ | ▓▓ \▓▓▓▓ | ▓▓__/ \ ▓▓__/ ▓▓ ▓▓__| ▓▓ ▓▓▓▓▓▓▓▓ +| ▓▓ | ▓▓\▓▓ ▓▓ | ▓▓ | ▓▓ \▓▓▓ \▓▓ ▓▓\▓▓ ▓▓\▓▓ ▓▓\▓▓ \ + \▓▓ \▓▓ \▓▓▓▓▓▓ \▓▓ \▓▓ \▓▓ \▓▓▓▓▓▓ \▓▓▓▓▓▓ \▓▓▓▓▓▓▓ \▓▓▓▓▓▓▓ +*/ + +struct AST; +struct AST_Body; +struct AST_Attributes; +struct AST_Comment; +struct AST_Constructor; +// struct AST_BaseClass; +struct AST_Class; +struct AST_Define; +struct AST_DefineParams; +struct AST_Destructor; +struct AST_Enum; +struct AST_Exec; +struct AST_Extern; +struct AST_Include; +struct AST_Friend; +struct AST_Fn; +struct AST_Module; +struct AST_NS; +struct AST_Operator; +struct AST_OpCast; +struct AST_Params; +struct AST_Pragma; +struct AST_PreprocessCond; +struct AST_Specifiers; + +#ifdef GEN_EXECUTION_EXPRESSION_SUPPORT +struct AST_Expr; +struct AST_Expr_Assign; +struct AST_Expr_Alignof; +struct AST_Expr_Binary; +struct 
AST_Expr_CStyleCast; +struct AST_Expr_FunctionalCast; +struct AST_Expr_CppCast; +struct AST_Expr_ProcCall; +struct AST_Expr_Decltype; +struct AST_Expr_Comma; // TODO(Ed) : This is a binary op not sure if it needs its own AST... +struct AST_Expr_AMS; // Access Member Symbol +struct AST_Expr_Sizeof; +struct AST_Expr_Subscript; +struct AST_Expr_Ternary; +struct AST_Expr_UnaryPrefix; +struct AST_Expr_UnaryPostfix; +struct AST_Expr_Element; + +struct AST_Stmt; +struct AST_Stmt_Break; +struct AST_Stmt_Case; +struct AST_Stmt_Continue; +struct AST_Stmt_Decl; +struct AST_Stmt_Do; +struct AST_Stmt_Expr; // TODO(Ed) : Is this distinction needed? (Should it be a flag instead?) +struct AST_Stmt_Else; +struct AST_Stmt_If; +struct AST_Stmt_For; +struct AST_Stmt_Goto; +struct AST_Stmt_Label; +struct AST_Stmt_Switch; +struct AST_Stmt_While; +#endif + +struct AST_Struct; +struct AST_Template; +struct AST_Typename; +struct AST_Typedef; +struct AST_Union; +struct AST_Using; +struct AST_Var; + +#if GEN_COMPILER_C +typedef AST* Code; +#else +struct Code; +#endif + +#if GEN_COMPILER_C +typedef AST_Body* CodeBody; +typedef AST_Attributes* CodeAttributes; +typedef AST_Comment* CodeComment; +typedef AST_Class* CodeClass; +typedef AST_Constructor* CodeConstructor; +typedef AST_Define* CodeDefine; +typedef AST_DefineParams* CodeDefineParams; +typedef AST_Destructor* CodeDestructor; +typedef AST_Enum* CodeEnum; +typedef AST_Exec* CodeExec; +typedef AST_Extern* CodeExtern; +typedef AST_Include* CodeInclude; +typedef AST_Friend* CodeFriend; +typedef AST_Fn* CodeFn; +typedef AST_Module* CodeModule; +typedef AST_NS* CodeNS; +typedef AST_Operator* CodeOperator; +typedef AST_OpCast* CodeOpCast; +typedef AST_Params* CodeParams; +typedef AST_PreprocessCond* CodePreprocessCond; +typedef AST_Pragma* CodePragma; +typedef AST_Specifiers* CodeSpecifiers; +#else +struct CodeBody; +struct CodeAttributes; +struct CodeComment; +struct CodeClass; +struct CodeConstructor; +struct CodeDefine; +struct 
CodeDefineParams; +struct CodeDestructor; +struct CodeEnum; +struct CodeExec; +struct CodeExtern; +struct CodeInclude; +struct CodeFriend; +struct CodeFn; +struct CodeModule; +struct CodeNS; +struct CodeOperator; +struct CodeOpCast; +struct CodeParams; +struct CodePreprocessCond; +struct CodePragma; +struct CodeSpecifiers; +#endif + +#ifdef GEN_EXECUTION_EXPRESSION_SUPPORT + +#if GEN_COMPILER_C +typedef AST_Expr* CodeExpr; +typedef AST_Expr_Assign* CodeExpr_Assign; +typedef AST_Expr_Alignof* CodeExpr_Alignof; +typedef AST_Expr_Binary* CodeExpr_Binary; +typedef AST_Expr_CStyleCast* CodeExpr_CStyleCast; +typedef AST_Expr_FunctionalCast* CodeExpr_FunctionalCast; +typedef AST_Expr_CppCast* CodeExpr_CppCast; +typedef AST_Expr_Element* CodeExpr_Element; +typedef AST_Expr_ProcCall* CodeExpr_ProcCall; +typedef AST_Expr_Decltype* CodeExpr_Decltype; +typedef AST_Expr_Comma* CodeExpr_Comma; +typedef AST_Expr_AMS* CodeExpr_AMS; // Access Member Symbol +typedef AST_Expr_Sizeof* CodeExpr_Sizeof; +typedef AST_Expr_Subscript* CodeExpr_Subscript; +typedef AST_Expr_Ternary* CodeExpr_Ternary; +typedef AST_Expr_UnaryPrefix* CodeExpr_UnaryPrefix; +typedef AST_Expr_UnaryPostfix* CodeExpr_UnaryPostfix; +#else +struct CodeExpr; +struct CodeExpr_Assign; +struct CodeExpr_Alignof; +struct CodeExpr_Binary; +struct CodeExpr_CStyleCast; +struct CodeExpr_FunctionalCast; +struct CodeExpr_CppCast; +struct CodeExpr_Element; +struct CodeExpr_ProcCall; +struct CodeExpr_Decltype; +struct CodeExpr_Comma; +struct CodeExpr_AMS; // Access Member Symbol +struct CodeExpr_Sizeof; +struct CodeExpr_Subscript; +struct CodeExpr_Ternary; +struct CodeExpr_UnaryPrefix; +struct CodeExpr_UnaryPostfix; +#endif + +#if GEN_COMPILER_C +typedef AST_Stmt* CodeStmt; +typedef AST_Stmt_Break* CodeStmt_Break; +typedef AST_Stmt_Case* CodeStmt_Case; +typedef AST_Stmt_Continue* CodeStmt_Continue; +typedef AST_Stmt_Decl* CodeStmt_Decl; +typedef AST_Stmt_Do* CodeStmt_Do; +typedef AST_Stmt_Expr* CodeStmt_Expr; +typedef 
AST_Stmt_Else* CodeStmt_Else; +typedef AST_Stmt_If* CodeStmt_If; +typedef AST_Stmt_For* CodeStmt_For; +typedef AST_Stmt_Goto* CodeStmt_Goto; +typedef AST_Stmt_Label* CodeStmt_Label; +typedef AST_Stmt_Lambda* CodeStmt_Lambda; +typedef AST_Stmt_Switch* CodeStmt_Switch; +typedef AST_Stmt_While* CodeStmt_While; +#else +struct CodeStmt; +struct CodeStmt_Break; +struct CodeStmt_Case; +struct CodeStmt_Continue; +struct CodeStmt_Decl; +struct CodeStmt_Do; +struct CodeStmt_Expr; +struct CodeStmt_Else; +struct CodeStmt_If; +struct CodeStmt_For; +struct CodeStmt_Goto; +struct CodeStmt_Label; +struct CodeStmt_Lambda; +struct CodeStmt_Switch; +struct CodeStmt_While; +#endif + +// GEN_EXECUTION_EXPRESSION_SUPPORT +#endif + +#if GEN_COMPILER_C +typedef AST_Struct* CodeStruct; +typedef AST_Template* CodeTemplate; +typedef AST_Typename* CodeTypename; +typedef AST_Typedef* CodeTypedef; +typedef AST_Union* CodeUnion; +typedef AST_Using* CodeUsing; +typedef AST_Var* CodeVar; +#else +struct CodeStruct; +struct CodeTemplate; +struct CodeTypename; +struct CodeTypedef; +struct CodeUnion; +struct CodeUsing; +struct CodeVar; +#endif + +#if GEN_COMPILER_CPP +template< class Type> forceinline Type tmpl_cast( Code self ) { return * rcast( Type*, & self ); } +#endif + +#pragma region Code C-Interface + + void code_append (Code code, Code other ); +GEN_API Str code_debug_str (Code code); +GEN_API Code code_duplicate (Code code); + Code* code_entry (Code code, u32 idx ); + bool code_has_entries (Code code); + bool code_is_body (Code code); +GEN_API bool code_is_equal (Code code, Code other); + bool code_is_valid (Code code); + void code_set_global (Code code); +GEN_API StrBuilder code_to_strbuilder (Code self ); +GEN_API void code_to_strbuilder_ref(Code self, StrBuilder* result ); + Str code_type_str (Code self ); +GEN_API bool code_validate_body (Code self ); + +#pragma endregion Code C-Interface + +#if GEN_COMPILER_CPP +/* + AST* wrapper + - Not constantly have to append the '*' as this is 
written often.. + - Allows for implicit conversion to any of the ASTs (raw or filtered). +*/ +struct Code +{ + AST* ast; + +# define Using_Code( Typename ) \ + forceinline Str debug_str() { return code_debug_str(* this); } \ + forceinline Code duplicate() { return code_duplicate(* this); } \ + forceinline bool is_equal( Code other ) { return code_is_equal(* this, other); } \ + forceinline bool is_body() { return code_is_body(* this); } \ + forceinline bool is_valid() { return code_is_valid(* this); } \ + forceinline void set_global() { return code_set_global(* this); } + +# define Using_CodeOps( Typename ) \ + forceinline Typename& operator = ( Code other ); \ + forceinline bool operator ==( Code other ) { return (AST*)ast == other.ast; } \ + forceinline bool operator !=( Code other ) { return (AST*)ast != other.ast; } \ + forceinline bool operator ==(std::nullptr_t) const { return ast == nullptr; } \ + forceinline bool operator !=(std::nullptr_t) const { return ast != nullptr; } \ + operator bool(); + +#if ! GEN_C_LIKE_CPP + Using_Code( Code ); + forceinline void append(Code other) { return code_append(* this, other); } + forceinline Code* entry(u32 idx) { return code_entry(* this, idx); } + forceinline bool has_entries() { return code_has_entries(* this); } + forceinline StrBuilder to_strbuilder() { return code_to_strbuilder(* this); } + forceinline void to_strbuilder(StrBuilder& result) { return code_to_strbuilder_ref(* this, & result); } + forceinline Str type_str() { return code_type_str(* this); } + forceinline bool validate_body() { return code_validate_body(*this); } +#endif + + Using_CodeOps( Code ); + forceinline Code operator *() { return * this; } // Required to support for-range iteration. 
+ forceinline AST* operator ->() { return ast; } + + Code& operator ++(); + +#ifdef GEN_ENFORCE_STRONG_CODE_TYPES +# define operator explicit operator +#endif + operator CodeBody() const; + operator CodeAttributes() const; + // operator CodeBaseClass() const; + operator CodeComment() const; + operator CodeClass() const; + operator CodeConstructor() const; + operator CodeDefine() const; + operator CodeDefineParams() const; + operator CodeDestructor() const; + operator CodeExec() const; + operator CodeEnum() const; + operator CodeExtern() const; + operator CodeInclude() const; + operator CodeFriend() const; + operator CodeFn() const; + operator CodeModule() const; + operator CodeNS() const; + operator CodeOperator() const; + operator CodeOpCast() const; + operator CodeParams() const; + operator CodePragma() const; + operator CodePreprocessCond() const; + operator CodeSpecifiers() const; + operator CodeStruct() const; + operator CodeTemplate() const; + operator CodeTypename() const; + operator CodeTypedef() const; + operator CodeUnion() const; + operator CodeUsing() const; + operator CodeVar() const; + #undef operator +}; +#endif + +#pragma region Statics +// Used to identify ASTs that should always be duplicated. (Global constant ASTs) +GEN_API extern Code Code_Global; + +// Used to identify invalid generated code. +GEN_API extern Code Code_Invalid; +#pragma endregion Statics + +struct Code_POD +{ + AST* ast; +}; +static_assert( sizeof(Code) == sizeof(Code_POD), "ERROR: Code is not POD" ); + +// Desired width of the AST data structure. +constexpr int const AST_POD_Size = 128; + +constexpr static +int AST_ArrSpecs_Cap = +( + AST_POD_Size + - sizeof(Code) + - sizeof(StrCached) + - sizeof(Code) * 2 + - sizeof(Token*) + - sizeof(Code) + - sizeof(CodeType) + - sizeof(ModuleFlag) + - sizeof(u32) +) +/ sizeof(Specifier) - 1; + +/* + Simple AST POD with functionality to seralize into C++ syntax. 
+ TODO(Ed): Eventually having a transparent AST like this will no longer be viable once statements & expressions are in (most likely....) +*/ +struct AST +{ + union { + struct + { + Code InlineCmt; // Class, Constructor, Destructor, Enum, Friend, Function, Operator, OpCast, Struct, Typedef, Using, Variable + Code Attributes; // Class, Enum, Function, Struct, Typedef, Union, Using, Variable // TODO(Ed): Parameters can have attributes + Code Specs; // Class, Destructor, Function, Operator, Struct, Typename, Variable + union { + Code InitializerList; // Constructor + Code ParentType; // Class, Struct, ParentType->Next has a possible list of interfaces. + Code ReturnType; // Function, Operator, Typename + Code UnderlyingType; // Enum, Typedef + Code ValueType; // Parameter, Variable + }; + union { + Code Macro; // Parameter + Code BitfieldSize; // Variable (Class/Struct Data Member) + Code Params; // Constructor, Define, Function, Operator, Template, Typename + Code UnderlyingTypeMacro; // Enum + }; + union { + Code ArrExpr; // Typename + Code Body; // Class, Constructor, Define, Destructor, Enum, Friend, Function, Namespace, Struct, Union + Code Declaration; // Friend, Template + Code Value; // Parameter, Variable + }; + union { + Code NextVar; // Variable + Code SuffixSpecs; // Typename, Function (Thanks Unreal) + Code PostNameMacro; // Only used with parameters for specifically UE_REQUIRES (Thanks Unreal) + }; + }; + StrCached Content; // Attributes, Comment, Execution, Include + TokenSlice ContentToks; // TODO(Ed): Use a token slice for content + struct { + Specifier ArrSpecs[AST_ArrSpecs_Cap]; // Specifiers + Code NextSpecs; // Specifiers; If ArrSpecs is full, then NextSpecs is used. + }; + }; + StrCached Name; + union { + Code Prev; + Code Front; + Code Last; + }; + union { + Code Next; + Code Back; + }; + Token* Token; // Reference to starting token, only available if it was derived from parsing. // TODO(Ed): Change this to a token slice. 
+ Code Parent; + CodeType Type; +// CodeFlag CodeFlags; + ModuleFlag ModuleFlags; + union { + b32 IsFunction; // Used by typedef to not serialize the name field. + struct { + b16 IsParamPack; // Used by typename to know if type should be considered a parameter pack. + ETypenameTag TypeTag; // Used by typename to keep track of explicitly declared tags for the identifier (enum, struct, union) + }; + Operator Op; + AccessSpec ParentAccess; + s32 NumEntries; + s32 VarParenthesizedInit; // Used by variables to know that initialization is using a constructor expression instead of an assignment expression. + }; +}; +static_assert( sizeof(AST) == AST_POD_Size, "ERROR: AST is not size of AST_POD_Size" ); + +#if GEN_COMPILER_CPP +// Uses an implicitly overloaded cast from the AST to the desired code type. +// Necessary if the user wants GEN_ENFORCE_STRONG_CODE_TYPES +struct InvalidCode_ImplictCaster; +#define InvalidCode (InvalidCode_ImplictCaster{}) +#else +#define InvalidCode (void*){ (void*)Code_Invalid } +#endif + +#if GEN_COMPILER_CPP +struct NullCode_ImplicitCaster; +// Used when omission is desired and is allowed in a definition. 
+#define NullCode (NullCode_ImplicitCaster{}) +#else +#define NullCode nullptr +#endif \ No newline at end of file diff --git a/tests/assets/gencpp_samples/parser.cpp b/tests/assets/gencpp_samples/parser.cpp new file mode 100644 index 0000000..7860c25 --- /dev/null +++ b/tests/assets/gencpp_samples/parser.cpp @@ -0,0 +1,5784 @@ +#ifdef INTELLISENSE_DIRECTIVES +#pragma once +#include "gen/etoktype.hpp" +#include "parser_case_macros.cpp" +#include "interface.upfront.cpp" +#include "lexer.cpp" +#endif + +// TODO(Ed) : Rename ETok_Capture_Start, ETok_Capture_End to Open_Parenthesis adn Close_Parenthesis + +constexpr bool lex_dont_skip_formatting = false; +constexpr bool lex_skip_formatting = true; + +void parser_push( ParseContext* ctx, ParseStackNode* node ) +{ + node->prev = ctx->scope; + ctx->scope = node; + +#if 0 && GEN_BUILD_DEBUG + log_fmt("\tEntering parser: %.*s\n", Scope->ProcName.Len, Scope->ProcName.Ptr ); +#endif +} + +void parser_pop(ParseContext* ctx) +{ +#if 0 && GEN_BUILD_DEBUG + log_fmt("\tPopping parser: %.*s\n", Scope->ProcName.Len, Scope->ProcName.Ptr ); +#endif + ctx->scope = ctx->scope->prev; +} + +StrBuilder parser_to_strbuilder(ParseContext const* ctx, AllocatorInfo temp) +{ + StrBuilder result = strbuilder_make_reserve( temp, kilobytes(4) ); + + Token scope_start = * ctx->scope->start; + Token last_valid = (ctx->token_id >= ctx->tokens.num) ? 
ctx->tokens.ptr[ctx->tokens.num -1] : (* lex_peek(ctx, true)); + + sptr length = scope_start.Text.Len; + char const* current = scope_start.Text.Ptr + length; + while ( current <= ctx->tokens.ptr[ctx->tokens.num - 1].Text.Ptr && (* current) != '\n' && length < 74 ) + { + current++; + length++; + } + + Str scope_str = { scope_start.Text.Ptr, length }; + StrBuilder line = strbuilder_make_str( temp, scope_str ); + strbuilder_append_fmt( & result, "\tScope : %s\n", line ); + strbuilder_free(& line); + + sptr dist = (sptr)last_valid.Text.Ptr - (sptr)scope_start.Text.Ptr + 2; + sptr length_from_err = dist; + + Str err_str = { last_valid.Text.Ptr, length_from_err }; + StrBuilder line_from_err = strbuilder_make_str( temp, err_str ); + + if ( length_from_err < 100 ) + strbuilder_append_fmt(& result, "\t(%d, %d):%*c\n", last_valid.Line, last_valid.Column, length_from_err, '^' ); + else + strbuilder_append_fmt(& result, "\t(%d, %d)\n", last_valid.Line, last_valid.Column ); + + ParseStackNode* curr_scope = ctx->scope; + s32 level = 0; + do + { + if ( curr_scope->name.Ptr ) { + strbuilder_append_fmt(& result, "\t%d: %S, AST Name: %S\n", level, curr_scope->proc_name, curr_scope->name ); + } + else { + strbuilder_append_fmt(& result, "\t%d: %S\n", level, curr_scope->proc_name ); + } + + curr_scope = curr_scope->prev; + level++; + } + while ( curr_scope ); + return result; +} + +bool lex__eat(Context* ctx, ParseContext* parser, TokType type) +{ + if ( parser->tokens.num - parser->token_id <= 0 ) { + log_failure( "No tokens left.\n%SB", parser_to_strbuilder(parser, ctx->Allocator_Temp) ); + return false; + } + + Token at_idx = parser->tokens.ptr[ parser->token_id ]; + + if ( ( at_idx.Type == Tok_NewLine && type != Tok_NewLine ) + || ( at_idx.Type == Tok_Comment && type != Tok_Comment ) ) + { + parser->token_id ++; + } + + b32 not_accepted = at_idx.Type != type; + b32 is_identifier = at_idx.Type == Tok_Identifier; + if ( not_accepted ) + { + Macro* macro = lookup_macro(at_idx.Text); 
+ b32 accept_as_identifier = macro && bitfield_is_set(MacroFlags, macro->Flags, MF_Allow_As_Identifier ); + not_accepted = type == Tok_Identifier && accept_as_identifier ? false : true; + } + if ( not_accepted ) + { + Token tok = * lex_current( parser, lex_skip_formatting ); + log_failure( "Parse Error, TokArray::eat, Expected: ' %S ' not ' %S ' (%d, %d)`\n%SB" + , toktype_to_str(type) + , at_idx.Text + , tok.Line + , tok.Column + , parser_to_strbuilder(parser, ctx->Allocator_Temp) + ); + GEN_DEBUG_TRAP(); + return false; + } + +#if 0 && GEN_BUILD_DEBUG + log_fmt("Ate: %SB\n", self->Arr[Idx].to_strbuilder() ); +#endif + + parser->token_id ++; + return true; +} + +internal +void parser_init(Context* ctx) +{ +} + +internal +void parser_deinit(Context* ctx) +{ +} + +#pragma region Helper Macros + +#define check_parse_args( def ) _check_parse_args(& ctx->parser, def, stringize(_func_) ) +bool _check_parse_args(ParseContext* parser, Str def, char const* func_name ) +{ + if ( def.Len <= 0 ) + { + log_failure( c_str_fmt_buf("gen::%s: length must greater than 0", func_name) ); + parser_pop(parser); + return false; + } + if ( def.Ptr == nullptr ) + { + log_failure( c_str_fmt_buf("gen::%s: def was null", func_name) ); + parser_pop(parser); + return false; + } + return true; +} + +# define currtok_noskip (* lex_current( & ctx->parser, lex_dont_skip_formatting )) +# define currtok (* lex_current( & ctx->parser, lex_skip_formatting )) +# define peektok (* lex_peek(& ctx->parser, lex_skip_formatting)) +# define prevtok (* lex_previous( & ctx->parser, lex_dont_skip_formatting)) +# define nexttok (* lex_next( & ctx->parser, lex_skip_formatting )) +# define nexttok_noskip (* lex_next( & ctx->parser, lex_dont_skip_formatting)) +# define eat( Type_ ) lex__eat(ctx, & ctx->parser, Type_ ) +# define left ( ctx->parser.tokens.num - ctx->parser.token_id ) + +#if GEN_COMPILER_CPP +# define def_assign( ... ) { __VA_ARGS__ } +#else +# define def_assign( ... 
) __VA_ARGS__ +#endif + +#ifdef check +#define CHECK_WAS_DEFINED +#pragma push_macro("check") +#undef check +#endif + +# define check_noskip( Type_ ) ( left && currtok_noskip.Type == Type_ ) +# define check( Type_ ) ( left && currtok.Type == Type_ ) + +#if GEN_COMPILER_CPP +# define NullScope { nullptr, {nullptr, 0}, lex_current( & ctx->parser, lex_dont_skip_formatting ), Str{nullptr, 0}, txt( __func__ ), { nullptr} } +#else +# define NullScope (ParseStackNode){ nullptr, {nullptr, 0}, lex_current( & ctx->parser, lex_dont_skip_formatting ), (Str){nullptr, 0}, txt( __func__ ), { nullptr} } +#endif + +#pragma endregion Helper Macros + +// Procedure Forwards ( Entire parser internal parser interface ) + +internal Code parse_array_decl (Context* ctx); +internal CodeAttributes parse_attributes (Context* ctx); +internal CodeComment parse_comment (Context* ctx); +internal Code parse_complicated_definition (Context* ctx, TokType which); +internal CodeBody parse_class_struct_body (Context* ctx, TokType which, Token name); +internal Code parse_class_struct (Context* ctx, TokType which, bool inplace_def); +internal Code parse_expression (Context* ctx); +internal Code parse_forward_or_definition (Context* ctx, TokType which, bool is_inplace); +internal CodeFn parse_function_after_name (Context* ctx, ModuleFlag mflags, CodeAttributes attributes, CodeSpecifiers specifiers, CodeTypename ret_type, Token name); +internal Code parse_function_body (Context* ctx); +internal CodeBody parse_global_nspace (Context* ctx, CodeType which); +internal Code parse_global_nspace_constructor_destructor(Context* ctx, CodeSpecifiers specifiers); +internal Token parse_identifier (Context* ctx, bool* possible_member_function); +internal CodeInclude parse_include (Context* ctx); +internal Code parse_macro_as_definiton (Context* ctx, CodeAttributes attributes, CodeSpecifiers specifiers); +internal CodeOperator parse_operator_after_ret_type (Context* ctx, ModuleFlag mflags, CodeAttributes attributes, 
CodeSpecifiers specifiers, CodeTypename ret_type); +internal Code parse_operator_function_or_variable(Context* ctx, bool expects_function, CodeAttributes attributes, CodeSpecifiers specifiers); +internal CodePragma parse_pragma (Context* ctx); +internal CodeParams parse_params (Context* ctx, bool use_template_capture); +internal CodePreprocessCond parse_preprocess_cond (Context* ctx); +internal Code parse_simple_preprocess (Context* ctx, TokType which); +internal Code parse_static_assert (Context* ctx); +internal void parse_template_args (Context* ctx, Token* token ); +internal CodeVar parse_variable_after_name (Context* ctx, ModuleFlag mflags, CodeAttributes attributes, CodeSpecifiers specifiers, CodeTypename type, Str name); +internal CodeVar parse_variable_declaration_list (Context* ctx); + +internal CodeClass parser_parse_class (Context* ctx, bool inplace_def ); +internal CodeConstructor parser_parse_constructor (Context* ctx, CodeSpecifiers specifiers ); +internal CodeDefine parser_parse_define (Context* ctx); +internal CodeDestructor parser_parse_destructor (Context* ctx, CodeSpecifiers specifiers ); +internal CodeEnum parser_parse_enum (Context* ctx, bool inplace_def ); +internal CodeBody parser_parse_export_body (Context* ctx); +internal CodeBody parser_parse_extern_link_body(Context* ctx); +internal CodeExtern parser_parse_extern_link (Context* ctx); +internal CodeFriend parser_parse_friend (Context* ctx); +internal CodeFn parser_parse_function (Context* ctx); +internal CodeNS parser_parse_namespace (Context* ctx); +internal CodeOpCast parser_parse_operator_cast (Context* ctx, CodeSpecifiers specifiers ); +internal CodeStruct parser_parse_struct (Context* ctx, bool inplace_def ); +internal CodeVar parser_parse_variable (Context* ctx); +internal CodeTemplate parser_parse_template (Context* ctx); +internal CodeTypename parser_parse_type (Context* ctx, bool from_template, bool* is_function ); +internal CodeTypedef parser_parse_typedef (Context* ctx); 
+internal CodeUnion parser_parse_union (Context* ctx, bool inplace_def ); +internal CodeUsing parser_parse_using (Context* ctx); + +constexpr bool parser_inplace_def = true; +constexpr bool parser_not_inplace_def = false; +constexpr bool parser_dont_consume_braces = true; +constexpr bool parser_consume_braces = false; +constexpr bool parser_not_from_template = false; + +constexpr bool parser_use_parenthesis = false; + +// Internal parsing functions + +constexpr bool parser_strip_formatting_dont_preserve_newlines = false; +/* + This function was an attempt at stripping formatting from any c++ code. + It has edge case failures that prevent it from being used in function bodies. +*/ +internal +StrBuilder parser_strip_formatting(Context* ctx, Str raw_text, bool preserve_newlines ) +{ + StrBuilder content = strbuilder_make_reserve( ctx->Allocator_Temp, raw_text.Len ); + + if ( raw_text.Len == 0 ) + return content; + +#define cut_length ( scanner - raw_text.Ptr - last_cut ) +#define cut_ptr ( raw_text.Ptr + last_cut ) +#define pos ( rcast( sptr, scanner ) - rcast( sptr, raw_text.Ptr ) ) +#define move_fwd() do { scanner++; tokleft--; } while(0) + + s32 tokleft = raw_text.Len; + sptr last_cut = 0; + char const* scanner = raw_text.Ptr; + + if ( scanner[0] == ' ' ) { + move_fwd(); + last_cut = 1; + } + + bool within_string = false; + bool within_char = false; + bool must_keep_newline = false; + while ( tokleft ) + { + // Skip over the content of string literals + if ( scanner[0] == '"' ) + { + move_fwd(); + + while ( tokleft && ( scanner[0] != '"' || *( scanner - 1 ) == '\\' ) ) + { + if ( scanner[0] == '\\' && tokleft > 1 ) + { + scanner += 2; + tokleft -= 2; + } + else + { + move_fwd(); + } + } + + // Skip the closing " + if ( tokleft ) + move_fwd(); + + strbuilder_append_c_str_len( & content, cut_ptr, cut_length ); + last_cut = rcast(sptr, scanner ) - rcast( sptr, raw_text.Ptr ); + continue; + } + + // Skip over the content of character literals + if ( scanner[0] == '\'' 
) + { + move_fwd(); + + while ( tokleft + && ( scanner[0] != '\'' + || ( *(scanner -1 ) == '\\' ) + ) ) + { + move_fwd(); + } + + // Skip the closing ' + if ( tokleft ) + move_fwd(); + + strbuilder_append_c_str_len( & content, cut_ptr, cut_length ); + last_cut = rcast( sptr, scanner ) - rcast( sptr, raw_text.Ptr ); + continue; + } + + // Block comments + if ( tokleft > 1 && scanner[0] == '/' && scanner[1] == '*' ) + { + while ( tokleft > 1 && !(scanner[0] == '*' && scanner[1] == '/') ) + move_fwd(); + + scanner += ( tokleft > 1 ? 2 : tokleft ); /* Clamp the skip: an unterminated block comment leaves fewer than 2 characters, and an unclamped skip would drive tokleft negative and run the scan past the end of raw_text. */ + tokleft -= ( tokleft > 1 ? 2 : tokleft ); + + strbuilder_append_c_str_len( & content, cut_ptr, cut_length ); + last_cut = rcast( sptr, scanner ) - rcast( sptr, raw_text.Ptr ); + continue; + } + + // Line comments + if ( tokleft > 1 && scanner[0] == '/' && scanner[1] == '/' ) + { + must_keep_newline = true; + + scanner += 2; + tokleft -= 2; + + while ( tokleft && scanner[ 0 ] != '\n' ) + move_fwd(); + + if (tokleft) + move_fwd(); + + strbuilder_append_c_str_len( & content, cut_ptr, cut_length ); + last_cut = rcast( sptr, scanner ) - rcast( sptr, raw_text.Ptr ); + continue; + } + + // Tabs + if (scanner[0] == '\t') + { + if (pos > last_cut) + strbuilder_append_c_str_len( & content, cut_ptr, cut_length); + + if ( * strbuilder_back( content ) != ' ' ) + strbuilder_append_char( & content, ' ' ); + + move_fwd(); + last_cut = rcast( sptr, scanner) - rcast( sptr, raw_text.Ptr); + continue; + } + + if ( tokleft > 1 && scanner[0] == '\r' && scanner[1] == '\n' ) + { + if ( must_keep_newline || preserve_newlines ) + { + must_keep_newline = false; + + scanner += 2; + tokleft -= 2; + + strbuilder_append_c_str_len( & content, cut_ptr, cut_length ); + last_cut = rcast( sptr, scanner ) - rcast( sptr, raw_text.Ptr ); + continue; + } + + if ( pos > last_cut ) + strbuilder_append_c_str_len( & content, cut_ptr, cut_length ); + + // Replace with a space + if ( * strbuilder_back( content ) != ' ' ) + strbuilder_append_char( & content, ' ' ); + + scanner += 2; + tokleft -= 2; + + last_cut = rcast( 
sptr, scanner ) - rcast( sptr, raw_text.Ptr ); + continue; + } + + if ( scanner[0] == '\n' ) + { + if ( must_keep_newline || preserve_newlines ) + { + must_keep_newline = false; + + move_fwd(); + + strbuilder_append_c_str_len( & content, cut_ptr, cut_length ); + last_cut = rcast( sptr, scanner ) - rcast( sptr, raw_text.Ptr ); + continue; + } + + if ( pos > last_cut ) + strbuilder_append_c_str_len( & content, cut_ptr, cut_length ); + + // Replace with a space + if ( * strbuilder_back( content ) != ' ' ) + strbuilder_append_char( & content, ' ' ); + + move_fwd(); + + last_cut = rcast( sptr, scanner ) - rcast( sptr, raw_text.Ptr ); + continue; + } + + // Escaped newlines + if ( scanner[0] == '\\' ) + { + strbuilder_append_c_str_len( & content, cut_ptr, cut_length ); + + s32 amount_to_skip = 1; + if ( tokleft > 1 && scanner[1] == '\n' ) + { + amount_to_skip = 2; + } + else if ( tokleft > 2 && scanner[1] == '\r' && scanner[2] == '\n' ) + { + amount_to_skip = 3; + } + + if ( amount_to_skip > 1 && pos == last_cut ) + { + scanner += amount_to_skip; + tokleft -= amount_to_skip; + } + else + move_fwd(); + + last_cut = rcast( sptr, scanner ) - rcast( sptr, raw_text.Ptr ); + continue; + } + + // Consectuive spaces + if ( tokleft > 1 && char_is_space( scanner[0] ) && char_is_space( scanner[ 1 ] ) ) + { + strbuilder_append_c_str_len( & content, cut_ptr, cut_length ); + do + { + move_fwd(); + } + while ( tokleft && char_is_space( scanner[0] ) ); + + last_cut = rcast( sptr, scanner ) - rcast( sptr, raw_text.Ptr ); + + // Preserve only 1 space of formattting + char* last = strbuilder_back(content); + if ( last == nullptr || * last != ' ' ) + strbuilder_append_char( & content, ' ' ); + + continue; + } + + move_fwd(); + } + + if ( last_cut < raw_text.Len ) { + strbuilder_append_c_str_len( & content, cut_ptr, raw_text.Len - last_cut ); + } + +#undef cut_ptr +#undef cut_length +#undef pos +#undef move_fwd + + return content; +} + +StrBuilder parser_strip_formatting_2(TokenSlice tokens) 
+{ + // TODO(Ed): Use this to produce strings for validation purposes. We shouldn't serialize down from tokens once we start storing token slices for content. + StrBuilder result = struct_zero(StrBuilder); + return result; +} + +internal +Code parse_array_decl(Context* ctx) +{ + ParseStackNode scope = NullScope; + parser_push(& ctx->parser, & scope ); + + if ( check( Tok_Operator ) && currtok.Text.Ptr[0] == '[' && currtok.Text.Ptr[1] == ']' ) + { + Code array_expr = untyped_str( txt(" ") ); + eat( Tok_Operator ); + // [] + + parser_pop(& ctx->parser); + return array_expr; + } + + if ( check( Tok_BraceSquare_Open ) ) + { + eat( Tok_BraceSquare_Open ); + // [ + + if ( left == 0 ) + { + log_failure( "Error, unexpected end of array declaration ( '[]' scope started )\n%SB", parser_to_strbuilder(& ctx->parser, ctx->Allocator_Temp) ); + parser_pop(& ctx->parser); + return InvalidCode; + } + + if ( currtok.Type == Tok_BraceSquare_Close ) + { + log_failure( "Error, empty array expression in definition\n%SB", parser_to_strbuilder(& ctx->parser, ctx->Allocator_Temp) ); + parser_pop(& ctx->parser); + return InvalidCode; + } + + TokenSlice tokens = { & currtok, 1 }; + Token untyped_tok = currtok; + + while ( left && currtok.Type != Tok_BraceSquare_Close ) + { + eat( currtok.Type ); + ++ tokens.num; + } + + // untyped_tok.Text.Len = ( (sptr)prevtok.Text.Ptr + prevtok.Text.Len ) - (sptr)untyped_tok.Text.Ptr; + untyped_tok.Text = token_range_to_str(untyped_tok, prevtok); + + Code array_expr = untyped_str( untyped_tok.Text ); + // Code array_expr = untyped_toks( tokens ); // TODO(Ed): Use token slice instead of untyped strings. 
+ // [ + + if ( left == 0 ) + { + log_failure( "Error, unexpected end of array declaration, expected ]\n%SB", parser_to_strbuilder(& ctx->parser, ctx->Allocator_Temp) ); + parser_pop(& ctx->parser); + return InvalidCode; + } + + if ( currtok.Type != Tok_BraceSquare_Close ) + { + log_failure( "%s: Error, expected ] in array declaration, not %S\n%SB", toktype_to_str( currtok.Type ), parser_to_strbuilder(& ctx->parser, ctx->Allocator_Temp) ); + parser_pop(& ctx->parser); + return InvalidCode; + } + + eat( Tok_BraceSquare_Close ); + // [ ] + + // Its a multi-dimensional array + if ( check( Tok_BraceSquare_Open )) + { + Code adjacent_arr_expr = parse_array_decl(ctx); + // [ ][ ]... + + array_expr->Next = adjacent_arr_expr; + } + + parser_pop(& ctx->parser); + return array_expr; + } + + parser_pop(& ctx->parser); + return NullCode; +} + +internal inline +CodeAttributes parse_attributes(Context* ctx) +{ + ParseStackNode scope = NullScope; + parser_push(& ctx->parser, & scope); + + Token start = currtok; + s32 len = 0; + + // There can be more than one attribute. If there is flatten them to a single string. 
+ // TODO(Ed): Support chaining attributes (Use parameter linkage pattern) + while ( left && tok_is_attribute(currtok) ) + { + if ( check( Tok_Attribute_Open ) ) + { + eat( Tok_Attribute_Open ); + // [[ + + while ( left && currtok.Type != Tok_Attribute_Close ) + { + eat( currtok.Type ); + } + // [[ + + eat( Tok_Attribute_Close ); + // [[ ]] + + len = ( ( sptr )prevtok.Text.Ptr + prevtok.Text.Len ) - ( sptr )start.Text.Ptr; + } + else if ( check( Tok_Decl_GNU_Attribute ) ) + { + eat( Tok_Decl_GNU_Attribute ); + eat( Tok_Paren_Open ); + eat( Tok_Paren_Open ); + // __attribute__(( + + while ( left && currtok.Type != Tok_Paren_Close ) + { + eat( currtok.Type ); + } + // __attribute__(( + + eat( Tok_Paren_Close ); + eat( Tok_Paren_Close ); + // __attribute__(( )) + + len = ( ( sptr )prevtok.Text.Ptr + prevtok.Text.Len ) - ( sptr )start.Text.Ptr; + } + else if ( check( Tok_Decl_MSVC_Attribute ) ) + { + eat( Tok_Decl_MSVC_Attribute ); + eat( Tok_Paren_Open ); + // __declspec( + + while ( left && currtok.Type != Tok_Paren_Close ) + { + eat( currtok.Type ); + } + // __declspec( + + eat( Tok_Paren_Close ); + // __declspec( ) + + len = ( ( sptr )prevtok.Text.Ptr + prevtok.Text.Len ) - ( sptr )start.Text.Ptr; + } + else if ( tok_is_attribute(currtok) ) + { + eat( currtok.Type ); + // + + // If its a macro based attribute, this could be a functional macro such as Unreal's UE_DEPRECATED(...) + if ( check( Tok_Paren_Open)) + { + eat( Tok_Paren_Open ); + + s32 level = 0; /* Depth of parentheses nested inside the macro's argument list. */ + while (left && (currtok.Type != Tok_Paren_Close || level > 0)) /* Stop only at the ')' that closes the macro call itself; a ')' seen while level > 0 closes a nested group and is consumed. */ + { + if (currtok.Type == Tok_Paren_Open) + ++ level; + if (currtok.Type == Tok_Paren_Close) + --level; + eat(currtok.Type); + } + eat(Tok_Paren_Close); + } + + len = ( ( sptr )prevtok.Text.Ptr + prevtok.Text.Len ) - ( sptr )start.Text.Ptr; + // ( ... 
) + } + } + + if ( len > 0 ) + { + Str attribute_txt = { start.Text.Ptr, len }; + parser_pop(& ctx->parser); + + StrBuilder name_stripped = parser_strip_formatting(ctx, attribute_txt, parser_strip_formatting_dont_preserve_newlines ); + + Code result = make_code(); + result->Type = CT_PlatformAttributes; + result->Name = cache_str( strbuilder_to_str(name_stripped) ); + result->Content = result->Name; + // result->Token = + return ( CodeAttributes )result; + } + + parser_pop(& ctx->parser); + return NullCode; +} + +internal +Code parse_class_struct(Context* ctx, TokType which, bool inplace_def) +{ + if ( which != Tok_Decl_Class && which != Tok_Decl_Struct ) { + log_failure( "Error, expected class or struct, not %S\n%SB", toktype_to_str( which ), parser_to_strbuilder(& ctx->parser, ctx->Allocator_Temp) ); + return InvalidCode; + } + + Token name = NullToken; + + AccessSpec access = AccessSpec_Default; + CodeTypename parent = { nullptr }; + CodeBody body = { nullptr }; + CodeAttributes attributes = { nullptr }; + ModuleFlag mflags = ModuleFlag_None; + + Code result = InvalidCode; + + if ( check(Tok_Module_Export) ) { + mflags = ModuleFlag_Export; + eat( Tok_Module_Export ); + } + // + + eat( which ); + // + + attributes = parse_attributes(ctx); + // + + if ( check( Tok_Identifier ) ) { + name = parse_identifier(ctx, nullptr); + ctx->parser.scope->name = name.Text; + } + // + + CodeSpecifiers specifiers = NullCode; + if ( check(Tok_Spec_Final)) { + specifiers = def_specifier(Spec_Final); + eat(Tok_Spec_Final); + } + // + + local_persist + char interface_arr_mem[ kilobytes(4) ] = {0}; + Array(CodeTypename) interfaces = {nullptr}; + + // TODO(Ed) : Make an AST_DerivedType, we'll store any arbitary derived type into there as a linear linked list of them. 
+ if ( check( Tok_Assign_Classifer ) ) + { + eat( Tok_Assign_Classifer ); + // : + + if ( tok_is_access_specifier(currtok) ) { + access = tok_to_access_specifier(currtok); + // : + eat( currtok.Type ); + } + + Token parent_tok = parse_identifier(ctx, nullptr); + parent = def_type( parent_tok.Text ); + // : + + if (check(Tok_Comma)) + { + Arena arena = arena_init_from_memory( interface_arr_mem, kilobytes(4) ); + interfaces = array_init_reserve(CodeTypename, arena_allocator_info(& arena), 4 ); + do + { + eat( Tok_Comma ); + // : , + + if ( tok_is_access_specifier(currtok) ) { + eat(currtok.Type); + } + Token interface_tok = parse_identifier(ctx, nullptr); + + array_append( interfaces, def_type( interface_tok.Text ) ); + // : , ... + } + while ( check(Tok_Comma) ); + } + } + + if ( check( Tok_BraceCurly_Open ) ) { + body = parse_class_struct_body( ctx, which, name ); + } + // : , ... { } + + CodeComment inline_cmt = NullCode; + if ( ! inplace_def ) + { + Token stmt_end = currtok; + eat( Tok_Statement_End ); + // : , ... { }; + + if ( currtok_noskip.Type == Tok_Comment && currtok_noskip.Line == stmt_end.Line ) + inline_cmt = parse_comment(ctx); + // : , ... { }; + } + + s32 num_interfaces = scast(s32, interfaces ? 
array_num(interfaces) : 0); + + if ( which == Tok_Decl_Class ) + result = cast(Code, def_class( name.Text, def_assign( body, parent, access, attributes, interfaces, num_interfaces, specifiers, mflags ) )); + + else + result = cast(Code, def_struct( name.Text, def_assign( body, (CodeTypename)parent, access, attributes, interfaces, num_interfaces, specifiers, mflags ) )); + + if ( inline_cmt ) + result->InlineCmt = cast(Code, inline_cmt); + + if (interfaces) + array_free(interfaces); + return result; +} + +internal neverinline +CodeBody parse_class_struct_body(Context* ctx, TokType which, Token name) +{ + ParseStackNode scope = NullScope; + parser_push(& ctx->parser, & scope ); + + eat( Tok_BraceCurly_Open ); + // { + + CodeBody + result = (CodeBody) make_code(); + + if ( which == Tok_Decl_Class ) + result->Type = CT_Class_Body; + else + result->Type = CT_Struct_Body; + + while ( left && currtok_noskip.Type != Tok_BraceCurly_Close ) + { + Code member = Code_Invalid; + CodeAttributes attributes = { nullptr }; + CodeSpecifiers specifiers = { nullptr }; + + bool expects_function = false; + + // ctx->parser.Scope->Start = currtok_noskip; + + if ( currtok_noskip.Type == Tok_Preprocess_Hash ) + eat( Tok_Preprocess_Hash ); + + switch ( currtok_noskip.Type ) + { + case Tok_Statement_End: { + // TODO(Ed): Convert this to a general warning procedure + log_fmt("Dangling end statement found %SB\n", tok_to_strbuilder(ctx->Allocator_Temp, currtok_noskip)); + eat( Tok_Statement_End ); + continue; + } + case Tok_NewLine: { + member = fmt_newline; + eat( Tok_NewLine ); + break; + } + case Tok_Comment: { + member = cast(Code, parse_comment(ctx)); + break; + } + case Tok_Access_Public: { + member = access_public; + eat( Tok_Access_Public ); + eat( Tok_Assign_Classifer ); + // public: + break; + } + case Tok_Access_Protected: { + member = access_protected; + eat( Tok_Access_Protected ); + eat( Tok_Assign_Classifer ); + // protected: + break; + } + case Tok_Access_Private: { + member = 
access_private; + eat( Tok_Access_Private ); + eat( Tok_Assign_Classifer ); + // private: + break; + } + case Tok_Decl_Class: { + member = parse_complicated_definition(ctx, Tok_Decl_Class ); + // class + break; + } + case Tok_Decl_Enum: { + member = parse_complicated_definition(ctx, Tok_Decl_Enum ); + // enum + break; + } + case Tok_Decl_Friend: { + member = cast(Code, parser_parse_friend(ctx)); + // friend + break; + } + case Tok_Decl_Operator: { + member = cast(Code, parser_parse_operator_cast(ctx, NullCode)); + // operator () + break; + } + case Tok_Decl_Struct: { + member = parse_complicated_definition(ctx, Tok_Decl_Struct ); + // struct + break; + } + case Tok_Decl_Template: { + member = cast(Code, parser_parse_template(ctx)); + // template< ... > + break; + } + case Tok_Decl_Typedef: { + member = cast(Code, parser_parse_typedef(ctx)); + // typedef + break; + } + case Tok_Decl_Union: { + member = parse_complicated_definition(ctx, Tok_Decl_Union ); + // union + break; + } + case Tok_Decl_Using: { + member = cast(Code, parser_parse_using(ctx)); + // using + break; + } + case Tok_Operator: + { + //if ( currtok.Text[0] != '~' ) + //{ + // log_failure( "Operator token found in global body but not destructor unary negation\n%s", to_strbuilder(ctx->parser) ); + // return InvalidCode; + //} + + member = cast(Code, parser_parse_destructor(ctx, NullCode)); + // ~() + break; + } + case Tok_Preprocess_Define: { + member = cast(Code, parser_parse_define(ctx)); + // #define + break; + } + case Tok_Preprocess_Include: + { + member = cast(Code, parse_include(ctx)); + // #include + break; + } + + case Tok_Preprocess_If: + case Tok_Preprocess_IfDef: + case Tok_Preprocess_IfNotDef: + case Tok_Preprocess_ElIf: + member = cast(Code, parse_preprocess_cond(ctx)); + // # + break; + + case Tok_Preprocess_Else: { + member = cast(Code, preprocess_else); + eat( Tok_Preprocess_Else ); + // #else + break; + } + case Tok_Preprocess_EndIf: { + member = cast(Code, preprocess_endif); + eat( 
Tok_Preprocess_EndIf ); + // #endif + break; + } + + case Tok_Preprocess_Macro_Stmt: { + member = cast(Code, parse_simple_preprocess(ctx, Tok_Preprocess_Macro_Stmt )); + break; + } + + // case Tok_Preprocess_Macro: + // // + // macro_found = true; + // goto Preprocess_Macro_Bare_In_Body; + // break; + + case Tok_Preprocess_Pragma: { + member = cast(Code, parse_pragma(ctx)); + // #pragma + break; + } + + case Tok_Preprocess_Unsupported: { + member = cast(Code, parse_simple_preprocess(ctx, Tok_Preprocess_Unsupported)); + // # + break; + } + + case Tok_StaticAssert: { + member = parse_static_assert(ctx); + // static_assert + break; + } + + case Tok_Preprocess_Macro_Expr: + { + if ( ! tok_is_attribute(currtok)) + { + log_failure("Unbounded macro expression residing in class/struct body\n%S", parser_to_strbuilder(& ctx->parser, ctx->Allocator_Temp)); + return InvalidCode; + } + } + //! Fallthrough intended + case Tok_Attribute_Open: + case Tok_Decl_GNU_Attribute: + case Tok_Decl_MSVC_Attribute: + #define Entry( attribute, str ) case attribute: + GEN_DEFINE_ATTRIBUTE_TOKENS + #undef Entry + { + attributes = parse_attributes(ctx); + // + } + //! 
Fallthrough intended + GEN_PARSER_CLASS_STRUCT_BODY_ALLOWED_MEMBER_TOK_SPECIFIER_CASES: + { + Specifier specs_found[16] = { Spec_NumSpecifiers }; + s32 NumSpecifiers = 0; + + while ( left && tok_is_specifier(currtok) ) + { + Specifier spec = str_to_specifier( currtok.Text ); + + b32 ignore_spec = false; + + switch ( spec ) + { + GEN_PARSER_CLASS_STRUCT_BODY_ALLOWED_MEMBER_SPECIFIER_CASES: + break; + + case Spec_Consteval: + expects_function = true; + break; + + case Spec_Const : + ignore_spec = true; + break; + + default: + log_failure( "Invalid specifier %S for class/struct member\n%S", spec_to_str(spec), strbuilder_to_str( parser_to_strbuilder(& ctx->parser, ctx->Allocator_Temp)) ); + parser_pop(& ctx->parser); + return InvalidCode; + } + + // Every specifier after would be considered part of the type signature + if (ignore_spec) + break; + + specs_found[NumSpecifiers] = spec; + NumSpecifiers++; + eat( currtok.Type ); + } + + if ( NumSpecifiers ) + { + specifiers = def_specifiers_arr( NumSpecifiers, specs_found ); + } + // + + if ( tok_is_attribute(currtok) ) + { + // Unfortunately Unreal has code where there are attributes before specifiers + CodeAttributes more_attributes = parse_attributes(ctx); + + if ( attributes ) + { + StrBuilder fused = strbuilder_make_reserve( ctx->Allocator_Temp, attributes->Content.Len + more_attributes->Content.Len ); + /* NOTE(review): Content exposes .Len like a Str, so it is formatted with %S here; confirm against the StrCached definition. */ strbuilder_append_fmt( & fused, "%S %S", attributes->Content, more_attributes->Content ); + + Str attrib_name = strbuilder_to_str(fused); + attributes->Name = cache_str( attrib_name ); + attributes->Content = attributes->Name; + // + } + else /* Only adopt the trailing attributes when there were no leading ones; otherwise keep the fused node built above instead of discarding it. */ + attributes = more_attributes; + } + + if ( currtok.Type == Tok_Operator && currtok.Text.Ptr[0] == '~' ) + { + member = cast(Code, parser_parse_destructor(ctx, specifiers )); + // ~() + break; + } + + if ( currtok.Type == Tok_Decl_Operator ) + { + member = cast(Code, parser_parse_operator_cast(ctx, specifiers )); + // operator () + break; + } + } + //! 
Fallthrough intentional + case Tok_Identifier: + case Tok_Preprocess_Macro_Typename: + case Tok_Spec_Const: + case Tok_Type_Unsigned: + case Tok_Type_Signed: + case Tok_Type_Short: + case Tok_Type_Long: + case Tok_Type_bool: + case Tok_Type_char: + case Tok_Type_int: + case Tok_Type_double: + { + if ( nexttok.Type == Tok_Paren_Open && name.Text.Len && currtok.Type == Tok_Identifier ) + { + if ( c_str_compare_len( name.Text.Ptr, currtok.Text.Ptr, name.Text.Len ) == 0 ) + { + member = cast(Code, parser_parse_constructor(ctx, specifiers )); + // () + break; + } + } + + member = parse_operator_function_or_variable(ctx, expects_function, attributes, specifiers ); + // operator ... + // or + // ... + } + break; + + default: + Token untyped_tok = currtok; + while ( left && currtok.Type != Tok_BraceCurly_Close ) + { + untyped_tok.Text.Len = ( (sptr)currtok.Text.Ptr + currtok.Text.Len ) - (sptr)untyped_tok.Text.Ptr; + eat( currtok.Type ); + } + member = untyped_str( untyped_tok.Text ); + // Something unknown + break; + } + + if ( member == Code_Invalid ) + { + log_failure( "Failed to parse member\n%SB", parser_to_strbuilder(& ctx->parser, ctx->Allocator_Temp) ); + parser_pop(& ctx->parser); + return InvalidCode; + } + body_append(result, member ); + } + + eat( Tok_BraceCurly_Close ); + // { } + parser_pop(& ctx->parser); + return result; +} + +internal +CodeComment parse_comment(Context* ctx) +{ + ParseStackNode scope = NullScope; + parser_push(& ctx->parser, & scope ); + + CodeComment + result = (CodeComment) make_code(); + result->Type = CT_Comment; + result->Content = cache_str( currtok_noskip.Text ); + // result->Token = currtok_noskip; + eat( Tok_Comment ); + + parser_pop(& ctx->parser); + return result; +} + +internal +Code parse_complicated_definition(Context* ctx, TokType which) +{ + ParseStackNode scope = NullScope; + parser_push(& ctx->parser, & scope ); + + b32 is_inplace = false; + b32 is_fn_def = false; + + TokenSlice tokens = ctx->parser.tokens; + + s32 idx = 
ctx->parser.token_id; + s32 level = 0; + b32 had_def = false; + b32 had_paren = false; + for ( ; idx < tokens.num; idx++ ) + { + if ( tokens.ptr[ idx ].Type == Tok_BraceCurly_Open ) + level++; + + if ( tokens.ptr[ idx ].Type == Tok_BraceCurly_Close ) { + level--; + had_def = level == 0; + } + + b32 found_fn_def = had_def && had_paren; + + if ( level == 0 && (tokens.ptr[ idx ].Type == Tok_Statement_End || found_fn_def) ) + break; + } + + is_fn_def = had_def && had_paren; + if (is_fn_def) + { + // Function definition with on return type + Code result = parse_operator_function_or_variable(ctx, false, NullCode, NullCode); + // (...) ... { ... } + parser_pop(& ctx->parser); + return result; + } + + if ( ( idx - 2 ) == ctx->parser.token_id ) + { + // It's a forward declaration only + Code result = parse_forward_or_definition(ctx, which, is_inplace ); + // ; + parser_pop(& ctx->parser); + return result; + } + + Token tok = tokens.ptr[ idx - 1 ]; + if ( tok_is_specifier(tok) && spec_is_trailing( str_to_specifier( tok.Text)) ) + { + // (...) ...; + + s32 spec_idx = idx - 1; + Token spec = tokens.ptr[spec_idx]; + while ( tok_is_specifier(spec) && spec_is_trailing( str_to_specifier( spec.Text)) ) + { + -- spec_idx; + spec = tokens.ptr[spec_idx]; + } + + if ( tokens.ptr[spec_idx].Type == Tok_Paren_Close ) + { + // Forward declaration with trailing specifiers for a procedure + tok = tokens.ptr[spec_idx]; + + Code result = parse_operator_function_or_variable(ctx, false, NullCode, NullCode); + // , or Name> ... 
+ parser_pop(& ctx->parser); + return result; + } + + log_failure( "Unsupported or bad member definition after %S declaration\n%SB", toktype_to_str(which), parser_to_strbuilder(& ctx->parser, ctx->Allocator_Temp) ); + parser_pop(& ctx->parser); + return InvalidCode; + } + if ( tok.Type == Tok_Identifier ) + { + tok = tokens.ptr[ idx - 2 ]; + bool is_indirection = tok.Type == Tok_Ampersand || tok.Type == Tok_Star; + bool ok_to_parse = false; + + if ( tok.Type == Tok_BraceCurly_Close ) + { + // Its an inplace definition + // { ... } ; + ok_to_parse = true; + is_inplace = true; + + CodeTypename type = cast(CodeTypename, parse_forward_or_definition(ctx, which, is_inplace)); + + // Should be a name right after the type. + Token name = parse_identifier(ctx, nullptr); + ctx->parser.scope->name = name.Text; + + CodeVar result = parse_variable_after_name(ctx, ModuleFlag_None, NullCode, NullCode, type, name.Text); + parser_pop(& ctx->parser); + return (Code) result; + } + else if ( tok.Type == Tok_Identifier && tokens.ptr[ idx - 3 ].Type == which ) + { + // Its a variable with type ID using namespace. + // ; + ok_to_parse = true; + } + else if ( tok.Type == Tok_Assign_Classifer + && ( ( tokens.ptr[idx - 5].Type == which && tokens.ptr[idx - 4].Type == Tok_Decl_Class ) + || ( tokens.ptr[idx - 4].Type == which)) + ) + { + // Its a forward declaration of an enum + // : ; + // : ; + ok_to_parse = true; + Code result = cast(Code, parser_parse_enum(ctx, ! parser_inplace_def)); + parser_pop(& ctx->parser); + return result; + } + else if ( is_indirection ) + { + // Its a indirection type with type ID using struct namespace. + // * ; + ok_to_parse = true; + } + + if ( ! 
ok_to_parse ) + { + log_failure( "Unsupported or bad member definition after %S declaration\n%SB", toktype_to_str(which), parser_to_strbuilder(& ctx->parser, ctx->Allocator_Temp) ); + parser_pop(& ctx->parser); + return InvalidCode; + } + + Code result = parse_operator_function_or_variable(ctx, false, NullCode, NullCode ); + // , or Name> ... + parser_pop(& ctx->parser); + return result; + } + else if ( tok.Type >= Tok_Type_Unsigned && tok.Type <= Tok_Type_MS_W64 ) + { + tok = tokens.ptr[ idx - 2 ]; + + if ( tok.Type != Tok_Assign_Classifer + || ( ( tokens.ptr[idx - 5].Type != which && tokens.ptr[idx - 4].Type != Tok_Decl_Class ) + && ( tokens.ptr[idx - 4].Type != which)) + ) + { + log_failure( "Unsupported or bad member definition after %S declaration\n%SB", toktype_to_str(which), parser_to_strbuilder(& ctx->parser, ctx->Allocator_Temp) ); + parser_pop(& ctx->parser); + return InvalidCode; + } + + // Its a forward declaration of an enum class + // : ; + // : ; + Code result = cast(Code, parser_parse_enum(ctx, ! parser_inplace_def)); + parser_pop(& ctx->parser); + return result; + } + else if ( tok.Type == Tok_BraceCurly_Close ) + { + // Its a definition + Code result = parse_forward_or_definition(ctx, which, is_inplace ); + // { ... }; + parser_pop(& ctx->parser); + return result; + } + else if ( tok.Type == Tok_BraceSquare_Close ) + { + // Its an array definition + Code result = parse_operator_function_or_variable(ctx, false, NullCode, NullCode ); + // [ ... 
]; + parser_pop(& ctx->parser); + return result; + } + else + { + log_failure( "Unsupported or bad member definition after %S declaration\n%SB", toktype_to_str(which).Ptr, parser_to_strbuilder(& ctx->parser, ctx->Allocator_Temp) ); + parser_pop(& ctx->parser); + return InvalidCode; + } +} + +internal inline +Code parse_assignment_expression(Context* ctx) +{ + ParseStackNode scope = NullScope; + parser_push(& ctx->parser, & scope ); + + Code expr = { nullptr }; + + eat( Tok_Operator ); + // = + + Token expr_tok = currtok; + + if ( currtok.Type == Tok_Statement_End && currtok.Type != Tok_Comma ) + { + log_failure( "Expected expression after assignment operator\n%SB", parser_to_strbuilder(& ctx->parser, ctx->Allocator_Temp) ); + parser_pop(& ctx->parser); + return InvalidCode; + } + + s32 level = 0; + while ( left && currtok.Type != Tok_Statement_End && (currtok.Type != Tok_Comma || level > 0) ) + { + if (currtok.Type == Tok_BraceCurly_Open ) + level++; + if (currtok.Type == Tok_BraceCurly_Close ) + level--; + if (currtok.Type == Tok_Paren_Open) + level++; + else if (currtok.Type == Tok_Paren_Close) + level--; + + eat( currtok.Type ); + } + + if (left) { + expr_tok.Text.Len = ( ( sptr )currtok.Text.Ptr + currtok.Text.Len ) - ( sptr )expr_tok.Text.Ptr - 1; + } + expr = untyped_str( expr_tok.Text ); + // = + + parser_pop(& ctx->parser); + return expr; +} + +internal inline +Code parse_forward_or_definition(Context* ctx, TokType which, bool is_inplace ) +{ + Code result = InvalidCode; + + switch ( which ) + { + case Tok_Decl_Class: + result = cast(Code, parser_parse_class(ctx, is_inplace )); + return result; + + case Tok_Decl_Enum: + result = cast(Code, parser_parse_enum(ctx, is_inplace )); + return result; + + case Tok_Decl_Struct: + result = cast(Code, parser_parse_struct(ctx, is_inplace )); + return result; + + case Tok_Decl_Union: + result = cast(Code, parser_parse_union(ctx, is_inplace )); + return result; + + default: + log_failure( "Error, wrong token type given 
to parse_complicated_definition " + "(only supports class, enum, struct, union) \n%SB" + , parser_to_strbuilder(& ctx->parser, ctx->Allocator_Temp) ); + + return InvalidCode; + } +} + +// Function parsing is handled in multiple places because its initial signature is shared with variable parsing +internal inline +CodeFn parse_function_after_name(Context* ctx + , ModuleFlag mflags + , CodeAttributes attributes + , CodeSpecifiers specifiers + , CodeTypename ret_type + , Token name +) +{ + ParseStackNode scope = NullScope; + parser_push(& ctx->parser, & scope ); + + CodeParams params = parse_params(ctx, parser_use_parenthesis); + // ( ) + + Code suffix_specs = NullCode; + + // TODO(Ed), Review old comment : These have to be kept separate from the return type's specifiers. + while ( left && tok_is_specifier(currtok) ) + { + // For Unreal's PURE_VIRTUAL Support + Macro* macro = lookup_macro( currtok.Text ); + if (macro && tok_is_specifier(currtok)) + { + suffix_specs = parse_simple_preprocess(ctx, Tok_Preprocess_Macro_Expr); + continue; + } + if ( specifiers == nullptr ) + { + specifiers = def_specifier( str_to_specifier( currtok.Text) ); + eat( currtok.Type ); + continue; + } + + specifiers_append(specifiers, str_to_specifier( currtok.Text) ); + eat( currtok.Type ); + } + // ( ) + + // Check for trailing specifiers... 
+ CodeAttributes post_rt_attributes = parse_attributes(ctx); + if (post_rt_attributes) + { + if (attributes) + { + StrBuilder merged = strbuilder_fmt_buf(ctx->Allocator_Temp, "%S %S", attributes->Content, post_rt_attributes->Content); + attributes->Content = cache_str(strbuilder_to_str(merged)); + } + else + { + attributes = post_rt_attributes; + } + } + // ( ) + + CodeBody body = NullCode; + CodeComment inline_cmt = NullCode; + if ( check( Tok_BraceCurly_Open ) ) + { + body = cast(CodeBody, parse_function_body(ctx)); + if ( cast(Code, body) == Code_Invalid ) + { + parser_pop(& ctx->parser); + return InvalidCode; + } + // ( ) { } + } + else if ( check(Tok_Operator) && currtok.Text.Ptr[0] == '=' ) + { + eat(Tok_Operator); + if ( specifiers == nullptr ) + { + specifiers = (CodeSpecifiers) make_code(); + specifiers->Type = CT_Specifiers; + } + if ( str_are_equal(nexttok.Text, txt("delete"))) + { + specifiers_append(specifiers, Spec_Delete); + eat(currtok.Type); + // ( ) = delete + } + else + { + specifiers_append(specifiers, Spec_Pure ); + + eat( Tok_Number); + // ( ) = 0 + } + Token stmt_end = currtok; + eat( Tok_Statement_End ); + + if ( currtok_noskip.Type == Tok_Comment && currtok_noskip.Line == stmt_end.Line ) + inline_cmt = parse_comment(ctx); + // ( ) < = 0 or delete > ; + } + + + if (body == nullptr) + { + Token stmt_end = currtok; + eat( Tok_Statement_End ); + // ( ) < = 0 or delete > ; + + if ( currtok_noskip.Type == Tok_Comment && currtok_noskip.Line == stmt_end.Line ) + inline_cmt = parse_comment(ctx); + // ( ) < = 0 or delete > ; + } + + StrBuilder + name_stripped = strbuilder_make_str( ctx->Allocator_Temp, name.Text ); + strbuilder_strip_space(name_stripped); + + CodeFn + result = (CodeFn) make_code(); + result->Name = cache_str( strbuilder_to_str(name_stripped) ); + result->ModuleFlags = mflags; + + if ( body ) + { + switch ( body->Type ) + { + case CT_Function_Body: + case CT_Untyped: + break; + + default: + { + log_failure("Body must be either of 
Function_Body or Untyped type, %S\n%SB", code_debug_str(body), parser_to_strbuilder(& ctx->parser, ctx->Allocator_Temp)); + parser_pop(& ctx->parser); + return InvalidCode; + } + } + + result->Type = CT_Function; + result->Body = body; + } + else + { + result->Type = CT_Function_Fwd; + } + + if ( attributes ) + result->Attributes = attributes; + + if ( specifiers ) + result->Specs = specifiers; + + if ( suffix_specs ) + result->SuffixSpecs = suffix_specs; + + result->ReturnType = ret_type; + + if ( params ) + result->Params = params; + + if ( inline_cmt ) + result->InlineCmt = inline_cmt; + + parser_pop(& ctx->parser); + return result; +} + +internal +Code parse_function_body(Context* ctx) +{ + ParseStackNode scope = NullScope; + parser_push(& ctx->parser, & scope ); + + eat( Tok_BraceCurly_Open ); + + CodeBody + result = (CodeBody) make_code(); + result->Type = CT_Function_Body; + + // TODO : Support actual parsing of function body + Token start = currtok_noskip; + + s32 level = 0; + while ( left && ( currtok_noskip.Type != Tok_BraceCurly_Close || level > 0 ) ) + { + if ( currtok_noskip.Type == Tok_BraceCurly_Open ) + level++; + + else if ( currtok_noskip.Type == Tok_BraceCurly_Close && level > 0 ) + level--; + + eat( currtok_noskip.Type ); + } + + Token past = prevtok; + + s32 len = ( (sptr)prevtok.Text.Ptr + prevtok.Text.Len ) - (sptr)start.Text.Ptr; + + if ( len > 0 ) + { + Str str = { start.Text.Ptr, len }; + body_append( result, cast(Code, def_execution( str )) ); + } + + eat( Tok_BraceCurly_Close ); + + parser_pop(& ctx->parser); + return cast(Code, result); +} + +internal neverinline +CodeBody parse_global_nspace(Context* ctx, CodeType which) +{ + ParseStackNode scope = NullScope; + parser_push(& ctx->parser, & scope ); + + if ( which != CT_Namespace_Body && which != CT_Global_Body && which != CT_Export_Body && which != CT_Extern_Linkage_Body ) + return InvalidCode; + + if ( which != CT_Global_Body ) + eat( Tok_BraceCurly_Open ); + // { + + CodeBody + 
result = (CodeBody) make_code(); + result->Type = which; + + while ( left && currtok_noskip.Type != Tok_BraceCurly_Close ) + { + Code member = Code_Invalid; + CodeAttributes attributes = { nullptr }; + CodeSpecifiers specifiers = { nullptr }; + + bool expects_function = false; + + // ctx->parser.Scope->Start = currtok_noskip; + + if ( currtok_noskip.Type == Tok_Preprocess_Hash ) + eat( Tok_Preprocess_Hash ); + + b32 macro_found = false; + + switch ( currtok_noskip.Type ) + { + case Tok_Comma: + { + log_failure("Dangling comma found: %SB\nContext:\n%SB", tok_to_strbuilder(ctx->Allocator_Temp, currtok), parser_to_strbuilder(& ctx->parser, ctx->Allocator_Temp)); + parser_pop( & ctx->parser); + return InvalidCode; + } + break; + case Tok_Statement_End: + { + // TODO(Ed): Convert this to a general warning procedure + log_fmt("Dangling end statement found %SB\n", tok_to_strbuilder(ctx->Allocator_Temp, currtok_noskip)); + eat( Tok_Statement_End ); + continue; + } + case Tok_NewLine: + // Empty lines are auto skipped by Tokens.current() + member = fmt_newline; + eat( Tok_NewLine ); + break; + + case Tok_Comment: + member = cast(Code, parse_comment(ctx)); + break; + + case Tok_Decl_Class: + member = parse_complicated_definition(ctx, Tok_Decl_Class ); + // class + break; + + case Tok_Decl_Enum: + member = parse_complicated_definition(ctx, Tok_Decl_Enum ); + // enum + break; + + case Tok_Decl_Extern_Linkage: + if ( which == CT_Extern_Linkage_Body ) + log_failure( "Nested extern linkage\n%SB", parser_to_strbuilder(& ctx->parser, ctx->Allocator_Temp) ); + + member = cast(Code, parser_parse_extern_link(ctx)); + // extern "..." { ... } + break; + + case Tok_Decl_Namespace: + member = cast(Code, parser_parse_namespace(ctx)); + // namespace { ... } + break; + + case Tok_Decl_Struct: + member = parse_complicated_definition(ctx, Tok_Decl_Struct ); + // struct ... + break; + + case Tok_Decl_Template: + member = cast(Code, parser_parse_template(ctx)); + // template<...> ... 
+ break; + + case Tok_Decl_Typedef: + member = cast(Code, parser_parse_typedef(ctx)); + // typedef ... + break; + + case Tok_Decl_Union: + member = parse_complicated_definition(ctx, Tok_Decl_Union ); + // union ... + break; + + case Tok_Decl_Using: + member = cast(Code, parser_parse_using(ctx)); + // using ... + break; + + case Tok_Preprocess_Define: + member = cast(Code, parser_parse_define(ctx)); + // #define ... + break; + + case Tok_Preprocess_Include: + member = cast(Code, parse_include(ctx)); + // #include ... + break; + + case Tok_Preprocess_If: + case Tok_Preprocess_IfDef: + case Tok_Preprocess_IfNotDef: + case Tok_Preprocess_ElIf: + member = cast(Code, parse_preprocess_cond(ctx)); + // # ... + break; + + case Tok_Preprocess_Else: + member = cast(Code, preprocess_else); + eat( Tok_Preprocess_Else ); + // #else + break; + + case Tok_Preprocess_EndIf: + member = cast(Code, preprocess_endif); + eat( Tok_Preprocess_EndIf ); + // #endif + break; + + case Tok_Preprocess_Macro_Stmt: { + member = cast(Code, parse_simple_preprocess(ctx, Tok_Preprocess_Macro_Stmt )); + break; + } + + case Tok_Preprocess_Pragma: { + member = cast(Code, parse_pragma(ctx)); + // #pragma ... + } + break; + + case Tok_Preprocess_Unsupported: { + member = cast(Code, parse_simple_preprocess(ctx, Tok_Preprocess_Unsupported )); + // # ... + } + break; + + case Tok_StaticAssert: { + member = cast(Code, parse_static_assert(ctx)); + // static_assert( , ... ); + } + break; + + case Tok_Module_Export: { + if ( which == CT_Export_Body ) + log_failure( "Nested export declaration\n%SB", parser_to_strbuilder(& ctx->parser, ctx->Allocator_Temp) ); + + member = cast(Code, parser_parse_export_body(ctx)); + // export { ... } + } + break; + + case Tok_Module_Import: { + // import ... + log_failure( "gen::%s: This function is not implemented" ); + return InvalidCode; + } + break; + + case Tok_Preprocess_Macro_Expr: + { + if ( ! 
tok_is_attribute(currtok)) + { + log_failure("Unbounded macro expression residing in class/struct body\n%SB", parser_to_strbuilder(& ctx->parser, ctx->Allocator_Temp)); + return InvalidCode; + } + } + //! Fallthrough intentional + case Tok_Attribute_Open: + case Tok_Decl_GNU_Attribute: + case Tok_Decl_MSVC_Attribute: + #define Entry( attribute, str ) case attribute: + GEN_DEFINE_ATTRIBUTE_TOKENS + #undef Entry + { + attributes = parse_attributes(ctx); + // + } + //! Fallthrough intentional + GEN_PARSER_CLASS_GLOBAL_NSPACE_ALLOWED_MEMBER_TOK_SPECIFIER_CASES: + { + Specifier specs_found[16] = { Spec_NumSpecifiers }; + s32 NumSpecifiers = 0; + + while ( left && tok_is_specifier(currtok) ) + { + Specifier spec = str_to_specifier( currtok.Text ); + + bool ignore_spec = false; + + switch ( spec ) + { + GEN_PARSER_CLASS_GLOBAL_NSPACE_ALLOWED_MEMBER_SPECIFIER_CASES: + break; + + case Spec_Consteval: + expects_function = true; + break; + + case Spec_Const: + ignore_spec = true; + break; + + default: + Str spec_str = spec_to_str(spec); + + log_failure( "Invalid specifier %S for variable\n%S", spec_str, strbuilder_to_str( parser_to_strbuilder(& ctx->parser, ctx->Allocator_Temp)) ); + parser_pop(& ctx->parser); + return InvalidCode; + } + + if (ignore_spec) + break; + + specs_found[NumSpecifiers] = spec; + NumSpecifiers++; + eat( currtok.Type ); + } + + if ( NumSpecifiers ) + { + specifiers = def_specifiers_arr( NumSpecifiers, specs_found ); + } + // + } + //! 
Fallthrough intentional + case Tok_Identifier: + case Tok_Preprocess_Macro_Typename: + case Tok_Spec_Const: + case Tok_Type_Long: + case Tok_Type_Short: + case Tok_Type_Signed: + case Tok_Type_Unsigned: + case Tok_Type_bool: + case Tok_Type_char: + case Tok_Type_double: + case Tok_Type_int: + { + // This s only in a scope so that Preprocess_Macro_Bare_In_Body works without microsoft extension warnings + { + Code constructor_destructor = parse_global_nspace_constructor_destructor( ctx, specifiers ); + // Possible constructor implemented at global file scope. + if ( constructor_destructor ) + { + member = constructor_destructor; + break; + } + + bool found_operator_cast_outside_class_implmentation = false; + s32 idx = ctx->parser.token_id; + + for ( ; idx < ctx->parser.tokens.num; idx++ ) + { + Token tok = ctx->parser.tokens.ptr[ idx ]; + + if ( tok.Type == Tok_Identifier ) + { + idx++; + tok = ctx->parser.tokens.ptr[ idx ]; + if ( tok.Type == Tok_Access_StaticSymbol ) + continue; + + break; + } + + if ( tok.Type == Tok_Decl_Operator ) + found_operator_cast_outside_class_implmentation = true; + + break; + } + + if ( found_operator_cast_outside_class_implmentation ) + { + member = cast(Code, parser_parse_operator_cast(ctx, specifiers )); + // ::operator () { ... } + break; + } + } + + member = parse_operator_function_or_variable(ctx, expects_function, attributes, specifiers ); + // ... 
+			}
+		}
+
+		if ( member == Code_Invalid )
+		{
+			log_failure( "Failed to parse member\nToken: %SB\nContext:\n%SB", tok_to_strbuilder(ctx->Allocator_Temp, currtok_noskip), parser_to_strbuilder(& ctx->parser, ctx->Allocator_Temp) );
+			parser_pop(& ctx->parser);
+			return InvalidCode;
+		}
+
+		// log_fmt("Global Body Member: %s", member->debug_str());
+		body_append(result, member );
+	}
+
+	if ( which != CT_Global_Body )
+		eat( Tok_BraceCurly_Close );
+		// { body }
+
+	parser_pop(& ctx->parser);
+	return result;
+}
+
+// Checks whether the tokens at the current position form an out-of-class constructor or destructor
+// definition (`Name :: Name (` or `Name :: ~ Name (`) and, if so, parses it.
+// Returns the parsed constructor/destructor, or a null Code when the pattern does not match.
+internal inline
+Code parse_global_nspace_constructor_destructor(Context* ctx, CodeSpecifiers specifiers)
+{
+	ParseStackNode scope = NullScope;
+	parser_push(& ctx->parser, & scope );
+
+	Code result = { nullptr };
+
+	/*
+		To check if a definition is for a constructor we can go straight to the opening parenthesis for its parameters.
+		From there we work backwards to see if we come across two identifiers with the same name around a member access
+		:: operator; there can be template parameters on the left of the :: so we ignore those.
+		What's important is that they are back to back.
+
+		This has multiple possible faults. What we parse using this method may not filter out if something has a "return type".
+		This is bad since technically you could have a namespace nested into another namespace with the same name.
+		If this awful pattern is done the only way to distinguish with this coarse parse is to know there is no return type defined.
+
+		TODO(Ed): We could fix this by attempting to parse a type, but we would have to have a way to have it soft fail and rollback.
+	*/
+	TokenSlice tokens = ctx->parser.tokens;
+
+	// Scan forward to the first `(` that is not nested inside a template argument list.
+	s32   idx = ctx->parser.token_id;
+	Token nav = tokens.ptr[ idx ];
+	for ( ; idx < tokens.num; idx++, nav = tokens.ptr[ idx ] )
+	{
+		if ( nav.Text.Ptr[0] == '<' )
+		{
+			// Skip templated expressions as they may have expressions with the () operators
+			s32 capture_level  = 0;
+			s32 template_level = 0;
+			for ( ; idx < tokens.num; idx++, nav = tokens.ptr[idx] )
+			{
+				if (nav.Text.Ptr[ 0 ] == '<')
+					++ template_level;
+
+				if (nav.Text.Ptr[ 0 ] == '>')
+					-- template_level;
+				// A two-character operator token ending in `>` (e.g. `>>`) closes one more level.
+				if (nav.Type == Tok_Operator && nav.Text.Ptr[1] == '>')
+					-- template_level;
+
+				if ( nav.Type == Tok_Paren_Open)
+				{
+					if (template_level != 0 )
+						++ capture_level;
+					else
+						break;
+				}
+
+				if ( template_level != 0 && nav.Type == Tok_Paren_Close)
+					-- capture_level;
+			}
+		}
+
+		if ( nav.Type == Tok_Paren_Open )
+			break;
+	}
+
+	-- idx;
+	Token tok_right = tokens.ptr[idx];
+	Token tok_left  = NullToken;
+
+	if (tok_right.Type != Tok_Identifier)
+	{
+		parser_pop(& ctx->parser);
+		// We're not dealing with a constructor if there is no identifier right before the opening of a parameter's scope.
+		return result;
+	}
+
+	-- idx;
+	tok_left = tokens.ptr[idx];
+	// ... Name (
+
+	bool possible_destructor = false;
+	if ( tok_left.Type == Tok_Operator && tok_left.Text.Ptr[0] == '~')
+	{
+		possible_destructor = true;
+		-- idx;
+		tok_left = tokens.ptr[idx];
+	}
+
+	if ( tok_left.Type != Tok_Access_StaticSymbol ) {
+		parser_pop(& ctx->parser);
+		return result;
+	}
+
+	-- idx;
+	tok_left = tokens.ptr[idx];
+	// ... :: Name (
+
+	// We search toward the left until we find the next valid identifier
+	s32 capture_level  = 0;
+	s32 template_level = 0;
+	while ( idx != ctx->parser.token_id )
+	{
+		if (tok_left.Text.Ptr[ 0 ] == '<')
+			++ template_level;
+
+		if (tok_left.Text.Ptr[ 0 ] == '>')
+			-- template_level;
+		if (tok_left.Type == Tok_Operator && tok_left.Text.Ptr[1] == '>')
+			-- template_level;
+
+		if ( template_level != 0 && tok_left.Type == Tok_Paren_Open)
+			++ capture_level;
+
+		if ( template_level != 0 && tok_left.Type == Tok_Paren_Close)
+			-- capture_level;
+
+		if ( capture_level == 0 && template_level == 0 && tok_left.Type == Tok_Identifier )
+			break;
+
+		-- idx;
+		tok_left = tokens.ptr[idx];
+	}
+
+	// The two names must match in full. Comparing only the first tok_right.Text.Len characters accepted any
+	// left identifier that merely starts with the right one (e.g. `FooBar :: Foo (`), so lengths are checked too.
+	// (Assumes c_str_compare_len compares at most the given number of characters - TODO confirm.)
+	bool is_same = tok_right.Text.Len == tok_left.Text.Len
+		&& c_str_compare_len( tok_right.Text.Ptr, tok_left.Text.Ptr, tok_right.Text.Len ) == 0;
+	if (tok_left.Type == Tok_Identifier && is_same)
+	{
+		// We have found the pattern we desired
+		if (possible_destructor)
+		{
+			// Name :: ~ Name (
+			result = cast(Code, parser_parse_destructor(ctx, specifiers ));
+		}
+		else {
+			// Name :: Name (
+			result = cast(Code, parser_parse_constructor(ctx, specifiers ));
+		}
+	}
+
+	parser_pop(& ctx->parser);
+	return result;
+}
+
+// TODO(Ed): I want to eventually change the identifier to its own AST type.
+// This would allow distinction of the qualifier for a symbol :: +// This would also allow +internal +Token parse_identifier(Context* ctx, bool* possible_member_function) +{ + ParseStackNode scope = NullScope; + parser_push(& ctx->parser, & scope ); + + Token name = currtok; + ctx->parser.scope->name = name.Text; + + Macro* macro = lookup_macro(currtok.Text); + b32 accept_as_identifier = macro && bitfield_is_set(MacroFlags, macro->Flags, MF_Allow_As_Identifier ); + b32 is_decarator = macro && bitfield_is_set(MacroFlags, macro->Flags, MF_Identifier_Decorator ); + + // Typename can be: '::' + // If that is the case first option will be Tok_Access_StaticSymbol below + if (check(Tok_Identifier) || accept_as_identifier) + { + if (is_decarator) { + Code name_macro = parse_simple_preprocess(ctx, currtok.Type); + name.Text.Len = ( ( sptr )prevtok.Text.Ptr + prevtok.Text.Len ) - ( sptr )name.Text.Ptr; + } + else { + eat(Tok_Identifier); + } + } + // + + parse_template_args(ctx, & name); + //