diff --git a/build.bat b/build.bat index b1fdaac..881c7e5 100644 --- a/build.bat +++ b/build.bat @@ -19,7 +19,6 @@ echo ~~~ Build All Tests ~~~ cl %compile_flags% ..\tests\sanity_tests.c cl %compile_flags% ..\tests\unicode_test.c cl %compile_flags% ..\tests\cpp_build_test.cpp -cl %compile_flags% ..\tests\grammar.c popd rem Stop wasting time getting hung on broken parser diff --git a/build.sh b/build.sh index 81af723..8aaf0fa 100755 --- a/build.sh +++ b/build.sh @@ -29,7 +29,6 @@ echo ~~~ Build All Tests ~~~ $CC $compile_flags ../tests/sanity_tests.c -o sanity_tests $CC $compile_flags ../tests/unicode_test.c -o unicode_test clang++ $compile_flags ../tests/cpp_build_test.cpp -$CC $compile_flags ../tests/grammar.c -o grammar popd echo diff --git a/build_with_clang.bat b/build_with_clang.bat index 12a885f..3a51907 100644 --- a/build_with_clang.bat +++ b/build_with_clang.bat @@ -21,7 +21,6 @@ echo ~~~ Build All Tests ~~~ clang %compile_flags% %src%\tests\sanity_tests.c -o sanity_tests.exe clang %compile_flags% %src%\tests\unicode_test.c -o unicode_test.exe clang++ %compile_flags% %src%\tests\cpp_build_test.cpp -o cpp_build_test.exe -clang %compile_flags% %src%\tests\grammar.c -o grammar.exe popd echo. diff --git a/tests/grammar.c b/tests/grammar.c deleted file mode 100644 index 0070efa..0000000 --- a/tests/grammar.c +++ /dev/null @@ -1,804 +0,0 @@ -#include "md.h" -#include "md.c" -#include - -#define DEBUG_RULES_AFTER_TRANSFORMATIONS 0 -#define DEBUG_PRINT_GENERATED_TESTS 1 - -// NOTE: https://www.pcg-random.org/download.html -typedef struct RandomSeries -{ - MD_u64 state; // NOTE: RNG state. All values are possible. - MD_u64 inc; // Controls which RNG sequence (stream) is selected. Odd at all times. -} RandomSeries; - -static MD_u32 rand_U32(RandomSeries* rng) -{ - MD_u64 oldstate = rng->state; - rng->state = oldstate * 6364136223846793005ULL + rng->inc; - MD_u32 xorshifted = ((oldstate >> 18u) ^ oldstate) >> 27u; - MD_u32 rot = oldstate >> 59u; - return (xorshifted >> rot) | (xorshifted << ((-rot) & 31)); -} - -static RandomSeries rand_seed(MD_u64 initstate, MD_u64 initseq) -{ - RandomSeries result = {0}; - - result.state = 0U; - result.inc = (initseq << 1u) | 1u; // NOTE: inc must be odd - rand_U32(&result); - result.state += initstate; - rand_U32(&result); - - return result; -} - -MD_GLOBAL struct -{ - RandomSeries *random_series; - MD_Map *production_table; -} globals = {0}; - -static void TagSquareBracketSetsAsOptional(MD_Node *node, MD_Node *optional_tag) -{ - if(node->kind == MD_NodeKind_Main && - (node->flags & MD_NodeFlag_HasBracketLeft) && (node->flags & MD_NodeFlag_HasBracketRight)) - { - if(node->string.size) // NOTE(mal): Production. Tag belongs to the first child - { - MD_Assert(!MD_NodeIsNil(node->first_child)); - MD_PushTag(node->first_child, optional_tag); - } - else - { - MD_PushTag(node, optional_tag); - } - } - - for(MD_EachNode(child, node->first_child)) - { - TagSquareBracketSetsAsOptional(child, optional_tag); - } -} - -static MD_Node * NewChildLabel(MD_Node *parent, MD_String8 label) -{ - MD_Node *result = 0; - - result = MD_MakeNode(MD_NodeKind_Main, label, label, 0); - if(parent) - { - MD_PushChild(parent, result); - } - return result; -} - -static MD_Node * NewChild(MD_Node *parent) -{ - MD_Node *result = NewChildLabel(parent, MD_S8Lit("")); - return result; -} - -#define OPTIONAL_TAG "optional" - -#define GET_DEPTH(depth_map, node) ((MD_u64)MD_MapLookup(depth_map, MD_MapKeyPtr(node))->val) -#define SET_DEPTH(depth_map, node, depth) MD_MapOverwrite(depth_map, MD_MapKeyPtr(node), (void *)(depth)) -static void PrintRule(MD_Map *depth_map, MD_Node *rule) -{ - MD_b32 is_literal_char = rule->flags & MD_NodeFlag_StringLiteral; - - MD_b32 optional = MD_NodeHasTag(rule, MD_S8Lit(OPTIONAL_TAG), 0); - - if(optional) - { - printf("["); - } - - for(MD_EachNode(tag, rule->first_tag)) - { - if(!MD_S8Match(tag->string, MD_S8Lit(OPTIONAL_TAG), 0)) - { - printf("@%.*s ", MD_S8VArg(tag->string)); - } - } - - if(is_literal_char) - { - printf("'"); - } - - MD_b32 has_children = !MD_NodeIsNil(rule->first_child); - if(has_children) - { - MD_Assert(rule->string.size == 0); - - for(MD_EachNode(rule_element, rule->first_child)) - { - PrintRule(depth_map, rule_element); - printf(" (%lu) ", (unsigned long)GET_DEPTH(depth_map, rule_element)); - - if(!MD_NodeIsNil(rule_element->next)) - { - printf(" "); - } - } - } - else - { - MD_Assert(rule->string.size > 0); - printf("%.*s", MD_S8VArg(rule->string)); - } - - if(is_literal_char) - { - printf("'"); - } - - if(optional) - { - printf("]"); - } -} - -typedef enum OperationFlags -{ - OperationFlag_Fill = 1<<0, - OperationFlag_Markup = 1<<1, - OperationFlag_Tag = 1<<2, -} OperationFlags; - -static void Extend(MD_String8 *s, char c) -{ - *s = MD_S8Fmt("%.*s%c", MD_S8VArg(*s), c); -} - -static void ExpandProduction(MD_Node *production, MD_String8List *out, MD_Node *cur_node, - OperationFlags op_flags, MD_Map *depth_map, - MD_u32 max_depth, MD_u32 depth); - -static void ExpandRule(MD_Node *rule, MD_String8List *out_strings, MD_Node *cur_node, OperationFlags op_flags, - MD_Map *depth_map, MD_u32 max_depth, MD_u32 depth) -{ - for(MD_EachNode(rule_element, rule->first_child)) - { - MD_b32 expand = 1; - if(MD_NodeHasTag(rule_element, MD_S8Lit(OPTIONAL_TAG), 0)) - { - expand = rand_U32(globals.random_series)%2; - - if(expand) - { - if(GET_DEPTH(depth_map, rule_element) + depth >= max_depth) - { - expand = 0; - } - } - } - - if(expand) - { - MD_Node *node_to_tag = 0; - OperationFlags old_op_flags = op_flags; - for(MD_EachNode(tag_node, rule_element->first_tag)){ - if(MD_S8Match(tag_node->string, MD_S8Lit("child"), 0)) - { - cur_node = NewChild(cur_node); - op_flags &= ~OperationFlag_Tag; // NOTE(mal): Tag parameters are not tags - } - else if(MD_S8Match(tag_node->string, MD_S8Lit("sibling"), 0)) - { - cur_node = NewChild(cur_node->parent); - } - else if(MD_S8Match(tag_node->string, MD_S8Lit("fill"), 0)) - { - op_flags |= OperationFlag_Fill; - } - else if(MD_S8Match(tag_node->string, MD_S8Lit("tag"), 0)) - { - op_flags |= OperationFlag_Tag; - node_to_tag = cur_node; - cur_node = NewChild(0); - cur_node->kind = MD_NodeKind_Tag; - } - else if(MD_S8Match(tag_node->string, MD_S8Lit("markup"), 0)) - { - op_flags |= OperationFlag_Markup; - } - else if(MD_S8Match(tag_node->string, MD_S8Lit(OPTIONAL_TAG), 0)) - { - } - else - { - MD_Assert(!"Not implemented"); - } - } - - MD_b32 has_children = !MD_NodeIsNil(rule_element->first_child); - if(has_children) - { - ExpandRule(rule_element, out_strings, cur_node, op_flags, depth_map, max_depth, depth+1); - } - else - { - if(rule_element->flags & MD_NodeFlag_StringLiteral) // NOTE(mal): Terminal production - { - char c = 0; - if(rule_element->string.size == 2 && rule_element->string.str[0] == '\\') - { - switch(rule_element->string.str[1]){ - case '\\': c = '\\'; break; - case '\'': c = '\''; break; - case '"': c = '\"'; break; - case 'n': c = '\n'; break; - } - } - else - { - c = rule_element->string.str[0]; - } - - MD_String8 character = MD_S8Fmt("%c", c); - MD_S8ListPush(out_strings, character); - - if(op_flags & OperationFlag_Fill) - { - Extend(&cur_node->raw_string, c); - if(!(op_flags & OperationFlag_Markup)) - { - Extend(&cur_node->string, c); - } - } - } - else // NOTE(mal): Non-terminal production - { - MD_Node * production = MD_MapLookup(globals.production_table, MD_MapKeyStr(rule_element->string))->val; - MD_Assert(production); - ExpandProduction(production, out_strings, cur_node, op_flags, depth_map, max_depth, depth+1); - } - } - - if(node_to_tag) - { - MD_PushTag(node_to_tag, cur_node); - cur_node = node_to_tag; - } - - op_flags = old_op_flags; - } - } -} - -static void ExpandProduction(MD_Node *production, MD_String8List *out, MD_Node *cur_node, - OperationFlags op_flags, MD_Map *depth_map, - MD_u32 max_depth, MD_u32 depth) -{ - MD_i64 rule_count = MD_ChildCountFromNode(production); - - MD_Assert(GET_DEPTH(depth_map, production)+depth <= max_depth); - - MD_Node *rule = 0; - do{ - int rule_number = rand_U32(globals.random_series)%rule_count; - rule = MD_ChildFromIndex(production, rule_number); - }while(GET_DEPTH(depth_map, rule)+depth > max_depth); - - ExpandRule(rule, out, cur_node, op_flags, depth_map, max_depth, depth); -} - -static MD_Node * FindNonTerminalProduction(MD_Node *node, MD_Map *visited) -{ - MD_Node *result = 0; - - MD_b32 should_visit = 1; - if(!MD_NodeIsNil(node->first_child) && node->string.size) - { - if(MD_MapLookup(visited, MD_MapKeyStr(node->string))) - { - should_visit = 0; - } - else - { - void *inserted = MD_MapOverwrite(visited, MD_MapKeyStr(node->string), node); - MD_Assert(inserted); - } - } - - if(should_visit) - { - if(MD_NodeIsNil(node->first_child)) - { - if(node->flags & MD_NodeFlag_StringLiteral) - { - } - else - { - MD_MapSlot *slot = MD_MapLookup(globals.production_table, MD_MapKeyStr(node->string)); - if(slot) - { - MD_Node *production = slot->val; - result = FindNonTerminalProduction(production, visited); - } - else - { - result = node; - } - } - } - else - { - for(MD_EachNode(child, node->first_child)) - { - result = FindNonTerminalProduction(child, visited); - if(result) - { - break; - } - } - } - } - - return result; -} - -static MD_b32 EqualTrees(MD_Node *a, MD_Node *b); -static MD_b32 EqualList(MD_Node *a, MD_Node *b) -{ - MD_b32 result = 1; - while(!MD_NodeIsNil(a) || !MD_NodeIsNil(b)) - { - if(!EqualTrees(a, b)) - { - result = 0; - break; - } - a = a->next; - b = b->next; - } - return result; -} -static MD_b32 EqualTrees(MD_Node *a, MD_Node *b) -{ - MD_b32 result = (a->kind == b->kind && - MD_S8Match(a->string, b->string, 0) && - MD_S8Match(a->raw_string, b->raw_string, 0)); - result &= EqualList(a->first_tag, b->first_tag); - result &= EqualList(a->first_child, b->first_child); - result &= MD_S8Match(a->prev_comment, b->prev_comment, 0); - return result; -} - -static MD_String8 EscapeNewlines(MD_String8 s) -{ - MD_String8 result = s; - - MD_u32 newline_count = 0; - for(MD_u8 *c = s.str; c < s.str + s.size; ++c) - { - newline_count += (*c == '\n'); - } - - if(newline_count) - { - result.size = s.size + newline_count; - result.str = calloc(result.size+1, sizeof(MD_u8)); - for(MD_u8 *dest = result.str, *orig = s.str; orig < s.str + s.size; ++orig, ++dest) - { - if(*orig == '\n') - { - *dest++ = '\\'; - *dest = 'n'; - } - else - { - *dest = *orig; - } - } - } - - return result; -} - -typedef struct Test Test; -struct Test -{ - MD_String8 input; - MD_Node *expected_output; - Test *next; -}; - -static void ComputeElementDepth(MD_Map *depth_map, MD_Node *re) -{ - MD_u64 result = 0; - MD_b32 has_children = !MD_NodeIsNil(re->first_child); - if(has_children) - { - for(MD_EachNode(sub_re, re->first_child)) - { - ComputeElementDepth(depth_map, sub_re); - MD_u64 depth = GET_DEPTH(depth_map, sub_re); - if(result < depth) - { - result = depth; - } - } - } - else - { - if(re->flags & MD_NodeFlag_StringLiteral) // NOTE(mal): Terminal production - { - result = 1; - } - else - { - MD_Node * production = MD_MapLookup(globals.production_table, MD_MapKeyStr(re->string))->val; - result = GET_DEPTH(depth_map, production)+1; - } - } - - SET_DEPTH(depth_map, re, result); -} - -// NOTE(mal): Compares first by size then alphabetically -static int StringCompare(MD_String8 a, MD_String8 b) -{ - int result = 0; - if(a.size < b.size) - { - result = -1; - } - else if(a.size > b.size) - { - result = 1; - } - else - { - for(MD_u64 i = 0; i < a.size; ++i) - { - if(a.str[i] < b.str[i]) - { - result = -1; - break; - } - else if(a.str[i] > b.str[i]) - { - result = 1; - break; - } - } - } - return result; -} - -static MD_Node * -FirstBadNodeAtPointer(MD_Node *node) -{ - MD_Node *result = 0; - MD_Node *root = MD_RootFromNode(node); - MD_u8 *node_at = root->raw_string.str + node->offset; - switch(node->kind){ - case MD_NodeKind_File: - { - } break; - case MD_NodeKind_Main: - { - if(node_at != node->raw_string.str) - { - if(node->raw_string.size) - { - result = node; - } - else - { - if(!node_at || (node_at[0] != '(' && node_at[0] != '[' && node_at[0] != '{')) - { - result = node; - } - } - - if(result) - { - goto end; - } - } - } break; - case MD_NodeKind_List: - case MD_NodeKind_Tag: - { - if(node_at != node->raw_string.str) - { - result = node; - goto end; - } - } break; - default: - break; - } - - for(MD_EachNode(child, node->first_child)) - { - result = FirstBadNodeAtPointer(child); - if(result) goto end; - } - for(MD_EachNode(child, node->first_tag)) - { - result = FirstBadNodeAtPointer(child); - if(result) goto end; - } - - end: - - return result; -} - -int main(int argument_count, char **arguments) -{ - MD_Node *grammar = MD_ParseWholeFile(MD_S8Lit("tests/grammar.md")).node; - - // NOTE(mal): In order to get a BNF-like syntax, I feed the MD output through two transformations: - // 1) Tag []-style sets as optional - MD_Node *optional_tag = 0; - { - MD_ParseResult parse_result = MD_ParseOneNode(MD_S8Lit("@"OPTIONAL_TAG" is_a_tag"), 0); - optional_tag = parse_result.node->first_tag; - } - TagSquareBracketSetsAsOptional(grammar, optional_tag); - - // NOTE(mal): 2) Divide rules around '|' operators and transplant them into a new tree structure - // productions > production > rules > rule > rule_element - MD_Node *productions = NewChild(0); - for(MD_EachNode(symbol, grammar->first_child)) - { - MD_Node *production = NewChildLabel(productions, symbol->string); - MD_Node *rule = NewChild(production); - for(MD_Node *rule_element = symbol->first_child; !MD_NodeIsNil(rule_element); ) - { - MD_Node *next = rule_element->next; - - MD_b32 has_children = !MD_NodeIsNil(rule_element->first_child); - if(has_children) - { - MD_PushChild(rule, rule_element); - } - else - { - if(MD_S8Match(rule_element->string, MD_S8Lit("|"), 0) && - !(rule_element->flags & MD_NodeFlag_StringLiteral)) - { - rule = NewChild(production); - } - else - { - MD_PushChild(rule, rule_element); - } - } - - rule_element = next; - } - } - - // NOTE(mal): Build production hash table - MD_Map production_table_ = MD_MapMake(); - globals.production_table = &production_table_; - for(MD_EachNode(production, productions->first_child)) - { - void *inserted = MD_MapOverwrite(globals.production_table, - MD_MapKeyStr(production->string), production); - MD_Assert(inserted); - } - - // NOTE(mal): Check for root production - MD_Node* file_production = 0; - { - MD_MapSlot *file_production_slot = MD_MapLookup(globals.production_table, MD_MapKeyStr(MD_S8Lit("file"))); - if(!file_production_slot) - { - fprintf(stderr, "Error: Grammar file does not specify \"file\" production\n"); - goto error; - } - file_production = file_production_slot->val; - } - - // NOTE(mal): Check that all branches lead to terminal nodes - MD_Map visited_productions = MD_MapMake(); - - for(MD_EachNode(production, productions->first_child)) - { - MD_Node *non_terminal_production = FindNonTerminalProduction(production, &visited_productions); - if(non_terminal_production) - { - fprintf(stderr, "Error: Non-terminal production \"%.*s\"\n", MD_S8VArg(non_terminal_production->string)); - goto error; - } - } - - // NOTE(mal): Check that all productions are reachable - for(MD_EachNode(production, productions->first_child)) - { - MD_MapSlot *slot = MD_MapLookup(&visited_productions, MD_MapKeyStr(production->string)); - if(!slot) - { - fprintf(stderr, "Warning: Unreachable production \"%.*s\"\n", MD_S8VArg(production->string)); - } - } - - // NOTE(mal): Compute depth of productions, rules, rule elements - - // NOTE(mal): Init all MD_Node depths to 0 - MD_Map depth_map_ = MD_MapMake(); - MD_Map *depth_map = &depth_map_; - for(MD_EachNode(production, productions->first_child)) - { - SET_DEPTH(depth_map, production, 0); - for(MD_EachNode(rule, production->first_child)) - { - SET_DEPTH(depth_map, rule, 0); - for(MD_EachNode(rule_element, rule->first_child)) - { - SET_DEPTH(depth_map, rule_element, 0); - } - } - } - - MD_b32 progress = 1; - while(progress) - { - progress = 0; - for(MD_EachNode(production, productions->first_child)) - { - MD_u64 min_rule_depth = 0xffffffffffffffff; - for(MD_EachNode(rule, production->first_child)) - { - MD_u64 max_rule_element_depth = 0; - MD_u64 max_mandatory_rule_element_depth = 0; - for(MD_EachNode(rule_element, rule->first_child)) - { - ComputeElementDepth(depth_map, rule_element); - MD_u64 depth = GET_DEPTH(depth_map, rule_element); - - if(!MD_NodeHasTag(rule_element, MD_S8Lit(OPTIONAL_TAG), 0)) - { - MD_u64 depth = 0; - MD_Assert(MD_NodeIsNil(rule_element->first_child)); - if(!(rule_element->flags & MD_NodeFlag_StringLiteral)) - { - MD_Node * production = MD_MapLookup(globals.production_table, MD_MapKeyStr(rule_element->string))->val; - depth = GET_DEPTH(depth_map, production); - } - depth += 1; - - if(depth > max_mandatory_rule_element_depth) - { - max_mandatory_rule_element_depth = depth; - } - } - - if(depth > max_rule_element_depth) - { - max_rule_element_depth = depth; - } - } - - if(max_mandatory_rule_element_depth > GET_DEPTH(depth_map, rule)) - { - progress = 1; - SET_DEPTH(depth_map, rule, max_mandatory_rule_element_depth); - } - - if(max_mandatory_rule_element_depth < min_rule_depth) - { - min_rule_depth = max_mandatory_rule_element_depth; - } - } - - if(min_rule_depth > GET_DEPTH(depth_map, production)) - { - progress = 1; - SET_DEPTH(depth_map, production, min_rule_depth); - } - } - } - -#if DEBUG_RULES_AFTER_TRANSFORMATIONS - for(MD_EachNode(production, productions->first_child)) - { - printf("%.*s (min %lu): \n", MD_S8VArg(production->string), GET_DEPTH(production)); - for(MD_EachNode(rule, production->first_child)) - { - printf(" "); - PrintRule(rule); - printf(" (max %lu)", GET_DEPTH(rule)); - printf("\n"); - } - } -#endif - - RandomSeries random_series = rand_seed(0, 0); // NOTE(mal): Reproduceable - globals.random_series = &random_series; - MD_Node* file_production_node = MD_MapLookup(globals.production_table, MD_MapKeyStr(MD_S8Lit("file")))->val; - - // NOTE(mal): Generate test_count unique tests, sorted by complexity - MD_u32 test_count = 1000; - MD_u32 max_production_depth = 30; - Test *first_test = calloc(1, sizeof(Test)); - first_test->expected_output = NewChild(0); - first_test->expected_output->kind = MD_NodeKind_File; - for(MD_u32 i_test = 1; i_test < test_count;) - { - Test test = {0}; - test.expected_output = NewChild(0); - test.expected_output->kind = MD_NodeKind_File; - - MD_String8List string_list = {0}; - // NOTE(mal): Generate a random MD file - ExpandProduction(file_production_node, &string_list, test.expected_output, 0, depth_map, max_production_depth, 0); - test.input = MD_S8ListJoin(string_list, 0); - - Test *prev = 0; - for(Test *cur = first_test; cur; cur = cur->next) - { - int comp = StringCompare(test.input, cur->input); - if(comp > 0) - { - prev = cur; - } - else if(comp < 0) - { - break; - } - else - { - prev = 0; // NOTE(mal): Repeat node - } - } - - if(prev) - { - Test *stored_test = calloc(1, sizeof(Test)); - *stored_test = test; - stored_test->next = prev->next; - prev->next = stored_test; - ++i_test; - } - } - - MD_u32 i_test = 0; - for(Test *test = first_test; test; test = test->next) - { - MD_Node *file_node = MD_ParseWholeString(MD_S8Lit(""), test->input).node; - file_node->string = file_node->raw_string = (MD_String8){0}; - -#if DEBUG_PRINT_GENERATED_TESTS - printf("> %.*s <\n", MD_S8VArg(EscapeNewlines(test->input))); -#endif - if(!EqualTrees(file_node, test->expected_output)) - { - printf("\nFailed test %d\n", i_test); - printf("> %.*s <\n", MD_S8VArg(test->input)); - printf("MD:\n"); - MD_DebugOutputTree(stdout, file_node, 0); - printf("Grammar:\n"); - MD_DebugOutputTree(stdout, test->expected_output, 0); printf("\n"); - return -1; - } - - MD_Node *bad_at_node = FirstBadNodeAtPointer(file_node); - if(bad_at_node) - { - printf("\nBad node->at on test %d\n", i_test); - printf("> %.*s <\n", MD_S8VArg(test->input)); - printf("MD:\n"); - MD_DebugOutputTree(stdout, file_node, 0); - printf("offending_node"); - MD_DebugOutputTree(stdout, bad_at_node, 0); - return -1; - } - ++i_test; - } - - printf("Passed %d tests\n", test_count); - - error: - - return 0; -} diff --git a/tests/grammar.md b/tests/grammar.md deleted file mode 100644 index 2a6c463..0000000 --- a/tests/grammar.md +++ /dev/null @@ -1,116 +0,0 @@ -/* MetaDesk grammar with semantic annotations - * - * Each line represents a BNF-esque production: - * symbol : rule_1 | ... | rule_n - * - Pipe signs indicate mutually exclusive alternatives - * - Square quotes denote optional rules - * - Character literals are terminal productions - * - Tags indicate which way the productions attach to the generated tree (@child, @sibling, @tag) - * and miscellaneous semantics (@fill, @markup) - */ - -//// A bunch of common definitions -separator : ' ' | unscoped_separator -unscoped_separator : '\n'| ',' | ';' -alt_scope_beg : '(' | '[' -alt_scope_end : ')' | ']' -id : alpha [alphanumeric] | '_' [alphanumeric] -alphanumeric : alpha [alphanumeric] | digit [alphanumeric] | '_' [alphanumeric] -alpha : lowercase | uppercase -lowercase : 'a'|'b'|'c'|'d'|'e'|'f'|'g'|'h'|'i'|'j'|'k'|'l'|'m'|'n'|'o'|'p'|'q'|'r'|'s'|'t'|'u'|'v'|'w'|'z'|'y'|'z' -uppercase : 'A'|'B'|'C'|'D'|'E'|'F'|'G'|'H'|'I'|'J'|'K'|'L'|'M'|'N'|'O'|'P'|'Q'|'R'|'S'|'T'|'U'|'V'|'W'|'Z'|'Y'|'Z' -digit : '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9' -label : id | integer_literal | char_literal | string_literal | symbol_label -integer_literal : { ['-'] natural_literal } -natural_literal : digit [natural_literal] -char_literal : @markup '\'' [char_literal_items] @markup '\'' -char_literal_items : char_literal_item [char_literal_items] -char_literal_item : ascii_no_backslash_no_quotes | '"' | '\\' ascii -ascii : ascii_no_backslash_no_quotes | '\'' | '"' | '`' | '\\' -ascii_no_backslash_no_quotes : digit | alpha | symbol_no_backslash_no_quotes | ' ' -symbol_no_backslash_no_quotes : symbol_no_backslash_no_quotes_1 | symbol_no_backslash_no_quotes_2 -symbol_no_backslash_no_quotes_1 : '!'|'#'|'$'|'%'|'&'|'('|')'|'*'|'+'|','|'-'|'.'|'/'|':' -symbol_no_backslash_no_quotes_2 : ';'|'<'|'='|'>'|'?'|'@'|'['|']'|'^'|'_'|'{'|'|'|'}'|'~' -string_literal : @markup '"' [string_literal_items] @markup '"' | @markup '`' [string_literal_items] @markup '`' -string_literal_items : string_literal_item [string_literal_items] -string_literal_item : ascii_no_backslash_no_quotes | '\'' | '\\' ascii -symbol_label : '~'|'!'|'%'|'^'|'&'|'*'|'+'|'-'|'/'|'|'|'<'|'>'|'$'|'='|'.'|'?'|'_' - -/* What follows is a range of annotated grammars that can be used to generate - * tests of increasing complexity and completeness to check against MetaDesk. - * To run them, uncomment them one by one and run the build/grammar test. - */ - -//// Arbitrarily deep tree, possibly empty -// file : [@child set_list] -// set_list : '{' [@child set_list] '}' [separator @sibling set_list] - -//// Labeled leaves -// file : [@child set_list] -// set_list : set [separator @sibling set_list] -// set : @fill 'A' | '{' [@child set_list] '}' - -//// Labeled internal nodes -// file : [@child set_list] -// set_list : set [separator @sibling set_list] -// set : @fill 'A' | [@fill 'A' ':'] '{' [@child set_list] '}' - -//// Unscoped sets -// file : [@child set_list] -// set_list : scoped_set [separator @sibling set_list] | unscoped_set [unscoped_separator @sibling set_list] -// scoped_set : '{' [@child set_list] '}' -// unscoped_set : @fill 'A' [unscoped_set_tail] -// unscoped_set_tail : ':' @child unscoped_set | ' ' @sibling unscoped_set | ':' scoped_set |' ' @sibling scoped_set - -//// Tags -// file : [@child set_list] -// set_list : scoped_set [separator @sibling set_list] | unscoped_set [unscoped_separator @sibling set_list] -// scoped_set : {[tag_list] untagged_scoped_set} -// untagged_scoped_set : '{' [@child set_list] '}' -// unscoped_set : {[tag_list] @fill 'A' [unscoped_set_tail]} -// tag_list : '@' @tag tag ' ' [tag_list] -// tag : @fill 'T'['(' [@child set_list] ')'] -// unscoped_set_tail : {':' @child unscoped_set | ' ' @sibling unscoped_set | ':' untagged_scoped_set | ' ' @sibling scoped_set} - -//// Alternative scope markers -// file : [@child set_list] -// set_list : scoped_set [separator @sibling set_list] | unscoped_set [unscoped_separator @sibling set_list] -// scoped_set : {[tag_list] untagged_scoped_set} -// untagged_scoped_set : '{' [@child set_list] '}' | alt_scope_beg [@child set_list] alt_scope_end -// unscoped_set : {[tag_list] @fill 'A' [unscoped_set_tail]} -// tag_list : '@' @tag tag ' ' [tag_list] -// tag : @fill 'T'['(' [@child set_list] ')'] -// unscoped_set_tail : {':' @child unscoped_set | ' ' @sibling unscoped_set | ':' untagged_scoped_set | ' ' @sibling scoped_set} - -//// General tags and labels -file : [@child set_list] -set_list : scoped_set [separator @sibling set_list] | unscoped_set [unscoped_separator @sibling set_list] -scoped_set : {[tag_list] untagged_scoped_set} -untagged_scoped_set : '{' [@child set_list] '}' | alt_scope_beg [@child set_list] alt_scope_end -unscoped_set : {[tag_list] @fill label [unscoped_set_tail]} -tag_list : '@' @tag tag ' ' [tag_list] -tag : @fill id['(' [@child set_list] ')'] -unscoped_set_tail : {':' @child unscoped_set | ' ' @sibling unscoped_set | ':' untagged_scoped_set | ' ' @sibling scoped_set} - -/* Comments - * Comments around nodes are accessible to the user. Here's how they behave: - * - The text inside a comment immediatly following a node is stored as the - * next_comment member of that node. No newlines can happen between a - * node and its after_comment. - * - The text inside a comment preceding a node is stored as the - * prev_comment of that node _unless_ it is already the next_comment - * of another node. One newline between the comment and the node is - * obviously necessary in the case of C++-style comments and it's also - * allowed for C-style comments. - * - If the first character inside a C++-style comment is a space, it's - * omitted from the stored string - * - * The semantically annotated Backus-Naur form that we're using is not a - * good fit to describe the grammar of comments. - * To prevent the comment in "a /* comment */ b" from being interpreted as -* a prev_comment of "b", instead of what it is (a next_comment of "a"), -* we would have to complicate the grammar by introducing several extra -* productions with this specific purpose in mind. -* The sensitivity of whitespace in the attachment of comments to nodes is -* also cumbersome to express in BNF. -*/