From 2d22fc17d56a8797a9412db1b767fda52f0e7cfe Mon Sep 17 00:00:00 2001
From: Miguel Lechon <miguel.lechon@gmail.com>
Date: Mon, 8 Mar 2021 20:02:27 +0100
Subject: [PATCH] [grammar test] Remove malformed tagging. Give up on comments.

---
 tests/grammar.c  | 16 -----------
 tests/grammar.md | 72 +++++++++++++++++++++---------------------------
 2 files changed, 32 insertions(+), 56 deletions(-)

diff --git a/tests/grammar.c b/tests/grammar.c
index b561cf4..ba6e918 100644
--- a/tests/grammar.c
+++ b/tests/grammar.c
@@ -147,7 +147,6 @@ enum OperationFlags
     OperationFlag_Fill          = 1<<0,
     OperationFlag_Markup        = 1<<1,
     OperationFlag_Tag           = 1<<2,
-    OperationFlag_PreComment    = 1<<3,
 };
 
 static void Extend(MD_String8 *s, char c)
@@ -161,7 +160,6 @@ static void ExpandProduction(MD_Node *production, MD_String8List *out, MD_Node *
 static void ExpandRule(MD_Node *rule, MD_String8List *out_strings, MD_Node *cur_node, OperationFlags op_flags,
                        MD_u32 max_depth, MD_u32 depth)
 {
-    MD_String8 pre_comment = {0};
     for(MD_EachNode(rule_element, rule->first_child))
     {
         MD_b32 expand = 1;
@@ -181,8 +179,6 @@ static void ExpandRule(MD_Node *rule, MD_String8List *out_strings, MD_Node *cur_
         if(expand)
         {
             MD_Node *node_to_tag = 0;
-            MD_Node *node_to_precomment = 0;
-            MD_String8 precomment = {0};
             OperationFlags old_op_flags = op_flags;
             for(MD_EachNode(tag_node, rule_element->first_tag)){
                 if(MD_StringMatch(tag_node->string, MD_S8Lit("child"), 0))
@@ -209,12 +205,6 @@ static void ExpandRule(MD_Node *rule, MD_String8List *out_strings, MD_Node *cur_
                 {
                     op_flags |= OperationFlag_Markup;
                 }
-                else if(MD_StringMatch(tag_node->string, MD_S8Lit("pre_comment"), 0))
-                {
-                    op_flags |= OperationFlag_PreComment;
-                    node_to_precomment = cur_node;
-                    cur_node = NewChild(0); // NOTE(mal): Only used to store the comment, won't be linked as a node
-                }
                 else if(MD_StringMatch(tag_node->string, MD_S8Lit(OPTIONAL_TAG), 0))
                 {
                 }
@@ -274,12 +264,6 @@ static void ExpandRule(MD_Node *rule, MD_String8List *out_strings, MD_Node *cur_
                 cur_node = node_to_tag;
             }
 
-            if(node_to_precomment)
-            {
-                node_to_precomment->comment_before = cur_node->string;
-                cur_node = node_to_precomment;
-            }
-
             op_flags = old_op_flags;
         }
     }
diff --git a/tests/grammar.md b/tests/grammar.md
index 03dcb4a..185f056 100644
--- a/tests/grammar.md
+++ b/tests/grammar.md
@@ -36,6 +36,11 @@ string_literal_items            : string_literal_item [string_literal_items]
 string_literal_item             : ascii_no_backslash_no_quotes | '\'' | '\\' ascii
 symbol_label                    : '~'|'!'|'%'|'^'|'&'|'*'|'+'|'-'|'/'|'|'|'<'|'>'|'$'|'='|'.'|'?'|'$'
 
+/* What follows is a range of annotated grammars that can be used to generate
+ * tests of increasing complexity and completeness to check against MetaDesk.
+ * To run them, uncomment them one by one and run the build/grammar test.
+ */
+
 //// Arbitrarily deep tree, possibly empty
 // file                : [@child set_list]
 // set_list            : '{' [@child set_list] '}' [separator @sibling set_list]
@@ -60,18 +65,12 @@ symbol_label                    : '~'|'!'|'%'|'^'|'&'|'*'|'+'|'-'|'/'|'|'|'<'|'>
 //// Tags
 // file                : [@child set_list]
 // set_list            : scoped_set [separator @sibling set_list] | unscoped_set [unscoped_separator @sibling set_list]
-// scoped_set          : {[tag_list] '{' [@child set_list] '}'}
+// scoped_set          : {[tag_list] untagged_scoped_set}
+// untagged_scoped_set : '{' [@child set_list] '}'
 // unscoped_set        : {[tag_list] @fill 'A' [unscoped_set_tail]}
 // tag_list            : '@' @tag tag ' ' [tag_list]
 // tag                 : @fill 'T'['(' [@child set_list] ')']
-// // unscoped_set_tail   : ':' @child unscoped_set | ' ' @sibling unscoped_set | ':' @child scoped_set | ' ' @sibling scoped_set
-// unscoped_set_tail   : {':' @child unscoped_set | ' ' @sibling unscoped_set | set_tail_hack | ' ' @sibling scoped_set}
-//                       // NOTE(mal): Ideally the set_tail_hack should be captured by the simpler
-//                       //            ':' @child scoped_set
-//                       //            but there's a quirk in the grammar. 
-//                       //            In "A:{}", A has no children but in "A:@T{}", it has one tagged children, 
-//                       //            which means that *the presence of tags* introduces the child
-// set_tail_hack       : ':' [@child tag_list] '{' '}' | ':' @child [tag_list] '{' @child set_list '}'
+// unscoped_set_tail   : {':' @child unscoped_set | ' ' @sibling unscoped_set | ':' untagged_scoped_set | ' ' @sibling scoped_set}
 
 //// Alternative scope markers
 // file                : [@child set_list]
@@ -81,12 +80,7 @@ symbol_label                    : '~'|'!'|'%'|'^'|'&'|'*'|'+'|'-'|'/'|'|'|'<'|'>
 // unscoped_set        : {[tag_list] @fill 'A' [unscoped_set_tail]}
 // tag_list            : '@' @tag tag ' ' [tag_list]
 // tag                 : @fill 'T'['(' [@child set_list] ')']
-// unscoped_set_tail   : {':' @child unscoped_set | ' ' @sibling unscoped_set | set_tail_hack | ' ' @sibling scoped_set}
-// set_tail_hack       : {':' [@child tag_list] '{' '}' | 
-//                        ':' @child [tag_list] '{' @child set_list '}' | 
-//                        ':' [@child tag_list] alt_scope_beg alt_scope_end | 
-//                        ':' @child [tag_list] alt_scope_beg @child set_list alt_scope_end
-//                       }
+// unscoped_set_tail   : {':' @child unscoped_set | ' ' @sibling unscoped_set | ':' untagged_scoped_set | ' ' @sibling scoped_set}
 
 //// General tags and labels
 file                : [@child set_list]
@@ -96,29 +90,27 @@ untagged_scoped_set : '{' [@child set_list] '}' | alt_scope_beg [@child set_list
 unscoped_set        : {[tag_list] @fill label [unscoped_set_tail]}
 tag_list            : '@' @tag tag ' ' [tag_list]
 tag                 : @fill id['(' [@child set_list] ')']
-unscoped_set_tail   : {':' @child unscoped_set | ' ' @sibling unscoped_set | set_tail_hack | ' ' @sibling scoped_set}
-set_tail_hack       : {':' [@child tag_list] '{' '}' | 
-                       ':' @child [tag_list] '{' @child set_list '}' | 
-                       ':' [@child tag_list] alt_scope_beg alt_scope_end | 
-                       ':' @child [tag_list] alt_scope_beg @child set_list alt_scope_end
-                      }
+unscoped_set_tail   : {':' @child unscoped_set | ' ' @sibling unscoped_set | ':' untagged_scoped_set | ' ' @sibling scoped_set}
 
-//// Comments before 
-//// TODO: This needs some work to make sure that "a /* comment_before_b */ b" can't be generated
-////                                     but that "a /* comment_after_a  */ b" can
-// file                : [@child set_list]
-// set_list            : scoped_set [separator @sibling set_list] | unscoped_set [unscoped_separator @sibling set_list]
-// scoped_set          : {[tag_list] [@pre_comment pre_comment] untagged_scoped_set}
-// pre_comment         : '/' '/' [' '] [@fill c_code_content] '\n' | '/' '*' [@fill cpp_code_content] '*' '/'
-// c_code_content      : 'c''o''m''m''e''n''t' // TODO: Arbitrary strings, including C-style comments, as long as /* */ pairs are balanced
-// cpp_code_content    : 'c''o''m''m''e''n''t' // TODO: Arbitrary strings that don't start with space
-// untagged_scoped_set : '{' [@child set_list] '}' | alt_scope_beg [@child set_list] alt_scope_end
-// unscoped_set        : {[tag_list] @fill label [unscoped_set_tail]}
-// tag_list            : '@' @tag tag ' ' [tag_list]
-// tag                 : @fill id['(' [@child set_list] ')']
-// unscoped_set_tail   : {':' @child unscoped_set | ' ' @sibling unscoped_set | set_tail_hack | ' ' @sibling scoped_set}
-// set_tail_hack       : {':' [@child tag_list] '{' '}' | 
-//                        ':' @child [tag_list] '{' @child set_list '}' | 
-//                        ':' [@child tag_list] alt_scope_beg alt_scope_end | 
-//                        ':' @child [tag_list] alt_scope_beg @child set_list alt_scope_end
-//                       }
+/* Comments
+ * Comments around nodes are accessible to the user. Here's how they behave:
+ * - The text inside a comment immediately following a node is stored as the 
+ *   comment_after member of that node. No newlines can happen between a 
+ *   node and its comment_after.
+ * - The text inside a comment preceding a node is stored as the 
+ *   comment_before of that node _unless_ it is already the comment_after 
+ *   of another node. One newline between the comment and the node is 
+ *   obviously necessary in the case of C++-style comments and it's also
+ *   allowed for C-style comments.
+ * - If the first character inside a C++-style comment is a space, it's
+ *   omitted from the stored string.
+ *
+ * The semantically annotated Backus-Naur form that we're using is not a 
+ * good fit to describe the grammar of comments.
+ * To prevent the comment in "a /* comment */ b" from being interpreted as 
+ * a comment_before of "b", instead of what it is (a comment_after of "a"),
+ * we would have to complicate the grammar by introducing several extra 
+ * productions with this specific purpose in mind.
+ * The sensitivity of whitespace in the attachment of comments to nodes is
+ * also cumbersome to express in BNF.
+ */