From e4fbe7a81df6e47c0f7b52513fc76f1ca0e6c350 Mon Sep 17 00:00:00 2001
From: Miguel Lechon <miguel.lechon@gmail.com>
Date: Sat, 13 Mar 2021 10:01:08 +0100
Subject: [PATCH] Report more parsing errors.

Report errors for invalid hash directives, invalid tag tokens, and
unexpected reserved characters.
---
 samples/syntax_errors/08.md |  1 +
 samples/syntax_errors/09.md |  1 +
 samples/syntax_errors/10.md |  2 +
 source/md_impl.c            | 93 ++++++++++++++++++++-----------------
 tests/grammar.c             |  3 --
 5 files changed, 55 insertions(+), 45 deletions(-)
 create mode 100644 samples/syntax_errors/08.md
 create mode 100644 samples/syntax_errors/09.md
 create mode 100644 samples/syntax_errors/10.md

diff --git a/samples/syntax_errors/08.md b/samples/syntax_errors/08.md
new file mode 100644
index 0000000..53c5fdf
--- /dev/null
+++ b/samples/syntax_errors/08.md
@@ -0,0 +1 @@
+#include <stdio.h>
diff --git a/samples/syntax_errors/09.md b/samples/syntax_errors/09.md
new file mode 100644
index 0000000..adb837b
--- /dev/null
+++ b/samples/syntax_errors/09.md
@@ -0,0 +1 @@
+@"tag" node
diff --git a/samples/syntax_errors/10.md b/samples/syntax_errors/10.md
new file mode 100644
index 0000000..e1c81ff
--- /dev/null
+++ b/samples/syntax_errors/10.md
@@ -0,0 +1,2 @@
+#namespace foo
+{a,#b}
diff --git a/source/md_impl.c b/source/md_impl.c
index 30e99e7..09ef4c4 100644
--- a/source/md_impl.c
+++ b/source/md_impl.c
@@ -114,8 +114,9 @@ MD_CharIsSymbol(MD_u8 c)
 MD_FUNCTION_IMPL MD_b32
 MD_CharIsReservedSymbol(MD_u8 c)
 {
-    return (c == '{' || c == '}' || c == '(' || c == ')' ||
-            c == '[' || c == ']' || c == '#');
+    return (c == '{' || c == '}' || c == '(' || c == ')' || c == '\\' ||
+            c == '[' || c == ']' || c == '#' || c == ',' || c == ';'  ||
+            c == ':' || c == '@');
 }
 
 MD_FUNCTION_IMPL MD_b32
@@ -1611,40 +1612,8 @@ _MD_ParseOneNode(MD_ParseCtx *ctx)
     MD_TokenGroups skip_groups = MD_TokenGroup_Whitespace|MD_TokenGroup_Comment;
     MD_Token next_token = MD_Parse_PeekSkipSome(ctx, skip_groups);
     
-    // NOTE(rjf): #-things (just namespaces right now, but can be used for other such
-    // 'directives' in the future maybe)
-    if(MD_Parse_Require(ctx, MD_S8Lit("#"), MD_TokenKind_Symbol))
-    {
-        // NOTE(rjf): Namespaces
-        if(MD_Parse_Require(ctx, MD_S8Lit("namespace"), MD_TokenKind_Identifier))
-        {
-            if(MD_Parse_RequireKind(ctx, MD_TokenKind_Identifier, &token))
-            {
-                MD_NodeTableSlot *existing_namespace_slot = MD_NodeTable_Lookup(&ctx->namespace_table, token.string);
-                if(existing_namespace_slot == 0)
-                {
-                    MD_Node *ns = _MD_MakeNodeFromString_Ctx(ctx, MD_NodeKind_Namespace, token.string);
-                    MD_NodeTable_Insert(&ctx->namespace_table, MD_NodeTableCollisionRule_Overwrite, token.string, ns);
-                }
-                ctx->selected_namespace = existing_namespace_slot->node;
-                goto end_parse;
-            }
-            else
-            {
-                ctx->selected_namespace = 0;
-                goto end_parse;
-            }
-        }
-        
-        // NOTE(rjf): Not a valid hash thing
-        else
-        {
-            goto end_parse;
-        }
-    }
-    
     // NOTE(rjf): Unnamed Sets
-    else if((MD_Parse_TokenMatch(next_token, MD_S8Lit("("), 0) ||
+    if((MD_Parse_TokenMatch(next_token, MD_S8Lit("("), 0) ||
              MD_Parse_TokenMatch(next_token, MD_S8Lit("{"), 0) ||
              MD_Parse_TokenMatch(next_token, MD_S8Lit("["), 0)) &&
             next_token.kind == MD_TokenKind_Symbol )
@@ -1680,17 +1649,14 @@ _MD_ParseOneNode(MD_ParseCtx *ctx)
         else if(token.kind == MD_TokenKind_Symbol && token.string.size == 1 && MD_CharIsReservedSymbol(token.string.str[0]))
         {
             MD_u8 c = token.string.str[0];
-            const char *error_message = 0;
             if(c == '}' || c == ']' || c == ')')
             {
-                error_message = "Unbalanced";
+                _MD_Error(ctx, result.node, ctx->at-token.outer_string.size, 1, "Unbalanced \"%c\"", c);
             }
             else
             {
-                error_message = "Unexpected reserved symbol";
+                _MD_Error(ctx, result.node, ctx->at-token.outer_string.size, 0, "Unexpected reserved symbol \"%c\"", c);
             }
-
-            _MD_Error(ctx, result.node, ctx->at-token.outer_string.size, 1, "%s \"%c\"", error_message, c);
         }
 
         // NOTE(rjf): Children
@@ -1915,8 +1881,7 @@ _MD_ParseTagList(MD_ParseCtx *ctx, MD_Node **first_out, MD_Node **last_out)
         {
             MD_Parse_Bump(ctx, next_token);
             
-            MD_Token name;
-            _MD_MemoryZero(&name, sizeof(name));
+            MD_Token name = MD_ZERO_STRUCT;
             if(MD_Parse_RequireKind(ctx, MD_TokenKind_Identifier, &name))
             {
                 MD_Node *tag = _MD_MakeNodeFromToken_Ctx(ctx, MD_NodeKind_Tag, name);
@@ -1929,6 +1894,10 @@ _MD_ParseTagList(MD_ParseCtx *ctx, MD_Node **first_out, MD_Node **last_out)
             }
             else
             {
+                MD_Token token = MD_Parse_PeekSkipSome(ctx, 0);
+                _MD_Error(ctx, 0, token.outer_string.str, 0,
+                          "Tag \"%.*s\" is not a proper identifier", MD_StringExpand(token.outer_string));
+                // NOTE(mal): There are reasons to consume the non-tag token, but also to leave it.
                 break;
             }
         }
@@ -1949,6 +1918,12 @@ MD_ParseOneNode(MD_String8 filename, MD_String8 contents)
     return _MD_ParseOneNode(&ctx);
 }
 
+// TODO(mal): Make this public once the full story for namespaces is in place
+MD_PRIVATE_FUNCTION_IMPL void
+_MD_InsertToNamespace(MD_Node *ns, MD_Node *node)
+{
+}
+
 MD_FUNCTION MD_ParseResult
 MD_ParseWholeString(MD_String8 filename, MD_String8 contents)
 {
@@ -1961,6 +1936,39 @@ MD_ParseWholeString(MD_String8 filename, MD_String8 contents)
         MD_NodeFlags next_child_flags = 0;
         for(MD_u64 child_idx = 0;; child_idx += 1)
         {
+            // NOTE(rjf): #-things (just namespaces right now, but can be used for other such
+            // 'directives' in the future maybe)
+            if(MD_Parse_Require(&ctx, MD_S8Lit("#"), MD_TokenKind_Symbol))
+            {
+                // NOTE(rjf): Namespaces
+                if(MD_Parse_Require(&ctx, MD_S8Lit("namespace"), MD_TokenKind_Identifier))
+                {
+                    MD_Token token = MD_ZERO_STRUCT;
+                    if(MD_Parse_RequireKind(&ctx, MD_TokenKind_Identifier, &token))
+                    {
+                        MD_NodeTableSlot *existing_namespace_slot = MD_NodeTable_Lookup(&ctx.namespace_table, token.string);
+                        if(existing_namespace_slot == 0)
+                        {
+                            MD_Node *ns = _MD_MakeNodeFromString_Ctx(&ctx, MD_NodeKind_Namespace, token.string);
+                            MD_NodeTable_Insert(&ctx.namespace_table, MD_NodeTableCollisionRule_Overwrite, token.string, ns);
+                            existing_namespace_slot = MD_NodeTable_Lookup(&ctx.namespace_table, token.string);
+                        }
+                        ctx.selected_namespace = existing_namespace_slot->node;
+                    }
+                    else
+                    {
+                        ctx.selected_namespace = 0;
+                    }
+                }
+                // NOTE(rjf): Not a valid hash thing
+                else
+                {
+                    MD_Token token = MD_Parse_PeekSkipSome(&ctx, 0);
+                    _MD_Error(&ctx, 0, ctx.at, 0, "Invalid hash directive \"%.*s\"",
+                              MD_StringExpand(token.outer_string));
+                }
+            }
+
             MD_ParseResult parse = _MD_ParseOneNode(&ctx);
             MD_Node *child = parse.node;
             child->flags |= next_child_flags;
@@ -1972,6 +1980,7 @@ MD_ParseWholeString(MD_String8 filename, MD_String8 contents)
             else
             {
                 _MD_PushNodeToList(&root->first_child, &root->last_child, root, child);
+                _MD_InsertToNamespace(ctx.selected_namespace, child);
             }
 
             if(MD_Parse_Require(&ctx, MD_S8Lit(","), MD_TokenKind_Symbol))
diff --git a/tests/grammar.c b/tests/grammar.c
index de74a29..1556f81 100644
--- a/tests/grammar.c
+++ b/tests/grammar.c
@@ -405,9 +405,6 @@ struct Test
     Test *next;
 };
 
-// TODO(mal): Use data table instead of smuggling depth as file_contents
-#define GET_DEPTH(node) ((MD_u64)((node)->file_contents))
-#define SET_DEPTH(node, v) (node)->file_contents = (MD_u8 *)(v);
 static void ComputeElementDepth(MD_Node *re)
 {
     MD_u64 result = 0;