From e4fbe7a81df6e47c0f7b52513fc76f1ca0e6c350 Mon Sep 17 00:00:00 2001 From: Miguel Lechon Date: Sat, 13 Mar 2021 10:01:08 +0100 Subject: [PATCH] More parsing errors. Invalid hash directives. Invalid tag token. Reserved characters. --- samples/syntax_errors/08.md | 1 + samples/syntax_errors/09.md | 1 + samples/syntax_errors/10.md | 2 + source/md_impl.c | 93 ++++++++++++++++++++----------------- tests/grammar.c | 3 -- 5 files changed, 55 insertions(+), 45 deletions(-) create mode 100644 samples/syntax_errors/08.md create mode 100644 samples/syntax_errors/09.md create mode 100644 samples/syntax_errors/10.md diff --git a/samples/syntax_errors/08.md b/samples/syntax_errors/08.md new file mode 100644 index 0000000..53c5fdf --- /dev/null +++ b/samples/syntax_errors/08.md @@ -0,0 +1 @@ +#include diff --git a/samples/syntax_errors/09.md b/samples/syntax_errors/09.md new file mode 100644 index 0000000..adb837b --- /dev/null +++ b/samples/syntax_errors/09.md @@ -0,0 +1 @@ +@"tag" node diff --git a/samples/syntax_errors/10.md b/samples/syntax_errors/10.md new file mode 100644 index 0000000..e1c81ff --- /dev/null +++ b/samples/syntax_errors/10.md @@ -0,0 +1,2 @@ +#namespace foo +{a,#b} diff --git a/source/md_impl.c b/source/md_impl.c index 30e99e7..09ef4c4 100644 --- a/source/md_impl.c +++ b/source/md_impl.c @@ -114,8 +114,9 @@ MD_CharIsSymbol(MD_u8 c) MD_FUNCTION_IMPL MD_b32 MD_CharIsReservedSymbol(MD_u8 c) { - return (c == '{' || c == '}' || c == '(' || c == ')' || - c == '[' || c == ']' || c == '#'); + return (c == '{' || c == '}' || c == '(' || c == ')' || c == '\\' || + c == '[' || c == ']' || c == '#' || c == ',' || c == ';' || + c == ':' || c == '@'); } MD_FUNCTION_IMPL MD_b32 @@ -1611,40 +1612,8 @@ _MD_ParseOneNode(MD_ParseCtx *ctx) MD_TokenGroups skip_groups = MD_TokenGroup_Whitespace|MD_TokenGroup_Comment; MD_Token next_token = MD_Parse_PeekSkipSome(ctx, skip_groups); - // NOTE(rjf): #-things (just namespaces right now, but can be used for other such - // 'directives' in the future maybe) - if(MD_Parse_Require(ctx, MD_S8Lit("#"), MD_TokenKind_Symbol)) - { - // NOTE(rjf): Namespaces - if(MD_Parse_Require(ctx, MD_S8Lit("namespace"), MD_TokenKind_Identifier)) - { - if(MD_Parse_RequireKind(ctx, MD_TokenKind_Identifier, &token)) - { - MD_NodeTableSlot *existing_namespace_slot = MD_NodeTable_Lookup(&ctx->namespace_table, token.string); - if(existing_namespace_slot == 0) - { - MD_Node *ns = _MD_MakeNodeFromString_Ctx(ctx, MD_NodeKind_Namespace, token.string); - MD_NodeTable_Insert(&ctx->namespace_table, MD_NodeTableCollisionRule_Overwrite, token.string, ns); - } - ctx->selected_namespace = existing_namespace_slot->node; - goto end_parse; - } - else - { - ctx->selected_namespace = 0; - goto end_parse; - } - } - - // NOTE(rjf): Not a valid hash thing - else - { - goto end_parse; - } - } - // NOTE(rjf): Unnamed Sets - else if((MD_Parse_TokenMatch(next_token, MD_S8Lit("("), 0) || + if((MD_Parse_TokenMatch(next_token, MD_S8Lit("("), 0) || MD_Parse_TokenMatch(next_token, MD_S8Lit("{"), 0) || MD_Parse_TokenMatch(next_token, MD_S8Lit("["), 0)) && next_token.kind == MD_TokenKind_Symbol ) @@ -1680,17 +1649,14 @@ _MD_ParseOneNode(MD_ParseCtx *ctx) else if(token.kind == MD_TokenKind_Symbol && token.string.size == 1 && MD_CharIsReservedSymbol(token.string.str[0])) { MD_u8 c = token.string.str[0]; - const char *error_message = 0; if(c == '}' || c == ']' || c == ')') { - error_message = "Unbalanced"; + _MD_Error(ctx, result.node, ctx->at-token.outer_string.size, 1, "Unbalanced \"%c\"", c); } else { - error_message = "Unexpected reserved symbol"; + _MD_Error(ctx, result.node, ctx->at-token.outer_string.size, 0, "Unexpected reserved symbol \"%c\"", c); } - - _MD_Error(ctx, result.node, ctx->at-token.outer_string.size, 1, "%s \"%c\"", error_message, c); } // NOTE(rjf): Children @@ -1915,8 +1881,7 @@ _MD_ParseTagList(MD_ParseCtx *ctx, MD_Node **first_out, MD_Node **last_out) { MD_Parse_Bump(ctx, next_token); - MD_Token name; - _MD_MemoryZero(&name, sizeof(name)); + MD_Token name = MD_ZERO_STRUCT; if(MD_Parse_RequireKind(ctx, MD_TokenKind_Identifier, &name)) { MD_Node *tag = _MD_MakeNodeFromToken_Ctx(ctx, MD_NodeKind_Tag, name); @@ -1929,6 +1894,10 @@ _MD_ParseTagList(MD_ParseCtx *ctx, MD_Node **first_out, MD_Node **last_out) } else { + MD_Token token = MD_Parse_PeekSkipSome(ctx, 0); + _MD_Error(ctx, 0, token.outer_string.str, 0, + "Tag \"%.*s\" is not a proper identifier", MD_StringExpand(token.outer_string)); + // NOTE(mal): There are reasons to consume the non-tag token, but also to leave it. break; } } @@ -1949,6 +1918,12 @@ MD_ParseOneNode(MD_String8 filename, MD_String8 contents) return _MD_ParseOneNode(&ctx); } +// TODO(mal): Make this public once the full story for namespaces is in place +MD_PRIVATE_FUNCTION_IMPL void +_MD_InsertToNamespace(MD_Node *ns, MD_Node *node) +{ +} + MD_FUNCTION MD_ParseResult MD_ParseWholeString(MD_String8 filename, MD_String8 contents) { @@ -1961,6 +1936,39 @@ MD_ParseWholeString(MD_String8 filename, MD_String8 contents) MD_NodeFlags next_child_flags = 0; for(MD_u64 child_idx = 0;; child_idx += 1) { + // NOTE(rjf): #-things (just namespaces right now, but can be used for other such + // 'directives' in the future maybe) + if(MD_Parse_Require(&ctx, MD_S8Lit("#"), MD_TokenKind_Symbol)) + { + // NOTE(rjf): Namespaces + if(MD_Parse_Require(&ctx, MD_S8Lit("namespace"), MD_TokenKind_Identifier)) + { + MD_Token token = MD_ZERO_STRUCT; + if(MD_Parse_RequireKind(&ctx, MD_TokenKind_Identifier, &token)) + { + MD_NodeTableSlot *existing_namespace_slot = MD_NodeTable_Lookup(&ctx.namespace_table, token.string); + if(existing_namespace_slot == 0) + { + MD_Node *ns = _MD_MakeNodeFromString_Ctx(&ctx, MD_NodeKind_Namespace, token.string); + MD_NodeTable_Insert(&ctx.namespace_table, MD_NodeTableCollisionRule_Overwrite, token.string, ns); + existing_namespace_slot = MD_NodeTable_Lookup(&ctx.namespace_table, token.string); + } + ctx.selected_namespace = existing_namespace_slot->node; + } + else + { + ctx.selected_namespace = 0; + } + } + // NOTE(rjf): Not a valid hash thing + else + { + MD_Token token = MD_Parse_PeekSkipSome(&ctx, 0); + _MD_Error(&ctx, 0, ctx.at, 0, "Invalid hash directive \"%.*s\"", + MD_StringExpand(token.outer_string)); + } + } + MD_ParseResult parse = _MD_ParseOneNode(&ctx); MD_Node *child = parse.node; child->flags |= next_child_flags; @@ -1972,6 +1980,7 @@ MD_ParseWholeString(MD_String8 filename, MD_String8 contents) else { _MD_PushNodeToList(&root->first_child, &root->last_child, root, child); + _MD_InsertToNamespace(ctx.selected_namespace, child); } if(MD_Parse_Require(&ctx, MD_S8Lit(","), MD_TokenKind_Symbol)) diff --git a/tests/grammar.c b/tests/grammar.c index de74a29..1556f81 100644 --- a/tests/grammar.c +++ b/tests/grammar.c @@ -405,9 +405,6 @@ struct Test Test *next; }; -// TODO(mal): Use data table instead of smuggling depth as file_contents -#define GET_DEPTH(node) ((MD_u64)((node)->file_contents)) -#define SET_DEPTH(node, v) (node)->file_contents = (MD_u8 *)(v); static void ComputeElementDepth(MD_Node *re) { MD_u64 result = 0;