[impl] more passing over the parser; found an odd part and made a test to reveal a bug

This commit is contained in:
Allen Webster
2021-06-06 01:06:36 -07:00
parent dd445676f5
commit bf2d161158
5 changed files with 102 additions and 87 deletions
-8
View File
@@ -1042,14 +1042,6 @@ main:
return: MD_Token,
};
@send(Parsing)
@func MD_Parse_TokenMatch: {
token: MD_Token,
string: MD_String8,
flags: MD_StringMatchFlags,
return: MD_b32,
};
@send(Parsing)
@func MD_Parse_Require: {
ctx: *MD_ParseCtx,
+1 -1
View File
@@ -16,7 +16,7 @@ int main(int argument_count, char **arguments)
MD_PushSibling(&first, &last, root);
}
// NOTE(rjf): Put errors on every single node.
// NOTE(rjf): Print errors on every single node.
for(MD_EachNode(root, first))
{
for(MD_EachNode(node, root->first_child))
+1 -3
View File
@@ -687,11 +687,10 @@ MD_FUNCTION void MD_Parse_Bump(MD_ParseCtx *ctx, MD_Token token);
MD_FUNCTION void MD_Parse_BumpNext(MD_ParseCtx *ctx);
MD_FUNCTION MD_Token MD_Parse_LexNext(MD_ParseCtx *ctx);
MD_FUNCTION MD_Token MD_Parse_PeekSkipSome(MD_ParseCtx *ctx, MD_TokenGroups skip_groups);
MD_FUNCTION MD_b32 MD_Parse_TokenMatch(MD_Token token, MD_String8 string, MD_MatchFlags flags);
MD_FUNCTION MD_b32 MD_Parse_Require(MD_ParseCtx *ctx, MD_String8 string, MD_TokenKind kind);
MD_FUNCTION MD_b32 MD_Parse_RequireKind(MD_ParseCtx *ctx, MD_TokenKind kind, MD_Token *out_token);
MD_FUNCTION void MD_Parse_Set(MD_ParseCtx *ctx, MD_Node *parent,
MD_FUNCTION void MD_Parse_Set(MD_ParseCtx *ctx, MD_Node *root,
MD_ParseSetFlags flags);
MD_FUNCTION MD_ParseResult MD_ParseOneNodeFromCtx(MD_ParseCtx *ctx);
@@ -713,7 +712,6 @@ MD_FUNCTION MD_Node *MD_MakeNode(MD_NodeKind kind, MD_String8 string,
MD_FUNCTION void MD_PushSibling(MD_Node **first, MD_Node **last, MD_Node *new_sibling);
MD_FUNCTION void MD_PushChild(MD_Node *parent, MD_Node *new_child);
MD_FUNCTION void MD_PushTag(MD_Node *node, MD_Node *tag);
MD_FUNCTION void MD_InsertToNamespace(MD_Node *ns, MD_Node *node);
MD_FUNCTION MD_Node *MD_PushReference(MD_Node *list, MD_Node *target);
//~ Introspection Helpers
+76 -75
View File
@@ -1604,12 +1604,6 @@ MD_Parse_PeekSkipSome(MD_ParseCtx *ctx, MD_TokenGroups skip_groups)
return result;
}
// Compares the text of `token` against `string` by forwarding token.string,
// `string`, and `flags` to MD_StringMatch, and returns that call's result.
MD_FUNCTION_IMPL MD_b32
MD_Parse_TokenMatch(MD_Token token, MD_String8 string, MD_MatchFlags flags)
{
    MD_b32 is_match = MD_StringMatch(token.string, string, flags);
    return is_match;
}
MD_FUNCTION_IMPL MD_b32
MD_Parse_Require(MD_ParseCtx *ctx, MD_String8 string, MD_TokenKind kind)
{
@@ -1687,42 +1681,50 @@ MD_Parse_Set(MD_ParseCtx *ctx, MD_Node *parent, MD_ParseSetFlags flags)
set_opener = '[';
}
// something else
MD_b32 brace = 0;
MD_b32 paren = 0;
MD_b32 bracket = 0;
MD_b32 terminate_with_separator = 0;
// attach left-symbol flag to parent
switch (set_opener){
default:
{
terminate_with_separator = (!!(flags & MD_ParseSetFlag_Implicit));
}break;
case '{':
{
parent->flags |= MD_NodeFlag_BraceLeft;
brace = 1;
}break;
case '(':
{
parent->flags |= MD_NodeFlag_ParenLeft;
paren = 1;
}break;
case '[':
{
parent->flags |= MD_NodeFlag_BracketLeft;
bracket = 1;
}break;
}
// determine set close rule
MD_b32 close_with_brace = 0;
MD_b32 close_with_paren = 0;
MD_b32 close_with_separator = 0;
switch (set_opener){
default:
{
close_with_separator = (!!(flags & MD_ParseSetFlag_Implicit));
}break;
case '{':
{
close_with_brace = 1;
}break;
case '(':
case '[':
{
close_with_paren = 1;
}break;
}
// NOTE(rjf): Parse children.
if((set_opener != 0) || terminate_with_separator)
if((set_opener != 0) || close_with_separator)
{
MD_u8 *at_before_children = ctx->at;
MD_NodeFlags next_child_flags = 0;
for(;;)
{
if(brace)
if(close_with_brace)
{
if(MD_Parse_Require(ctx, MD_S8Lit("}"), MD_TokenKind_Symbol))
{
@@ -1730,7 +1732,7 @@ MD_Parse_Set(MD_ParseCtx *ctx, MD_Node *parent, MD_ParseSetFlags flags)
goto end_parse;
}
}
else if(paren || bracket)
else if(close_with_paren)
{
if((flags & MD_ParseSetFlag_Paren) &&
MD_Parse_Require(ctx, MD_S8Lit(")"), MD_TokenKind_Symbol))
@@ -1749,9 +1751,9 @@ MD_Parse_Set(MD_ParseCtx *ctx, MD_Node *parent, MD_ParseSetFlags flags)
{
MD_Token peek = MD_Parse_PeekSkipSome(ctx, MD_TokenGroup_Whitespace | MD_TokenGroup_Comment);
if(peek.kind == MD_TokenKind_Symbol &&
(MD_Parse_TokenMatch(peek, MD_S8Lit("}"), 0) ||
MD_Parse_TokenMatch(peek, MD_S8Lit(")"), 0) ||
MD_Parse_TokenMatch(peek, MD_S8Lit("]"), 0)))
(MD_StringMatch(peek.string, MD_S8Lit("}"), 0) ||
MD_StringMatch(peek.string, MD_S8Lit(")"), 0) ||
MD_StringMatch(peek.string, MD_S8Lit("]"), 0)))
{
goto end_parse;
}
@@ -1773,8 +1775,37 @@ MD_Parse_Set(MD_ParseCtx *ctx, MD_Node *parent, MD_ParseSetFlags flags)
// connect node into graph
MD_PushChild(parent, child);
// check trailing symbol
MD_u32 symbol_flags = 0;
if (!close_with_separator){
if(MD_Parse_Require(ctx, MD_S8Lit(","), MD_TokenKind_Symbol))
{
symbol_flags = MD_NodeFlag_BeforeComma;
}
else if(MD_Parse_Require(ctx, MD_S8Lit(";"), MD_TokenKind_Symbol))
{
symbol_flags = MD_NodeFlag_BeforeSemicolon;
}
}
// fill flags from surrounding context
child->flags |= next_child_flags;
child->flags |= next_child_flags|symbol_flags;
// setup next_child_flags
next_child_flags = MD_NodeFlag_AfterFromBefore(symbol_flags);
// separator close condition
if(close_with_separator)
{
MD_Token next_token = MD_Parse_PeekSkipSome(ctx, 0);
if(next_token.kind == MD_TokenKind_Newline ||
(next_token.kind == MD_TokenKind_Symbol &&
(MD_StringMatch(next_token.string, MD_S8Lit(","), 0) ||
MD_StringMatch(next_token.string, MD_S8Lit(";"), 0))))
{
goto end_parse;
}
}
// TODO(allen): I find it kind of concerning that ParseWholeString and
// ParseOneNode are both doing this. I did some refactors in the
@@ -1783,38 +1814,6 @@ MD_Parse_Set(MD_ParseCtx *ctx, MD_Node *parent, MD_ParseSetFlags flags)
// I also see that these really are slightly different, but it seems
// like it should be possible to express the whole-string case as a
// special case of this and avoid the duplication.
// NOTE(rjf): Separators.
next_child_flags = 0;
{
MD_b32 result = 0;
if(terminate_with_separator)
{
MD_Token next_token = MD_Parse_PeekSkipSome(ctx, 0);
if(next_token.kind == MD_TokenKind_Newline ||
(next_token.kind == MD_TokenKind_Symbol &&
(MD_StringMatch(next_token.string, MD_S8Lit(","), 0) ||
MD_StringMatch(next_token.string, MD_S8Lit(";"), 0))))
{
result = 1;
}
}
else if(MD_Parse_Require(ctx, MD_S8Lit(","), MD_TokenKind_Symbol))
{
child->flags |= MD_NodeFlag_BeforeComma;
next_child_flags |= MD_NodeFlag_AfterComma;
}
else if(MD_Parse_Require(ctx, MD_S8Lit(";"), MD_TokenKind_Symbol))
{
child->flags |= MD_NodeFlag_BeforeSemicolon;
next_child_flags |= MD_NodeFlag_AfterSemicolon;
}
if(result)
{
goto end_parse;
}
}
}
}
@@ -1888,10 +1887,10 @@ MD_ParseOneNodeFromCtx(MD_ParseCtx *ctx)
retry:
// NOTE(rjf): Unnamed Sets
if((MD_Parse_TokenMatch(next_token, MD_S8Lit("("), 0) ||
MD_Parse_TokenMatch(next_token, MD_S8Lit("{"), 0) ||
MD_Parse_TokenMatch(next_token, MD_S8Lit("["), 0)) &&
next_token.kind == MD_TokenKind_Symbol )
if(next_token.kind == MD_TokenKind_Symbol &&
(MD_StringMatch(next_token.string, MD_S8Lit("("), 0) ||
MD_StringMatch(next_token.string, MD_S8Lit("{"), 0) ||
MD_StringMatch(next_token.string, MD_S8Lit("["), 0)))
{
result.node = _MD_MakeNode_Ctx(ctx, MD_NodeKind_Label,
MD_S8Lit(""), MD_S8Lit(""),
@@ -1947,6 +1946,18 @@ MD_ParseOneNodeFromCtx(MD_ParseCtx *ctx)
MD_ParseSetFlag_Bracket |
MD_ParseSetFlag_Implicit);
// TODO(allen): This poking in an error "from afar" thing seems
// like a bad sign to me. First, it took a bit of digging for me to
// understand how this code actually detects the errors it says it
// does. Second, it's kind of unclear that this should be illegal.
// I mean we can do these:
// `label: @tag child`
// `label: child @tag {children}`
// `label: @tag child`
// I do get *why* this is an odd thing to allow, but it's weird either way.
// Third, it looks like this also throws an error in the totally legal case:
// `label:{@tag {bar}}`
// NOTE(mal): Generate error for tags in positions such as "label:@tag {children}"
MD_Node *fc = result.node->first_child;
if(fc == result.node->last_child && !MD_NodeIsNil(fc->first_tag) && // NOTE(mal): One child. Tagged.
@@ -2139,7 +2150,7 @@ MD_ParseWholeString(MD_String8 filename, MD_String8 contents)
// connect node into graph
MD_PushChild(root, child);
MD_InsertToNamespace(selected_namespace, child);
MD_PushReference(selected_namespace, child);
// check trailing symbol
MD_u32 symbol_flags = 0;
@@ -2279,21 +2290,11 @@ MD_PushTag(MD_Node *node, MD_Node *tag)
tag->parent = node;
}
// Makes a Reference-kind node that copies `node`'s string, whole_string,
// filename, file_contents, and at fields, sets its ref_target to `node`,
// and pushes it as a child of `ns`.
MD_FUNCTION_IMPL void
MD_InsertToNamespace(MD_Node *ns, MD_Node *node)
{
    MD_Node *reference = MD_MakeNode(MD_NodeKind_Reference,
                                     node->string, node->whole_string,
                                     node->filename, node->file_contents,
                                     node->at);
    reference->ref_target = node;
    MD_PushChild(ns, reference);
}
MD_FUNCTION_IMPL MD_Node*
MD_PushReference(MD_Node *list, MD_Node *target)
{
MD_String8 string = MD_S8Lit("`reference node`");
MD_Node *n = MD_MakeNode(MD_NodeKind_Reference, string, string, string, 0, 0);
MD_Node *n = MD_MakeNode(MD_NodeKind_Reference, target->string, target->whole_string,
target->filename, target->file_contents, target->at);
n->ref_target = target;
MD_PushChild(list, n);
return(n);
+24
View File
@@ -724,5 +724,29 @@ int main(void)
}
}
// Parses four small inputs that place a tag after `foo:` in different
// positions and checks, for each, whether the parse result carries an error
// (result.first_error).
Test("Tagged & Unlabeled")
{
// TODO(allen): these tests check for rules that I find fishy; maybe instead
// of trying to pass these tests, we should adjust the rules so we don't have
// this odd expected behavior to begin with? Not sure.
MD_String8 file_name = MD_S8Lit("raw_text");
// Tagged unlabeled set nested inside an explicit brace set: asserts no error.
{
MD_ParseResult result = MD_ParseWholeString(file_name, MD_S8Lit("foo:{@tag {bar}}\n"));
TestResult(result.first_error == 0);
}
// Tag followed by a labeled child directly after the colon: asserts no error.
{
MD_ParseResult result = MD_ParseWholeString(file_name, MD_S8Lit("foo:@tag bar\n"));
TestResult(result.first_error == 0);
}
// Labeled child first, then a tagged unlabeled set: asserts no error.
{
MD_ParseResult result = MD_ParseWholeString(file_name, MD_S8Lit("foo:bar @tag {bar}\n"));
TestResult(result.first_error == 0);
}
// Tag followed by an unlabeled set directly after the colon: asserts that
// an error IS reported.
{
MD_ParseResult result = MD_ParseWholeString(file_name, MD_S8Lit("foo:@tag {bar}\n"));
TestResult(result.first_error != 0);
}
}
return 0;
}