mirror of
https://github.com/Ed94/metadesk.git
synced 2026-07-02 08:11:49 -07:00
[impl] more passing over the parser; found an odd part and made a test to reveal a bug
This commit is contained in:
@@ -1042,14 +1042,6 @@ main:
|
||||
return: MD_Token,
|
||||
};
|
||||
|
||||
@send(Parsing)
|
||||
@func MD_Parse_TokenMatch: {
|
||||
token: MD_Token,
|
||||
string: MD_String8,
|
||||
flags: MD_StringMatchFlags,
|
||||
return: MD_b32,
|
||||
};
|
||||
|
||||
@send(Parsing)
|
||||
@func MD_Parse_Require: {
|
||||
ctx: *MD_ParseCtx,
|
||||
|
||||
@@ -16,7 +16,7 @@ int main(int argument_count, char **arguments)
|
||||
MD_PushSibling(&first, &last, root);
|
||||
}
|
||||
|
||||
// NOTE(rjf): Put errors on every single node.
|
||||
// NOTE(rjf): Print errors on every single node.
|
||||
for(MD_EachNode(root, first))
|
||||
{
|
||||
for(MD_EachNode(node, root->first_child))
|
||||
|
||||
+1
-3
@@ -687,11 +687,10 @@ MD_FUNCTION void MD_Parse_Bump(MD_ParseCtx *ctx, MD_Token token);
|
||||
MD_FUNCTION void MD_Parse_BumpNext(MD_ParseCtx *ctx);
|
||||
MD_FUNCTION MD_Token MD_Parse_LexNext(MD_ParseCtx *ctx);
|
||||
MD_FUNCTION MD_Token MD_Parse_PeekSkipSome(MD_ParseCtx *ctx, MD_TokenGroups skip_groups);
|
||||
MD_FUNCTION MD_b32 MD_Parse_TokenMatch(MD_Token token, MD_String8 string, MD_MatchFlags flags);
|
||||
MD_FUNCTION MD_b32 MD_Parse_Require(MD_ParseCtx *ctx, MD_String8 string, MD_TokenKind kind);
|
||||
MD_FUNCTION MD_b32 MD_Parse_RequireKind(MD_ParseCtx *ctx, MD_TokenKind kind, MD_Token *out_token);
|
||||
|
||||
MD_FUNCTION void MD_Parse_Set(MD_ParseCtx *ctx, MD_Node *parent,
|
||||
MD_FUNCTION void MD_Parse_Set(MD_ParseCtx *ctx, MD_Node *root,
|
||||
MD_ParseSetFlags flags);
|
||||
|
||||
MD_FUNCTION MD_ParseResult MD_ParseOneNodeFromCtx(MD_ParseCtx *ctx);
|
||||
@@ -713,7 +712,6 @@ MD_FUNCTION MD_Node *MD_MakeNode(MD_NodeKind kind, MD_String8 string,
|
||||
MD_FUNCTION void MD_PushSibling(MD_Node **first, MD_Node **last, MD_Node *new_sibling);
|
||||
MD_FUNCTION void MD_PushChild(MD_Node *parent, MD_Node *new_child);
|
||||
MD_FUNCTION void MD_PushTag(MD_Node *node, MD_Node *tag);
|
||||
MD_FUNCTION void MD_InsertToNamespace(MD_Node *ns, MD_Node *node);
|
||||
MD_FUNCTION MD_Node *MD_PushReference(MD_Node *list, MD_Node *target);
|
||||
|
||||
//~ Introspection Helpers
|
||||
|
||||
+76
-75
@@ -1604,12 +1604,6 @@ MD_Parse_PeekSkipSome(MD_ParseCtx *ctx, MD_TokenGroups skip_groups)
|
||||
return result;
|
||||
}
|
||||
|
||||
MD_FUNCTION_IMPL MD_b32
|
||||
MD_Parse_TokenMatch(MD_Token token, MD_String8 string, MD_MatchFlags flags)
|
||||
{
|
||||
return MD_StringMatch(token.string, string, flags);
|
||||
}
|
||||
|
||||
MD_FUNCTION_IMPL MD_b32
|
||||
MD_Parse_Require(MD_ParseCtx *ctx, MD_String8 string, MD_TokenKind kind)
|
||||
{
|
||||
@@ -1687,42 +1681,50 @@ MD_Parse_Set(MD_ParseCtx *ctx, MD_Node *parent, MD_ParseSetFlags flags)
|
||||
set_opener = '[';
|
||||
}
|
||||
|
||||
// something else
|
||||
MD_b32 brace = 0;
|
||||
MD_b32 paren = 0;
|
||||
MD_b32 bracket = 0;
|
||||
MD_b32 terminate_with_separator = 0;
|
||||
// attach left-symbol flag to parent
|
||||
switch (set_opener){
|
||||
default:
|
||||
{
|
||||
terminate_with_separator = (!!(flags & MD_ParseSetFlag_Implicit));
|
||||
}break;
|
||||
|
||||
case '{':
|
||||
{
|
||||
parent->flags |= MD_NodeFlag_BraceLeft;
|
||||
brace = 1;
|
||||
}break;
|
||||
case '(':
|
||||
{
|
||||
parent->flags |= MD_NodeFlag_ParenLeft;
|
||||
paren = 1;
|
||||
}break;
|
||||
case '[':
|
||||
{
|
||||
parent->flags |= MD_NodeFlag_BracketLeft;
|
||||
bracket = 1;
|
||||
}break;
|
||||
}
|
||||
|
||||
// determine set close rule
|
||||
MD_b32 close_with_brace = 0;
|
||||
MD_b32 close_with_paren = 0;
|
||||
MD_b32 close_with_separator = 0;
|
||||
switch (set_opener){
|
||||
default:
|
||||
{
|
||||
close_with_separator = (!!(flags & MD_ParseSetFlag_Implicit));
|
||||
}break;
|
||||
case '{':
|
||||
{
|
||||
close_with_brace = 1;
|
||||
}break;
|
||||
case '(':
|
||||
case '[':
|
||||
{
|
||||
close_with_paren = 1;
|
||||
}break;
|
||||
}
|
||||
|
||||
// NOTE(rjf): Parse children.
|
||||
if((set_opener != 0) || terminate_with_separator)
|
||||
if((set_opener != 0) || close_with_separator)
|
||||
{
|
||||
MD_u8 *at_before_children = ctx->at;
|
||||
MD_NodeFlags next_child_flags = 0;
|
||||
for(;;)
|
||||
{
|
||||
if(brace)
|
||||
if(close_with_brace)
|
||||
{
|
||||
if(MD_Parse_Require(ctx, MD_S8Lit("}"), MD_TokenKind_Symbol))
|
||||
{
|
||||
@@ -1730,7 +1732,7 @@ MD_Parse_Set(MD_ParseCtx *ctx, MD_Node *parent, MD_ParseSetFlags flags)
|
||||
goto end_parse;
|
||||
}
|
||||
}
|
||||
else if(paren || bracket)
|
||||
else if(close_with_paren)
|
||||
{
|
||||
if((flags & MD_ParseSetFlag_Paren) &&
|
||||
MD_Parse_Require(ctx, MD_S8Lit(")"), MD_TokenKind_Symbol))
|
||||
@@ -1749,9 +1751,9 @@ MD_Parse_Set(MD_ParseCtx *ctx, MD_Node *parent, MD_ParseSetFlags flags)
|
||||
{
|
||||
MD_Token peek = MD_Parse_PeekSkipSome(ctx, MD_TokenGroup_Whitespace | MD_TokenGroup_Comment);
|
||||
if(peek.kind == MD_TokenKind_Symbol &&
|
||||
(MD_Parse_TokenMatch(peek, MD_S8Lit("}"), 0) ||
|
||||
MD_Parse_TokenMatch(peek, MD_S8Lit(")"), 0) ||
|
||||
MD_Parse_TokenMatch(peek, MD_S8Lit("]"), 0)))
|
||||
(MD_StringMatch(peek.string, MD_S8Lit("}"), 0) ||
|
||||
MD_StringMatch(peek.string, MD_S8Lit(")"), 0) ||
|
||||
MD_StringMatch(peek.string, MD_S8Lit("]"), 0)))
|
||||
{
|
||||
goto end_parse;
|
||||
}
|
||||
@@ -1773,8 +1775,37 @@ MD_Parse_Set(MD_ParseCtx *ctx, MD_Node *parent, MD_ParseSetFlags flags)
|
||||
// connect node into graph
|
||||
MD_PushChild(parent, child);
|
||||
|
||||
// check trailing symbol
|
||||
MD_u32 symbol_flags = 0;
|
||||
if (!close_with_separator){
|
||||
if(MD_Parse_Require(ctx, MD_S8Lit(","), MD_TokenKind_Symbol))
|
||||
{
|
||||
symbol_flags = MD_NodeFlag_BeforeComma;
|
||||
}
|
||||
else if(MD_Parse_Require(ctx, MD_S8Lit(";"), MD_TokenKind_Symbol))
|
||||
{
|
||||
symbol_flags = MD_NodeFlag_BeforeSemicolon;
|
||||
}
|
||||
}
|
||||
|
||||
// fill flags from surrounding context
|
||||
child->flags |= next_child_flags;
|
||||
child->flags |= next_child_flags|symbol_flags;
|
||||
|
||||
// setup next_child_flags
|
||||
next_child_flags = MD_NodeFlag_AfterFromBefore(symbol_flags);
|
||||
|
||||
// separator close condition
|
||||
if(close_with_separator)
|
||||
{
|
||||
MD_Token next_token = MD_Parse_PeekSkipSome(ctx, 0);
|
||||
if(next_token.kind == MD_TokenKind_Newline ||
|
||||
(next_token.kind == MD_TokenKind_Symbol &&
|
||||
(MD_StringMatch(next_token.string, MD_S8Lit(","), 0) ||
|
||||
MD_StringMatch(next_token.string, MD_S8Lit(";"), 0))))
|
||||
{
|
||||
goto end_parse;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(allen): I find it kind of concerning that ParseWholeString and
|
||||
// ParseOneNode are both doing this. I did some refactors in the
|
||||
@@ -1783,38 +1814,6 @@ MD_Parse_Set(MD_ParseCtx *ctx, MD_Node *parent, MD_ParseSetFlags flags)
|
||||
// I also see that these really are slightly different, but it seems
|
||||
// like it should be possible to express the whole-string case as a
|
||||
// special case of this and avoid the duplication.
|
||||
|
||||
// NOTE(rjf): Separators.
|
||||
next_child_flags = 0;
|
||||
{
|
||||
MD_b32 result = 0;
|
||||
if(terminate_with_separator)
|
||||
{
|
||||
MD_Token next_token = MD_Parse_PeekSkipSome(ctx, 0);
|
||||
if(next_token.kind == MD_TokenKind_Newline ||
|
||||
(next_token.kind == MD_TokenKind_Symbol &&
|
||||
(MD_StringMatch(next_token.string, MD_S8Lit(","), 0) ||
|
||||
MD_StringMatch(next_token.string, MD_S8Lit(";"), 0))))
|
||||
{
|
||||
result = 1;
|
||||
}
|
||||
}
|
||||
else if(MD_Parse_Require(ctx, MD_S8Lit(","), MD_TokenKind_Symbol))
|
||||
{
|
||||
child->flags |= MD_NodeFlag_BeforeComma;
|
||||
next_child_flags |= MD_NodeFlag_AfterComma;
|
||||
}
|
||||
else if(MD_Parse_Require(ctx, MD_S8Lit(";"), MD_TokenKind_Symbol))
|
||||
{
|
||||
child->flags |= MD_NodeFlag_BeforeSemicolon;
|
||||
next_child_flags |= MD_NodeFlag_AfterSemicolon;
|
||||
}
|
||||
|
||||
if(result)
|
||||
{
|
||||
goto end_parse;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1888,10 +1887,10 @@ MD_ParseOneNodeFromCtx(MD_ParseCtx *ctx)
|
||||
retry:
|
||||
|
||||
// NOTE(rjf): Unnamed Sets
|
||||
if((MD_Parse_TokenMatch(next_token, MD_S8Lit("("), 0) ||
|
||||
MD_Parse_TokenMatch(next_token, MD_S8Lit("{"), 0) ||
|
||||
MD_Parse_TokenMatch(next_token, MD_S8Lit("["), 0)) &&
|
||||
next_token.kind == MD_TokenKind_Symbol )
|
||||
if(next_token.kind == MD_TokenKind_Symbol &&
|
||||
(MD_StringMatch(next_token.string, MD_S8Lit("("), 0) ||
|
||||
MD_StringMatch(next_token.string, MD_S8Lit("{"), 0) ||
|
||||
MD_StringMatch(next_token.string, MD_S8Lit("["), 0)))
|
||||
{
|
||||
result.node = _MD_MakeNode_Ctx(ctx, MD_NodeKind_Label,
|
||||
MD_S8Lit(""), MD_S8Lit(""),
|
||||
@@ -1947,6 +1946,18 @@ MD_ParseOneNodeFromCtx(MD_ParseCtx *ctx)
|
||||
MD_ParseSetFlag_Bracket |
|
||||
MD_ParseSetFlag_Implicit);
|
||||
|
||||
// TODO(allen): This poking in an error "from afar" thing seems
|
||||
// like a bad sign to me. First it took a bit of digging for me to
|
||||
// understand how this code actually detects the errors it says it
|
||||
// does. Second it's kind of unclear that this should be illegal.
|
||||
// I mean we can do these:
|
||||
// `label: @tag child`
|
||||
// `label: child @tag {children}`
|
||||
// `label: @tag child`
|
||||
// I do get *why* this is an odd thing to allow, but it's weird either way.
|
||||
// Third, looks like this also is throwing out an error in the totally legal case:
|
||||
// `label:{@tag {bar}}`
|
||||
|
||||
// NOTE(mal): Generate error for tags in positions such as "label:@tag {children}"
|
||||
MD_Node *fc = result.node->first_child;
|
||||
if(fc == result.node->last_child && !MD_NodeIsNil(fc->first_tag) && // NOTE(mal): One child. Tagged.
|
||||
@@ -2139,7 +2150,7 @@ MD_ParseWholeString(MD_String8 filename, MD_String8 contents)
|
||||
|
||||
// connect node into graph
|
||||
MD_PushChild(root, child);
|
||||
MD_InsertToNamespace(selected_namespace, child);
|
||||
MD_PushReference(selected_namespace, child);
|
||||
|
||||
// check trailing symbol
|
||||
MD_u32 symbol_flags = 0;
|
||||
@@ -2279,21 +2290,11 @@ MD_PushTag(MD_Node *node, MD_Node *tag)
|
||||
tag->parent = node;
|
||||
}
|
||||
|
||||
MD_FUNCTION_IMPL void
|
||||
MD_InsertToNamespace(MD_Node *ns, MD_Node *node)
|
||||
{
|
||||
MD_Node *ref = MD_MakeNode(MD_NodeKind_Reference, node->string,
|
||||
node->whole_string, node->filename,
|
||||
node->file_contents, node->at);
|
||||
ref->ref_target = node;
|
||||
MD_PushChild(ns, ref);
|
||||
}
|
||||
|
||||
MD_FUNCTION_IMPL MD_Node*
|
||||
MD_PushReference(MD_Node *list, MD_Node *target)
|
||||
{
|
||||
MD_String8 string = MD_S8Lit("`reference node`");
|
||||
MD_Node *n = MD_MakeNode(MD_NodeKind_Reference, string, string, string, 0, 0);
|
||||
MD_Node *n = MD_MakeNode(MD_NodeKind_Reference, target->string, target->whole_string,
|
||||
target->filename, target->file_contents, target->at);
|
||||
n->ref_target = target;
|
||||
MD_PushChild(list, n);
|
||||
return(n);
|
||||
|
||||
@@ -724,5 +724,29 @@ int main(void)
|
||||
}
|
||||
}
|
||||
|
||||
Test("Tagged & Unlabeled")
|
||||
{
|
||||
// TODO(allen): these tests are checking for rules that I find fishy; maybe instead
|
||||
// of trying to pass these tests, adjust the rules so we don't have this odd
|
||||
// expected behavior to begin with? Not sure.
|
||||
MD_String8 file_name = MD_S8Lit("raw_text");
|
||||
{
|
||||
MD_ParseResult result = MD_ParseWholeString(file_name, MD_S8Lit("foo:{@tag {bar}}\n"));
|
||||
TestResult(result.first_error == 0);
|
||||
}
|
||||
{
|
||||
MD_ParseResult result = MD_ParseWholeString(file_name, MD_S8Lit("foo:@tag bar\n"));
|
||||
TestResult(result.first_error == 0);
|
||||
}
|
||||
{
|
||||
MD_ParseResult result = MD_ParseWholeString(file_name, MD_S8Lit("foo:bar @tag {bar}\n"));
|
||||
TestResult(result.first_error == 0);
|
||||
}
|
||||
{
|
||||
MD_ParseResult result = MD_ParseWholeString(file_name, MD_S8Lit("foo:@tag {bar}\n"));
|
||||
TestResult(result.first_error != 0);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user