set up lexing to work (pretty well) with JSON

This commit is contained in:
Allen Webster
2021-06-30 17:46:31 -04:00
parent bb6face7fe
commit a477709af3
2 changed files with 29 additions and 81 deletions
+20 -33
View File
@@ -1272,13 +1272,30 @@ MD_TokenFromString(MD_String8 string)
MD_TokenizerScan(MD_CharIsAlpha(*at) || MD_CharIsDigit(*at) || *at == '_');
}
else if (MD_CharIsDigit(*at) ||
(at + 1 < one_past_last && at[0] == '-' && MD_CharIsDigit(at[1])))
else if (MD_CharIsDigit(*at))
{
token.node_flags |= MD_NodeFlag_Numeric;
token.kind = MD_TokenKind_NumericLiteral;
at += 1;
MD_TokenizerScan(MD_CharIsAlpha(*at) || MD_CharIsDigit(*at) || *at == '.');
for (; at < one_past_last;){
MD_b32 good = 0;
if (*at == 'e' || *at == 'E'){
good = 1;
at += 1;
if (at < one_past_last &&
(*at == '+' || *at == '-')){
at += 1;
}
}
else if (MD_CharIsAlpha(*at) || MD_CharIsDigit(*at) || *at == '.'){
good = 1;
at += 1;
}
if (!good){
break;
}
}
}
else if (MD_CharIsUnreservedSymbol(*at))
@@ -1788,36 +1805,6 @@ MD_ParseOneNode(MD_String8 string, MD_u64 offset)
colon_check_off += colon.outer_string.size;
off = colon_check_off;
//- rjf: prohibit tags here
// TODO(allen): This poking in an error "from afar" thing seems
// like a bad sign to me. First it took a bit of digging for me to
// understand how this code actually detects the errors it says it
// does. Second it's kind of unclear that this should be illegal.
// I mean we can do these:
// `label: @tag child`
// `label: child @tag {children}`
// `label: @tag child`
// I do get *why* this is an odd thing to allow, but it's weird either way.
// Third, looks like this also is throwing out an error in the totally legal case:
// `label:{@tag {bar}}`
for(MD_u64 tag_check_off = off; tag_check_off < string.size;)
{
MD_Token token = MD_TokenFromString(MD_StringSkip(string, tag_check_off));
if(token.kind == MD_TokenKind_Reserved &&
token.outer_string.str[0] == '@')
{
MD_Error *error = MD_MakeTokenError(string, token, MD_MessageKind_Error,
MD_S8Lit("Tags are not allowed between a `:` and node children. Place tags before the name of the node list."));
MD_PushErrorToList(&result.errors, error);
tag_check_off += token.outer_string.size;
off = tag_check_off;
}
else
{
break;
}
}
children_parse = MD_ParseNodeSet(string, off, parsed_node, MD_ParseSetRule_EndOnDelimiter);
off += children_parse.bytes_parsed;
MD_PushErrorListToList(&result.errors, &children_parse.errors);
+9 -48
View File
@@ -491,7 +491,7 @@ int main(void)
{"'", {1}},
{"a:'''\nmulti-line text literal", {3}},
{"/* foo", {1}},
{"label:@tag {1, 2, 3} /* /* unterminated comment */", {7, 22}},
{"label: {1, 2, 3} /* /* unterminated comment */", {18}},
{"{a,,#b,}", {4, 5}},
{"foo""\x80""bar", {4}},
};
@@ -727,13 +727,15 @@ int main(void)
MD_String8 text = MD_S8Lit("a: b: {\nx\n} c");
MD_ParseResult result = MD_ParseWholeString(file_name, text);
TestResult(result.errors.first == 0);
TestResult(result.node->first_child == result.node->last_child);
TestResult(MD_ChildCountFromNode(result.node) == 1);
TestResult(MD_ChildCountFromNode(result.node->first_child) == 2);
}
{
MD_String8 text = MD_S8Lit("a: b: {\nx\n}\nc");
MD_ParseResult result = MD_ParseWholeString(file_name, text);
TestResult(result.errors.first == 0);
TestResult(result.node->first_child != result.node->last_child);
TestResult(MD_ChildCountFromNode(result.node) == 2);
TestResult(MD_ChildCountFromNode(result.node->first_child) == 1);
}
// scoped set is not unscoped
@@ -750,23 +752,19 @@ int main(void)
MD_String8 file_name = MD_S8Lit("raw_text");
{
MD_ParseResult result = MD_ParseWholeString(file_name,
MD_S8Lit("@foo bar"));
MD_ParseResult result = MD_ParseWholeString(file_name, MD_S8Lit("@foo bar"));
TestResult(MD_NodeHasTag(result.node->first_child, MD_S8Lit("foo")));
}
{
MD_ParseResult result = MD_ParseWholeString(file_name,
MD_S8Lit("@+ bar"));
MD_ParseResult result = MD_ParseWholeString(file_name, MD_S8Lit("@+ bar"));
TestResult(MD_NodeHasTag(result.node->first_child, MD_S8Lit("+")));
}
{
MD_ParseResult result = MD_ParseWholeString(file_name,
MD_S8Lit("@'a b c' bar"));
MD_ParseResult result = MD_ParseWholeString(file_name, MD_S8Lit("@'a b c' bar"));
TestResult(MD_NodeHasTag(result.node->first_child, MD_S8Lit("a b c")));
}
{
MD_ParseResult result = MD_ParseWholeString(file_name,
MD_S8Lit("@100 bar"));
MD_ParseResult result = MD_ParseWholeString(file_name, MD_S8Lit("@100 bar"));
TestResult(MD_NodeHasTag(result.node->first_child, MD_S8Lit("100")));
}
}
@@ -808,15 +806,9 @@ int main(void)
MD_String8 test_strings[] = {
MD_S8Lit("0765"),
MD_S8Lit("+0765"), // NOTE(allen): not sure about the "+" cases here
MD_S8Lit("-0765"),
MD_S8Lit("0xABC"),
MD_S8Lit("+0xABC"),
MD_S8Lit("-0xABC"),
MD_S8Lit("0x123"),
MD_S8Lit("0b010"),
MD_S8Lit("+0b010"),
MD_S8Lit("-0b010"),
};
MD_String8 *string = test_strings;
@@ -835,21 +827,13 @@ int main(void)
MD_String8 test_strings[] = {
MD_S8Lit("0"),
MD_S8Lit("1"),
MD_S8Lit("-1"),
MD_S8Lit("+1"), // NOTE(allen): not sure about "+1"
MD_S8Lit("0.5"),
MD_S8Lit("-0.5"),
MD_S8Lit("-1.5"),
MD_S8Lit("1e2"),
MD_S8Lit("-1e2"),
MD_S8Lit("1e+2"),
MD_S8Lit("1e-2"),
MD_S8Lit("-1e+2"),
MD_S8Lit("1.5e2"),
MD_S8Lit("-1.5e2"),
MD_S8Lit("1.5e+2"),
MD_S8Lit("1.5e-2"),
MD_S8Lit("-1.5e+2"),
};
MD_String8 *string = test_strings;
@@ -861,29 +845,6 @@ int main(void)
}
}
Test("Float Lexing Pt 2")
{
MD_String8 file_name = MD_S8Lit("raw_text");
MD_String8 test_strings[] = {
MD_S8Lit("0765.1"),
MD_S8Lit("0765e2"),
MD_S8Lit("0xABC.1"),
MD_S8Lit("0xABCe2"),
MD_S8Lit("0x123.1"),
MD_S8Lit("0x123e2"),
MD_S8Lit("0b010.1"),
MD_S8Lit("0b010e2"),
};
MD_String8 *string = test_strings;
for (int i = 0; i < MD_ArrayCount(test_strings); i += 1, string += 1){
MD_ParseResult result = MD_ParseWholeString(file_name, *string);
TestResult((result.errors.first == 0) &&
(result.node->first_child != result.node->last_child));
}
}
Test("Labels are Not Reserved")
{
MD_String8 file_name = MD_S8Lit("raw_text");