tighter implementation of group mechanism

This commit is contained in:
Allen Webster
2021-06-30 15:28:27 -04:00
parent e0ed76c62f
commit 5ef77634ef
2 changed files with 54 additions and 87 deletions
+30 -30
View File
@@ -416,35 +416,37 @@ struct MD_Map
//~ Tokens
// Token kinds produced by the lexer.
// NOTE(review): this span appears to interleave two versions of the definition
// as rendered by a diff view: a sequential enum (Nil .. COUNT) and a bit-flag
// enum layered over an MD_u32 typedef. Confirm which lines are live before
// relying on any particular numeric value.
typedef enum MD_TokenKind
typedef MD_u32 MD_TokenKind;
enum
{
// NOTE(rjf): *MUST* stay up-to-date with MD_TokenGroupsFromTokenKind.
MD_TokenKind_Nil,
MD_TokenKind_Identifier,
MD_TokenKind_NumericLiteral,
MD_TokenKind_StringLiteral,
MD_TokenKind_Symbol,
MD_TokenKind_Comment,
MD_TokenKind_Whitespace,
MD_TokenKind_Newline,
// Character outside currently supported encodings
MD_TokenKind_BadCharacter,
MD_TokenKind_COUNT,
}
MD_TokenKind;
// Bit-flag form: every kind owns a distinct bit, so several kinds can be OR'd
// into one mask and a token can be tested against that mask with a single `&`.
// No flag has the value 0, so a kind of 0 matches none of these masks.
MD_TokenKind_Identifier = (1<<0),
MD_TokenKind_NumericLiteral = (1<<1),
MD_TokenKind_StringLiteral = (1<<2),
MD_TokenKind_Symbol = (1<<3),
MD_TokenKind_Reserved = (1<<4),
MD_TokenKind_Comment = (1<<5),
MD_TokenKind_Whitespace = (1<<6),
MD_TokenKind_Newline = (1<<7),
MD_TokenKind_BrokenComment = (1<<8),
MD_TokenKind_BrokenString = (1<<9),
MD_TokenKind_BadCharacter = (1<<10),
};
// Token groups: masks used to classify or skip several token kinds at once.
// NOTE(review): this span appears to interleave two versions of the enum as
// rendered by a diff view: one with independent group bits (1 << n) and one
// where each group is built directly from MD_TokenKind_* flags. Confirm which
// lines are live.
typedef MD_u32 MD_TokenGroups;
enum{
MD_TokenGroup_Comment = (1 << 0),
MD_TokenGroup_Whitespace = (1 << 1),
MD_TokenGroup_Regular = (1 << 2),
MD_TokenGroup_LabelString = (1 << 3),
enum
{
// Kind-mask form: a group is the OR of the kind flags it contains, so
// `(token.kind & group) != 0` tests membership directly.
MD_TokenGroup_Comment = MD_TokenKind_Comment,
// Whitespace covers both in-line whitespace and newlines.
MD_TokenGroup_Whitespace = (MD_TokenKind_Whitespace|
MD_TokenKind_Newline),
// Irregular = comments plus whitespace.
MD_TokenGroup_Irregular = (MD_TokenGroup_Comment|
MD_TokenGroup_Whitespace),
// Regular is the bitwise complement of Irregular, so it also contains
// Reserved, the Broken*/BadCharacter kinds, and every bit not assigned to a kind.
MD_TokenGroup_Regular = ~MD_TokenGroup_Irregular,
// Kinds accepted where a label is expected.
MD_TokenGroup_Label = (MD_TokenKind_Identifier|
MD_TokenKind_NumericLiteral|
MD_TokenKind_StringLiteral|
MD_TokenKind_Symbol),
// Kinds grouped as errors: broken comment, broken string, bad character.
MD_TokenGroup_Error = (MD_TokenKind_BrokenComment|
MD_TokenKind_BrokenString|
MD_TokenKind_BadCharacter),
};
typedef struct MD_Token MD_Token;
@@ -725,10 +727,8 @@ MD_FUNCTION MD_MapSlot* MD_MapOverwrite(MD_Map *map, MD_MapKey key, void *val);
//~ Parsing
MD_FUNCTION MD_TokenGroups MD_TokenGroupsFromTokenKind(MD_TokenKind kind);
MD_FUNCTION MD_Token MD_TokenFromString(MD_String8 string);
MD_FUNCTION MD_u64 MD_BytesFromStringTokenGroupRun(MD_String8 string, MD_TokenGroups groups);
MD_FUNCTION MD_u64 MD_LexAdvanceFromSkips(MD_String8 string, MD_TokenKind skip_kinds);
MD_FUNCTION MD_Error * MD_MakeNodeError(MD_Node *node, MD_MessageKind kind, MD_String8 str);
MD_FUNCTION MD_Error * MD_MakeTokenError(MD_String8 parse_contents, MD_Token token, MD_MessageKind kind, MD_String8 str);
MD_FUNCTION void MD_PushErrorToList(MD_ErrorList *list, MD_Error *error);
+24 -57
View File
@@ -1094,32 +1094,6 @@ _MD_CommentIsSyntacticallyCorrect(MD_Token comment_token)
return result;
}
// Maps a single token kind to the set of token groups it belongs to.
//
// kind: the token kind to classify.
// Returns a MD_TokenGroups mask: Regular|LabelString for identifiers, numeric
// literals, string literals and symbols; Comment for comments; Whitespace for
// whitespace and newlines; 0 for MD_TokenKind_Nil and any kind not listed.
// NOTE(review): the surrounding hunk header (32 lines -> 6) suggests this
// function is removed by the commit shown here -- confirm before calling it.
MD_FUNCTION MD_TokenGroups
MD_TokenGroupsFromTokenKind(MD_TokenKind kind)
{
MD_TokenGroups groups = 0;
switch(kind)
{
//- rjf: nil
// `default` shares the Nil case, so unlisted kinds leave `groups` at 0.
default:
case MD_TokenKind_Nil: break;
//- rjf: regular
case MD_TokenKind_Identifier: groups |= MD_TokenGroup_Regular|MD_TokenGroup_LabelString; break;
case MD_TokenKind_NumericLiteral: groups |= MD_TokenGroup_Regular|MD_TokenGroup_LabelString; break;
case MD_TokenKind_StringLiteral: groups |= MD_TokenGroup_Regular|MD_TokenGroup_LabelString; break;
case MD_TokenKind_Symbol: groups |= MD_TokenGroup_Regular|MD_TokenGroup_LabelString; break;
//- rjf: comments
case MD_TokenKind_Comment: groups |= MD_TokenGroup_Comment; break;
//- rjf: whitespace
case MD_TokenKind_Whitespace: groups |= MD_TokenGroup_Whitespace; break;
case MD_TokenKind_Newline: groups |= MD_TokenGroup_Whitespace; break;
}
return groups;
}
MD_FUNCTION MD_Token
MD_TokenFromString(MD_String8 string)
{
@@ -1217,7 +1191,7 @@ MD_TokenFromString(MD_String8 string)
}
}
}
if (token.kind == MD_TokenKind_Nil) goto symbol_lex;
if (token.kind == 0) goto symbol_lex;
}break;
// NOTE(allen): Strings
@@ -1371,28 +1345,21 @@ MD_TokenFromString(MD_String8 string)
}
// Measures the leading run of skippable tokens in `string`.
//
// string: text to lex from its first byte.
// groups / skip_kinds: mask selecting which tokens are skipped.
// Returns the byte offset, relative to the start of `string`, of the first
// token that is not selected by the mask.
//
// NOTE(review): this span appears to interleave two versions of the function
// as rendered by a diff view: MD_BytesFromStringTokenGroupRun (goto-based,
// classifies via MD_TokenGroupsFromTokenKind) and MD_LexAdvanceFromSkips
// (for-loop, tests `skip_kinds & token.kind` directly). Confirm which lines
// are live.
MD_FUNCTION_IMPL MD_u64
MD_BytesFromStringTokenGroupRun(MD_String8 string, MD_TokenGroups groups)
MD_LexAdvanceFromSkips(MD_String8 string, MD_TokenKind skip_kinds)
{
MD_u64 result = 0;
MD_b32 skip_comment = (groups & MD_TokenGroup_Comment);
MD_b32 skip_whitespace = (groups & MD_TokenGroup_Whitespace);
MD_b32 skip_regular = (groups & MD_TokenGroup_Regular);
loop:
// `result` is always overwritten with `p` before the loop below exits.
MD_u64 result = string.size;
// `p` is the running byte offset of the next token to lex.
MD_u64 p = 0;
for (;;)
{
MD_Token token = MD_TokenFromString(MD_StringSkip(string, result));
MD_TokenGroups groups = MD_TokenGroupsFromTokenKind(token.kind);
if((skip_comment && groups & MD_TokenGroup_Comment) ||
(skip_whitespace && groups & MD_TokenGroup_Whitespace) ||
(skip_regular && groups & MD_TokenGroup_Regular))
MD_Token token = MD_TokenFromString(MD_StringSkip(string, p));
// Stop at the first token whose kind shares no bit with the skip mask.
// NOTE(review): termination presumably relies on MD_TokenFromString
// yielding a kind outside the mask (e.g. 0) at end of input -- confirm.
if ((skip_kinds & token.kind) == 0)
{
result += token.outer_string.size;
goto loop;
result = p;
break;
}
// Token is skippable: step past its full extent (outer_string).
p += token.outer_string.size;
}
return result;
return(result);
}
MD_FUNCTION_IMPL MD_Error *
@@ -1476,7 +1443,7 @@ MD_ParseNodeSet(MD_String8 string, MD_u64 offset, MD_Node *parent, MD_ParseSetRu
case MD_ParseSetRule_EndOnDelimiter:
{
MD_u64 opener_check_off = off;
opener_check_off += MD_BytesFromStringTokenGroupRun(MD_StringSkip(string, opener_check_off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
opener_check_off += MD_LexAdvanceFromSkips(MD_StringSkip(string, opener_check_off), MD_TokenGroup_Irregular);
initial_token = MD_TokenFromString(MD_StringSkip(string, opener_check_off));
if(initial_token.kind == MD_TokenKind_Symbol)
{
@@ -1568,7 +1535,7 @@ MD_ParseNodeSet(MD_String8 string, MD_u64 offset, MD_Node *parent, MD_ParseSetRu
//- rjf: check separators and possible braces from higher parents
{
closer_check_off += MD_BytesFromStringTokenGroupRun(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
closer_check_off += MD_LexAdvanceFromSkips(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
MD_Token potential_closer = MD_TokenFromString(MD_StringSkip(string, closer_check_off));
if(potential_closer.kind == MD_TokenKind_Symbol &&
(MD_StringMatch(potential_closer.outer_string, MD_S8Lit(","), 0) ||
@@ -1593,7 +1560,7 @@ MD_ParseNodeSet(MD_String8 string, MD_u64 offset, MD_Node *parent, MD_ParseSetRu
if(!close_with_separator && !parse_all)
{
MD_u64 closer_check_off = off;
closer_check_off += MD_BytesFromStringTokenGroupRun(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
closer_check_off += MD_LexAdvanceFromSkips(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
MD_Token potential_closer = MD_TokenFromString(MD_StringSkip(string, closer_check_off));
if(potential_closer.kind == MD_TokenKind_Symbol)
{
@@ -1640,7 +1607,7 @@ MD_ParseNodeSet(MD_String8 string, MD_u64 offset, MD_Node *parent, MD_ParseSetRu
MD_NodeFlags trailing_separator_flags = 0;
if(!close_with_separator)
{
off += MD_BytesFromStringTokenGroupRun(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
off += MD_LexAdvanceFromSkips(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
MD_Token trailing_separator = MD_TokenFromString(MD_StringSkip(string, off));
if(MD_StringMatch(trailing_separator.string, MD_S8Lit(","), 0) &&
trailing_separator.kind == MD_TokenKind_Symbol)
@@ -1698,7 +1665,7 @@ MD_ParseTagList(MD_String8 string, MD_u64 offset)
for(;off < string.size;)
{
//- rjf: parse @ symbol, signifying start of tag
off += MD_BytesFromStringTokenGroupRun(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
off += MD_LexAdvanceFromSkips(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
MD_Token next_token = MD_TokenFromString(MD_StringSkip(string, off));
if(!MD_StringMatch(next_token.string, MD_S8Lit("@"), 0) ||
next_token.kind != MD_TokenKind_Symbol)
@@ -1710,10 +1677,10 @@ MD_ParseTagList(MD_String8 string, MD_u64 offset)
//- rjf: parse string of tag node
MD_Token name = MD_TokenFromString(MD_StringSkip(string, off));
MD_u64 name_off = off;
if(!(MD_TokenGroupsFromTokenKind(name.kind) & MD_TokenGroup_LabelString))
if((name.kind & MD_TokenGroup_Label) == 0)
{
MD_Error *error = MD_MakeTokenError(string, name, MD_MessageKind_Error,
MD_PushStringF("\"%.*s\" is not a proper tag identifier",
MD_PushStringF("\"%.*s\" is not a proper tag label",
MD_StringExpand(name.outer_string)));
MD_PushErrorToList(&result.errors, error);
break;
@@ -1776,7 +1743,7 @@ MD_ParseOneNode(MD_String8 string, MD_u64 offset)
MD_MemoryZero(&comment_token, sizeof(comment_token));
}
}
else if(MD_TokenGroupsFromTokenKind(token.kind) & MD_TokenGroup_Whitespace)
else if((token.kind & MD_TokenGroup_Whitespace) != 0)
{
off += token.outer_string.size;
}
@@ -1809,7 +1776,7 @@ MD_ParseOneNode(MD_String8 string, MD_u64 offset)
retry:;
{
//- rjf: try to parse an unnamed set
off += MD_BytesFromStringTokenGroupRun(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
off += MD_LexAdvanceFromSkips(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
MD_Token unnamed_set_opener = MD_TokenFromString(MD_StringSkip(string, off));
if(unnamed_set_opener.kind == MD_TokenKind_Symbol &&
(MD_StringMatch(unnamed_set_opener.string, MD_S8Lit("("), 0) ||
@@ -1825,9 +1792,9 @@ MD_ParseOneNode(MD_String8 string, MD_u64 offset)
}
//- rjf: try to parse regular node, with/without children
off += MD_BytesFromStringTokenGroupRun(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
off += MD_LexAdvanceFromSkips(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
MD_Token label_name = MD_TokenFromString(MD_StringSkip(string, off));
if(MD_TokenGroupsFromTokenKind(label_name.kind) & MD_TokenGroup_LabelString)
if((label_name.kind & MD_TokenGroup_Label) != 0)
{
off += label_name.outer_string.size;
parsed_node = MD_MakeNode(MD_NodeKind_Label, label_name.string, label_name.outer_string,
@@ -1864,7 +1831,7 @@ MD_ParseOneNode(MD_String8 string, MD_u64 offset)
//- rjf: try to parse children for this node
MD_u64 colon_check_off = off;
colon_check_off += MD_BytesFromStringTokenGroupRun(MD_StringSkip(string, colon_check_off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
colon_check_off += MD_LexAdvanceFromSkips(MD_StringSkip(string, colon_check_off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
MD_Token colon = MD_TokenFromString(MD_StringSkip(string, colon_check_off));
if(MD_StringMatch(colon.string, MD_S8Lit(":"), 0) && colon.kind == MD_TokenKind_Symbol)
{
@@ -1948,7 +1915,7 @@ MD_ParseOneNode(MD_String8 string, MD_u64 offset)
{
break;
}
else if(MD_TokenGroupsFromTokenKind(token.kind) & MD_TokenGroup_Whitespace)
else if((token.kind & MD_TokenGroup_Whitespace) != 0)
{
off += token.outer_string.size;
}