mirror of
https://github.com/Ed94/metadesk.git
synced 2026-06-13 07:52:22 -07:00
tighter implementation of group mechanism
This commit is contained in:
+30
-30
@@ -416,35 +416,37 @@ struct MD_Map
|
||||
|
||||
//~ Tokens
|
||||
|
||||
typedef enum MD_TokenKind
|
||||
typedef MD_u32 MD_TokenKind;
|
||||
enum
|
||||
{
|
||||
// NOTE(rjf): *MUST* stay up-to-date with MD_TokenGroupsFromTokenKind.
|
||||
|
||||
MD_TokenKind_Nil,
|
||||
|
||||
MD_TokenKind_Identifier,
|
||||
MD_TokenKind_NumericLiteral,
|
||||
MD_TokenKind_StringLiteral,
|
||||
MD_TokenKind_Symbol,
|
||||
|
||||
MD_TokenKind_Comment,
|
||||
|
||||
MD_TokenKind_Whitespace,
|
||||
MD_TokenKind_Newline,
|
||||
|
||||
// Character outside currently supported encodings
|
||||
MD_TokenKind_BadCharacter,
|
||||
|
||||
MD_TokenKind_COUNT,
|
||||
}
|
||||
MD_TokenKind;
|
||||
MD_TokenKind_Identifier = (1<<0),
|
||||
MD_TokenKind_NumericLiteral = (1<<1),
|
||||
MD_TokenKind_StringLiteral = (1<<2),
|
||||
MD_TokenKind_Symbol = (1<<3),
|
||||
MD_TokenKind_Reserved = (1<<4),
|
||||
MD_TokenKind_Comment = (1<<5),
|
||||
MD_TokenKind_Whitespace = (1<<6),
|
||||
MD_TokenKind_Newline = (1<<7),
|
||||
MD_TokenKind_BrokenComment = (1<<8),
|
||||
MD_TokenKind_BrokenString = (1<<9),
|
||||
MD_TokenKind_BadCharacter = (1<<10),
|
||||
};
|
||||
|
||||
typedef MD_u32 MD_TokenGroups;
|
||||
enum{
|
||||
MD_TokenGroup_Comment = (1 << 0),
|
||||
MD_TokenGroup_Whitespace = (1 << 1),
|
||||
MD_TokenGroup_Regular = (1 << 2),
|
||||
MD_TokenGroup_LabelString = (1 << 3),
|
||||
enum
|
||||
{
|
||||
MD_TokenGroup_Comment = MD_TokenKind_Comment,
|
||||
MD_TokenGroup_Whitespace = (MD_TokenKind_Whitespace|
|
||||
MD_TokenKind_Newline),
|
||||
MD_TokenGroup_Irregular = (MD_TokenGroup_Comment|
|
||||
MD_TokenGroup_Whitespace),
|
||||
MD_TokenGroup_Regular = ~MD_TokenGroup_Irregular,
|
||||
MD_TokenGroup_Label = (MD_TokenKind_Identifier|
|
||||
MD_TokenKind_NumericLiteral|
|
||||
MD_TokenKind_StringLiteral|
|
||||
MD_TokenKind_Symbol),
|
||||
MD_TokenGroup_Error = (MD_TokenKind_BrokenComment|
|
||||
MD_TokenKind_BrokenString|
|
||||
MD_TokenKind_BadCharacter),
|
||||
};
|
||||
|
||||
typedef struct MD_Token MD_Token;
|
||||
@@ -725,10 +727,8 @@ MD_FUNCTION MD_MapSlot* MD_MapOverwrite(MD_Map *map, MD_MapKey key, void *val);
|
||||
|
||||
//~ Parsing
|
||||
|
||||
MD_FUNCTION MD_TokenGroups MD_TokenGroupsFromTokenKind(MD_TokenKind kind);
|
||||
|
||||
MD_FUNCTION MD_Token MD_TokenFromString(MD_String8 string);
|
||||
MD_FUNCTION MD_u64 MD_BytesFromStringTokenGroupRun(MD_String8 string, MD_TokenGroups groups);
|
||||
MD_FUNCTION MD_u64 MD_LexAdvanceFromSkips(MD_String8 string, MD_TokenKind skip_kinds);
|
||||
MD_FUNCTION MD_Error * MD_MakeNodeError(MD_Node *node, MD_MessageKind kind, MD_String8 str);
|
||||
MD_FUNCTION MD_Error * MD_MakeTokenError(MD_String8 parse_contents, MD_Token token, MD_MessageKind kind, MD_String8 str);
|
||||
MD_FUNCTION void MD_PushErrorToList(MD_ErrorList *list, MD_Error *error);
|
||||
|
||||
+24
-57
@@ -1094,32 +1094,6 @@ _MD_CommentIsSyntacticallyCorrect(MD_Token comment_token)
|
||||
return result;
|
||||
}
|
||||
|
||||
MD_FUNCTION MD_TokenGroups
|
||||
MD_TokenGroupsFromTokenKind(MD_TokenKind kind)
|
||||
{
|
||||
MD_TokenGroups groups = 0;
|
||||
switch(kind)
|
||||
{
|
||||
//- rjf: nil
|
||||
default:
|
||||
case MD_TokenKind_Nil: break;
|
||||
|
||||
//- rjf: regular
|
||||
case MD_TokenKind_Identifier: groups |= MD_TokenGroup_Regular|MD_TokenGroup_LabelString; break;
|
||||
case MD_TokenKind_NumericLiteral: groups |= MD_TokenGroup_Regular|MD_TokenGroup_LabelString; break;
|
||||
case MD_TokenKind_StringLiteral: groups |= MD_TokenGroup_Regular|MD_TokenGroup_LabelString; break;
|
||||
case MD_TokenKind_Symbol: groups |= MD_TokenGroup_Regular|MD_TokenGroup_LabelString; break;
|
||||
|
||||
//- rjf: comments
|
||||
case MD_TokenKind_Comment: groups |= MD_TokenGroup_Comment; break;
|
||||
|
||||
//- rjf: whitespace
|
||||
case MD_TokenKind_Whitespace: groups |= MD_TokenGroup_Whitespace; break;
|
||||
case MD_TokenKind_Newline: groups |= MD_TokenGroup_Whitespace; break;
|
||||
}
|
||||
return groups;
|
||||
}
|
||||
|
||||
MD_FUNCTION MD_Token
|
||||
MD_TokenFromString(MD_String8 string)
|
||||
{
|
||||
@@ -1217,7 +1191,7 @@ MD_TokenFromString(MD_String8 string)
|
||||
}
|
||||
}
|
||||
}
|
||||
if (token.kind == MD_TokenKind_Nil) goto symbol_lex;
|
||||
if (token.kind == 0) goto symbol_lex;
|
||||
}break;
|
||||
|
||||
// NOTE(allen): Strings
|
||||
@@ -1371,28 +1345,21 @@ MD_TokenFromString(MD_String8 string)
|
||||
}
|
||||
|
||||
MD_FUNCTION_IMPL MD_u64
|
||||
MD_BytesFromStringTokenGroupRun(MD_String8 string, MD_TokenGroups groups)
|
||||
MD_LexAdvanceFromSkips(MD_String8 string, MD_TokenKind skip_kinds)
|
||||
{
|
||||
MD_u64 result = 0;
|
||||
|
||||
MD_b32 skip_comment = (groups & MD_TokenGroup_Comment);
|
||||
MD_b32 skip_whitespace = (groups & MD_TokenGroup_Whitespace);
|
||||
MD_b32 skip_regular = (groups & MD_TokenGroup_Regular);
|
||||
|
||||
loop:
|
||||
MD_u64 result = string.size;
|
||||
MD_u64 p = 0;
|
||||
for (;;)
|
||||
{
|
||||
MD_Token token = MD_TokenFromString(MD_StringSkip(string, result));
|
||||
MD_TokenGroups groups = MD_TokenGroupsFromTokenKind(token.kind);
|
||||
if((skip_comment && groups & MD_TokenGroup_Comment) ||
|
||||
(skip_whitespace && groups & MD_TokenGroup_Whitespace) ||
|
||||
(skip_regular && groups & MD_TokenGroup_Regular))
|
||||
MD_Token token = MD_TokenFromString(MD_StringSkip(string, p));
|
||||
if ((skip_kinds & token.kind) == 0)
|
||||
{
|
||||
result += token.outer_string.size;
|
||||
goto loop;
|
||||
result = p;
|
||||
break;
|
||||
}
|
||||
p += token.outer_string.size;
|
||||
}
|
||||
|
||||
return result;
|
||||
return(result);
|
||||
}
|
||||
|
||||
MD_FUNCTION_IMPL MD_Error *
|
||||
@@ -1476,7 +1443,7 @@ MD_ParseNodeSet(MD_String8 string, MD_u64 offset, MD_Node *parent, MD_ParseSetRu
|
||||
case MD_ParseSetRule_EndOnDelimiter:
|
||||
{
|
||||
MD_u64 opener_check_off = off;
|
||||
opener_check_off += MD_BytesFromStringTokenGroupRun(MD_StringSkip(string, opener_check_off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
|
||||
opener_check_off += MD_LexAdvanceFromSkips(MD_StringSkip(string, opener_check_off), MD_TokenGroup_Irregular);
|
||||
initial_token = MD_TokenFromString(MD_StringSkip(string, opener_check_off));
|
||||
if(initial_token.kind == MD_TokenKind_Symbol)
|
||||
{
|
||||
@@ -1568,7 +1535,7 @@ MD_ParseNodeSet(MD_String8 string, MD_u64 offset, MD_Node *parent, MD_ParseSetRu
|
||||
|
||||
//- rjf: check separators and possible braces from higher parents
|
||||
{
|
||||
closer_check_off += MD_BytesFromStringTokenGroupRun(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
|
||||
closer_check_off += MD_LexAdvanceFromSkips(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
|
||||
MD_Token potential_closer = MD_TokenFromString(MD_StringSkip(string, closer_check_off));
|
||||
if(potential_closer.kind == MD_TokenKind_Symbol &&
|
||||
(MD_StringMatch(potential_closer.outer_string, MD_S8Lit(","), 0) ||
|
||||
@@ -1593,7 +1560,7 @@ MD_ParseNodeSet(MD_String8 string, MD_u64 offset, MD_Node *parent, MD_ParseSetRu
|
||||
if(!close_with_separator && !parse_all)
|
||||
{
|
||||
MD_u64 closer_check_off = off;
|
||||
closer_check_off += MD_BytesFromStringTokenGroupRun(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
|
||||
closer_check_off += MD_LexAdvanceFromSkips(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
|
||||
MD_Token potential_closer = MD_TokenFromString(MD_StringSkip(string, closer_check_off));
|
||||
if(potential_closer.kind == MD_TokenKind_Symbol)
|
||||
{
|
||||
@@ -1640,7 +1607,7 @@ MD_ParseNodeSet(MD_String8 string, MD_u64 offset, MD_Node *parent, MD_ParseSetRu
|
||||
MD_NodeFlags trailing_separator_flags = 0;
|
||||
if(!close_with_separator)
|
||||
{
|
||||
off += MD_BytesFromStringTokenGroupRun(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
|
||||
off += MD_LexAdvanceFromSkips(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
|
||||
MD_Token trailing_separator = MD_TokenFromString(MD_StringSkip(string, off));
|
||||
if(MD_StringMatch(trailing_separator.string, MD_S8Lit(","), 0) &&
|
||||
trailing_separator.kind == MD_TokenKind_Symbol)
|
||||
@@ -1698,7 +1665,7 @@ MD_ParseTagList(MD_String8 string, MD_u64 offset)
|
||||
for(;off < string.size;)
|
||||
{
|
||||
//- rjf: parse @ symbol, signifying start of tag
|
||||
off += MD_BytesFromStringTokenGroupRun(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
|
||||
off += MD_LexAdvanceFromSkips(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
|
||||
MD_Token next_token = MD_TokenFromString(MD_StringSkip(string, off));
|
||||
if(!MD_StringMatch(next_token.string, MD_S8Lit("@"), 0) ||
|
||||
next_token.kind != MD_TokenKind_Symbol)
|
||||
@@ -1710,10 +1677,10 @@ MD_ParseTagList(MD_String8 string, MD_u64 offset)
|
||||
//- rjf: parse string of tag node
|
||||
MD_Token name = MD_TokenFromString(MD_StringSkip(string, off));
|
||||
MD_u64 name_off = off;
|
||||
if(!(MD_TokenGroupsFromTokenKind(name.kind) & MD_TokenGroup_LabelString))
|
||||
if((name.kind & MD_TokenGroup_Label) == 0)
|
||||
{
|
||||
MD_Error *error = MD_MakeTokenError(string, name, MD_MessageKind_Error,
|
||||
MD_PushStringF("\"%.*s\" is not a proper tag identifier",
|
||||
MD_PushStringF("\"%.*s\" is not a proper tag label",
|
||||
MD_StringExpand(name.outer_string)));
|
||||
MD_PushErrorToList(&result.errors, error);
|
||||
break;
|
||||
@@ -1776,7 +1743,7 @@ MD_ParseOneNode(MD_String8 string, MD_u64 offset)
|
||||
MD_MemoryZero(&comment_token, sizeof(comment_token));
|
||||
}
|
||||
}
|
||||
else if(MD_TokenGroupsFromTokenKind(token.kind) & MD_TokenGroup_Whitespace)
|
||||
else if((token.kind & MD_TokenGroup_Whitespace) != 0)
|
||||
{
|
||||
off += token.outer_string.size;
|
||||
}
|
||||
@@ -1809,7 +1776,7 @@ MD_ParseOneNode(MD_String8 string, MD_u64 offset)
|
||||
retry:;
|
||||
{
|
||||
//- rjf: try to parse an unnamed set
|
||||
off += MD_BytesFromStringTokenGroupRun(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
|
||||
off += MD_LexAdvanceFromSkips(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
|
||||
MD_Token unnamed_set_opener = MD_TokenFromString(MD_StringSkip(string, off));
|
||||
if(unnamed_set_opener.kind == MD_TokenKind_Symbol &&
|
||||
(MD_StringMatch(unnamed_set_opener.string, MD_S8Lit("("), 0) ||
|
||||
@@ -1825,9 +1792,9 @@ MD_ParseOneNode(MD_String8 string, MD_u64 offset)
|
||||
}
|
||||
|
||||
//- rjf: try to parse regular node, with/without children
|
||||
off += MD_BytesFromStringTokenGroupRun(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
|
||||
off += MD_LexAdvanceFromSkips(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
|
||||
MD_Token label_name = MD_TokenFromString(MD_StringSkip(string, off));
|
||||
if(MD_TokenGroupsFromTokenKind(label_name.kind) & MD_TokenGroup_LabelString)
|
||||
if((label_name.kind & MD_TokenGroup_Label) != 0)
|
||||
{
|
||||
off += label_name.outer_string.size;
|
||||
parsed_node = MD_MakeNode(MD_NodeKind_Label, label_name.string, label_name.outer_string,
|
||||
@@ -1864,7 +1831,7 @@ MD_ParseOneNode(MD_String8 string, MD_u64 offset)
|
||||
|
||||
//- rjf: try to parse children for this node
|
||||
MD_u64 colon_check_off = off;
|
||||
colon_check_off += MD_BytesFromStringTokenGroupRun(MD_StringSkip(string, colon_check_off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
|
||||
colon_check_off += MD_LexAdvanceFromSkips(MD_StringSkip(string, colon_check_off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace);
|
||||
MD_Token colon = MD_TokenFromString(MD_StringSkip(string, colon_check_off));
|
||||
if(MD_StringMatch(colon.string, MD_S8Lit(":"), 0) && colon.kind == MD_TokenKind_Symbol)
|
||||
{
|
||||
@@ -1948,7 +1915,7 @@ MD_ParseOneNode(MD_String8 string, MD_u64 offset)
|
||||
{
|
||||
break;
|
||||
}
|
||||
else if(MD_TokenGroupsFromTokenKind(token.kind) & MD_TokenGroup_Whitespace)
|
||||
else if((token.kind & MD_TokenGroup_Whitespace) != 0)
|
||||
{
|
||||
off += token.outer_string.size;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user