diff --git a/source/md.h b/source/md.h index 75bfe7c..fa29567 100644 --- a/source/md.h +++ b/source/md.h @@ -416,35 +416,37 @@ struct MD_Map //~ Tokens -typedef enum MD_TokenKind +typedef MD_u32 MD_TokenKind; +enum { - // NOTE(rjf): *MUST* stay up-to-date with MD_TokenGroupsFromTokenKind. - - MD_TokenKind_Nil, - - MD_TokenKind_Identifier, - MD_TokenKind_NumericLiteral, - MD_TokenKind_StringLiteral, - MD_TokenKind_Symbol, - - MD_TokenKind_Comment, - - MD_TokenKind_Whitespace, - MD_TokenKind_Newline, - - // Character outside currently supported encodings - MD_TokenKind_BadCharacter, - - MD_TokenKind_COUNT, -} -MD_TokenKind; + MD_TokenKind_Identifier = (1<<0), + MD_TokenKind_NumericLiteral = (1<<1), + MD_TokenKind_StringLiteral = (1<<2), + MD_TokenKind_Symbol = (1<<3), + MD_TokenKind_Reserved = (1<<4), + MD_TokenKind_Comment = (1<<5), + MD_TokenKind_Whitespace = (1<<6), + MD_TokenKind_Newline = (1<<7), + MD_TokenKind_BrokenComment = (1<<8), + MD_TokenKind_BrokenString = (1<<9), + MD_TokenKind_BadCharacter = (1<<10), +}; -typedef MD_u32 MD_TokenGroups; -enum{ - MD_TokenGroup_Comment = (1 << 0), - MD_TokenGroup_Whitespace = (1 << 1), - MD_TokenGroup_Regular = (1 << 2), - MD_TokenGroup_LabelString = (1 << 3), +enum +{ + MD_TokenGroup_Comment = MD_TokenKind_Comment, + MD_TokenGroup_Whitespace = (MD_TokenKind_Whitespace| + MD_TokenKind_Newline), + MD_TokenGroup_Irregular = (MD_TokenGroup_Comment| + MD_TokenGroup_Whitespace), + MD_TokenGroup_Regular = ~MD_TokenGroup_Irregular, + MD_TokenGroup_Label = (MD_TokenKind_Identifier| + MD_TokenKind_NumericLiteral| + MD_TokenKind_StringLiteral| + MD_TokenKind_Symbol), + MD_TokenGroup_Error = (MD_TokenKind_BrokenComment| + MD_TokenKind_BrokenString| + MD_TokenKind_BadCharacter), }; typedef struct MD_Token MD_Token; @@ -725,10 +727,8 @@ MD_FUNCTION MD_MapSlot* MD_MapOverwrite(MD_Map *map, MD_MapKey key, void *val); //~ Parsing -MD_FUNCTION MD_TokenGroups MD_TokenGroupsFromTokenKind(MD_TokenKind kind); - MD_FUNCTION MD_Token MD_TokenFromString(MD_String8 string); -MD_FUNCTION MD_u64 MD_BytesFromStringTokenGroupRun(MD_String8 string, MD_TokenGroups groups); +MD_FUNCTION MD_u64 MD_LexAdvanceFromSkips(MD_String8 string, MD_TokenKind skip_kinds); MD_FUNCTION MD_Error * MD_MakeNodeError(MD_Node *node, MD_MessageKind kind, MD_String8 str); MD_FUNCTION MD_Error * MD_MakeTokenError(MD_String8 parse_contents, MD_Token token, MD_MessageKind kind, MD_String8 str); MD_FUNCTION void MD_PushErrorToList(MD_ErrorList *list, MD_Error *error); diff --git a/source/md_impl.c b/source/md_impl.c index d1baf71..d894a4b 100644 --- a/source/md_impl.c +++ b/source/md_impl.c @@ -1094,32 +1094,6 @@ _MD_CommentIsSyntacticallyCorrect(MD_Token comment_token) return result; } -MD_FUNCTION MD_TokenGroups -MD_TokenGroupsFromTokenKind(MD_TokenKind kind) -{ - MD_TokenGroups groups = 0; - switch(kind) - { - //- rjf: nil - default: - case MD_TokenKind_Nil: break; - - //- rjf: regular - case MD_TokenKind_Identifier: groups |= MD_TokenGroup_Regular|MD_TokenGroup_LabelString; break; - case MD_TokenKind_NumericLiteral: groups |= MD_TokenGroup_Regular|MD_TokenGroup_LabelString; break; - case MD_TokenKind_StringLiteral: groups |= MD_TokenGroup_Regular|MD_TokenGroup_LabelString; break; - case MD_TokenKind_Symbol: groups |= MD_TokenGroup_Regular|MD_TokenGroup_LabelString; break; - - //- rjf: comments - case MD_TokenKind_Comment: groups |= MD_TokenGroup_Comment; break; - - //- rjf: whitespace - case MD_TokenKind_Whitespace: groups |= MD_TokenGroup_Whitespace; break; - case MD_TokenKind_Newline: groups |= MD_TokenGroup_Whitespace; break; - } - return groups; -} - MD_FUNCTION MD_Token MD_TokenFromString(MD_String8 string) { @@ -1217,7 +1191,7 @@ MD_TokenFromString(MD_String8 string) } } } - if (token.kind == MD_TokenKind_Nil) goto symbol_lex; + if (token.kind == 0) goto symbol_lex; }break; // NOTE(allen): Strings @@ -1371,28 +1345,21 @@ MD_TokenFromString(MD_String8 string) } MD_FUNCTION_IMPL MD_u64 -MD_BytesFromStringTokenGroupRun(MD_String8 string, MD_TokenGroups groups) +MD_LexAdvanceFromSkips(MD_String8 string, MD_TokenKind skip_kinds) { - MD_u64 result = 0; - - MD_b32 skip_comment = (groups & MD_TokenGroup_Comment); - MD_b32 skip_whitespace = (groups & MD_TokenGroup_Whitespace); - MD_b32 skip_regular = (groups & MD_TokenGroup_Regular); - - loop: + MD_u64 result = string.size; + MD_u64 p = 0; + for (;;) { - MD_Token token = MD_TokenFromString(MD_StringSkip(string, result)); - MD_TokenGroups groups = MD_TokenGroupsFromTokenKind(token.kind); - if((skip_comment && groups & MD_TokenGroup_Comment) || - (skip_whitespace && groups & MD_TokenGroup_Whitespace) || - (skip_regular && groups & MD_TokenGroup_Regular)) + MD_Token token = MD_TokenFromString(MD_StringSkip(string, p)); + if ((skip_kinds & token.kind) == 0) { - result += token.outer_string.size; - goto loop; + result = p; + break; } + p += token.outer_string.size; } - - return result; + return(result); } MD_FUNCTION_IMPL MD_Error * @@ -1476,7 +1443,7 @@ MD_ParseNodeSet(MD_String8 string, MD_u64 offset, MD_Node *parent, MD_ParseSetRu case MD_ParseSetRule_EndOnDelimiter: { MD_u64 opener_check_off = off; - opener_check_off += MD_BytesFromStringTokenGroupRun(MD_StringSkip(string, opener_check_off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace); + opener_check_off += MD_LexAdvanceFromSkips(MD_StringSkip(string, opener_check_off), MD_TokenGroup_Irregular); initial_token = MD_TokenFromString(MD_StringSkip(string, opener_check_off)); if(initial_token.kind == MD_TokenKind_Symbol) { @@ -1568,7 +1535,7 @@ MD_ParseNodeSet(MD_String8 string, MD_u64 offset, MD_Node *parent, MD_ParseSetRu //- rjf: check separators and possible braces from higher parents { - closer_check_off += MD_BytesFromStringTokenGroupRun(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace); + closer_check_off += MD_LexAdvanceFromSkips(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace); MD_Token potential_closer = MD_TokenFromString(MD_StringSkip(string, closer_check_off)); if(potential_closer.kind == MD_TokenKind_Symbol && (MD_StringMatch(potential_closer.outer_string, MD_S8Lit(","), 0) || @@ -1593,7 +1560,7 @@ MD_ParseNodeSet(MD_String8 string, MD_u64 offset, MD_Node *parent, MD_ParseSetRu if(!close_with_separator && !parse_all) { MD_u64 closer_check_off = off; - closer_check_off += MD_BytesFromStringTokenGroupRun(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace); + closer_check_off += MD_LexAdvanceFromSkips(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace); MD_Token potential_closer = MD_TokenFromString(MD_StringSkip(string, closer_check_off)); if(potential_closer.kind == MD_TokenKind_Symbol) { @@ -1640,7 +1607,7 @@ MD_ParseNodeSet(MD_String8 string, MD_u64 offset, MD_Node *parent, MD_ParseSetRu MD_NodeFlags trailing_separator_flags = 0; if(!close_with_separator) { - off += MD_BytesFromStringTokenGroupRun(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace); + off += MD_LexAdvanceFromSkips(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace); MD_Token trailing_separator = MD_TokenFromString(MD_StringSkip(string, off)); if(MD_StringMatch(trailing_separator.string, MD_S8Lit(","), 0) && trailing_separator.kind == MD_TokenKind_Symbol) @@ -1698,7 +1665,7 @@ MD_ParseTagList(MD_String8 string, MD_u64 offset) for(;off < string.size;) { //- rjf: parse @ symbol, signifying start of tag - off += MD_BytesFromStringTokenGroupRun(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace); + off += MD_LexAdvanceFromSkips(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace); MD_Token next_token = MD_TokenFromString(MD_StringSkip(string, off)); if(!MD_StringMatch(next_token.string, MD_S8Lit("@"), 0) || next_token.kind != MD_TokenKind_Symbol) @@ -1710,10 +1677,10 @@ MD_ParseTagList(MD_String8 string, MD_u64 offset) //- rjf: parse string of tag node MD_Token name = MD_TokenFromString(MD_StringSkip(string, off)); MD_u64 name_off = off; - if(!(MD_TokenGroupsFromTokenKind(name.kind) & MD_TokenGroup_LabelString)) + if((name.kind & MD_TokenGroup_Label) == 0) { MD_Error *error = MD_MakeTokenError(string, name, MD_MessageKind_Error, - MD_PushStringF("\"%.*s\" is not a proper tag identifier", + MD_PushStringF("\"%.*s\" is not a proper tag label", MD_StringExpand(name.outer_string))); MD_PushErrorToList(&result.errors, error); break; @@ -1776,7 +1743,7 @@ MD_ParseOneNode(MD_String8 string, MD_u64 offset) MD_MemoryZero(&comment_token, sizeof(comment_token)); } } - else if(MD_TokenGroupsFromTokenKind(token.kind) & MD_TokenGroup_Whitespace) + else if((token.kind & MD_TokenGroup_Whitespace) != 0) { off += token.outer_string.size; } @@ -1809,7 +1776,7 @@ MD_ParseOneNode(MD_String8 string, MD_u64 offset) retry:; { //- rjf: try to parse an unnamed set - off += MD_BytesFromStringTokenGroupRun(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace); + off += MD_LexAdvanceFromSkips(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace); MD_Token unnamed_set_opener = MD_TokenFromString(MD_StringSkip(string, off)); if(unnamed_set_opener.kind == MD_TokenKind_Symbol && (MD_StringMatch(unnamed_set_opener.string, MD_S8Lit("("), 0) || @@ -1825,9 +1792,9 @@ MD_ParseOneNode(MD_String8 string, MD_u64 offset) } //- rjf: try to parse regular node, with/without children - off += MD_BytesFromStringTokenGroupRun(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace); + off += MD_LexAdvanceFromSkips(MD_StringSkip(string, off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace); MD_Token label_name = MD_TokenFromString(MD_StringSkip(string, off)); - if(MD_TokenGroupsFromTokenKind(label_name.kind) & MD_TokenGroup_LabelString) + if((label_name.kind & MD_TokenGroup_Label) != 0) { off += label_name.outer_string.size; parsed_node = MD_MakeNode(MD_NodeKind_Label, label_name.string, label_name.outer_string, @@ -1864,7 +1831,7 @@ MD_ParseOneNode(MD_String8 string, MD_u64 offset) //- rjf: try to parse children for this node MD_u64 colon_check_off = off; - colon_check_off += MD_BytesFromStringTokenGroupRun(MD_StringSkip(string, colon_check_off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace); + colon_check_off += MD_LexAdvanceFromSkips(MD_StringSkip(string, colon_check_off), MD_TokenGroup_Comment|MD_TokenGroup_Whitespace); MD_Token colon = MD_TokenFromString(MD_StringSkip(string, colon_check_off)); if(MD_StringMatch(colon.string, MD_S8Lit(":"), 0) && colon.kind == MD_TokenKind_Symbol) { @@ -1948,7 +1915,7 @@ MD_ParseOneNode(MD_String8 string, MD_u64 offset) { break; } - else if(MD_TokenGroupsFromTokenKind(token.kind) & MD_TokenGroup_Whitespace) + else if((token.kind & MD_TokenGroup_Whitespace) != 0) { off += token.outer_string.size; }