From 8f8c5cb9d7d91ffc117668f7fdd2a3d0353ac91f Mon Sep 17 00:00:00 2001 From: ryanfleury Date: Thu, 24 Jun 2021 21:52:22 -0600 Subject: [PATCH 01/12] move to _COUNT instead of _MAX --- source/md.h | 4 ++-- source/md_impl.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/source/md.h b/source/md.h index 41eb0d4..9ac6e26 100644 --- a/source/md.h +++ b/source/md.h @@ -308,7 +308,7 @@ typedef enum MD_NodeKind MD_NodeKind_Label, MD_NodeKind_Tag, MD_NodeKind_ErrorMarker, - MD_NodeKind_MAX, + MD_NodeKind_COUNT, } MD_NodeKind; @@ -459,7 +459,7 @@ typedef enum MD_TokenKind MD_TokenKind_BadCharacter, // Character outside currently supported encodings - MD_TokenKind_MAX, + MD_TokenKind_COUNT, } MD_TokenKind; diff --git a/source/md_impl.c b/source/md_impl.c index 1c07170..c4eb774 100644 --- a/source/md_impl.c +++ b/source/md_impl.c @@ -629,7 +629,7 @@ MD_FUNCTION_IMPL MD_String8 MD_StringFromNodeKind(MD_NodeKind kind) { // NOTE(rjf): Must be kept in sync with MD_NodeKind enum. - static char *cstrs[MD_NodeKind_MAX] = + static char *cstrs[MD_NodeKind_COUNT] = { "Nil", "File", From 9175305b08e6b6987519224b7c859c1537cbde32 Mon Sep 17 00:00:00 2001 From: ryanfleury Date: Thu, 24 Jun 2021 22:12:17 -0600 Subject: [PATCH 02/12] rely on tree root for filename --- samples/output_parse/output_parse.c | 2 +- .../static_site_generator.c | 8 ++- source/md.h | 8 +-- source/md_impl.c | 62 ++++++++++--------- tests/grammar.c | 2 +- tests/sanity_tests.c | 2 +- 6 files changed, 45 insertions(+), 39 deletions(-) diff --git a/samples/output_parse/output_parse.c b/samples/output_parse/output_parse.c index 1e294a6..fc70f69 100644 --- a/samples/output_parse/output_parse.c +++ b/samples/output_parse/output_parse.c @@ -71,7 +71,7 @@ int main(int argument_count, char **arguments) for(MD_EachNode(root, first)) { - MD_String8 code_filename = MD_ChopExtension(MD_SkipFolder(root->filename)); + MD_String8 code_filename = MD_ChopExtension(MD_SkipFolder(root->string)); MD_String8 info_filename = MD_PushStringF("parsed_%.*s.txt", MD_StringExpand(code_filename)); printf("Parse Input -> Output: %.*s -> %.*s\n", MD_StringExpand(code_filename), MD_StringExpand(info_filename)); diff --git a/samples/static_site_generator/static_site_generator.c b/samples/static_site_generator/static_site_generator.c index c8de4f8..4689f97 100644 --- a/samples/static_site_generator/static_site_generator.c +++ b/samples/static_site_generator/static_site_generator.c @@ -127,7 +127,7 @@ int main(int argument_count, char **arguments) { PageInfo page_info = ParsePageInfo(root); - MD_String8 name_without_extension = MD_SkipFolder(MD_ChopExtension(root->filename)); + MD_String8 name_without_extension = MD_SkipFolder(MD_ChopExtension(root->string)); FILE *file = fopen(MD_PushStringF("%.*s.html", MD_StringExpand(name_without_extension)).str, "wb"); if(file) { @@ -413,7 +413,8 @@ GeneratePageContent(MD_Map *index_table, SiteInfo *site_info, PageInfo *page_inf { if(strnode->string.str[i] == '@') { - MD_ParseResult parse = MD_ParseOneNode(node->filename, MD_StringSubstring(strnode->string, i, strnode->string.size)); + MD_Node *root = MD_RootFromNode(node); + MD_ParseResult parse = MD_ParseOneNode(root->string, MD_StringSubstring(strnode->string, i, strnode->string.size)); if(!MD_NodeIsNil(parse.node)) { if(MD_NodeHasTag(node, MD_S8Lit("i"))) @@ -521,9 +522,10 @@ GeneratePageContent(MD_Map *index_table, SiteInfo *site_info, PageInfo *page_inf if(slot->val) { MD_Node *node = slot->val; + MD_Node *root = MD_RootFromNode(node); PageInfo info = ParsePageInfo(node); - MD_String8 filename = node->filename; + MD_String8 filename = root->string; MD_String8 filename_no_ext = MD_ChopExtension(MD_SkipFolder(filename)); MD_String8 link = MD_PushStringF("%.*s.html", MD_StringExpand(filename_no_ext)); MD_String8 name = info.title->string; diff --git a/source/md.h b/source/md.h index 9ac6e26..d73c0b2 100644 --- a/source/md.h +++ b/source/md.h @@ -362,8 +362,6 @@ struct MD_Node MD_String8 comment_after; // Source code location information. - MD_String8 filename; - MD_u8 *file_contents; MD_u8 *at; // Reference. @@ -693,15 +691,14 @@ MD_FUNCTION MD_ParseResult MD_ParseWholeString(MD_String8 filename, MD_String8 c MD_FUNCTION MD_ParseResult MD_ParseWholeFile(MD_String8 filename); //~ Location Conversion -MD_FUNCTION MD_CodeLoc MD_CodeLocFromFileOffset(MD_String8 filename, MD_u8 *base, MD_u8 *off); +MD_FUNCTION MD_CodeLoc MD_CodeLocFromFileBaseOff(MD_String8 filename, MD_u8 *base, MD_u8 *off); MD_FUNCTION MD_CodeLoc MD_CodeLocFromNode(MD_Node *node); //~ Tree/List Building MD_FUNCTION MD_b32 MD_NodeIsNil(MD_Node *node); MD_FUNCTION MD_Node *MD_NilNode(void); MD_FUNCTION MD_Node *MD_MakeNode(MD_NodeKind kind, MD_String8 string, - MD_String8 whole_string, MD_String8 filename, - MD_u8 *file_contents, MD_u8 *at); + MD_String8 whole_string, MD_u8 *at); MD_FUNCTION void MD_PushSibling(MD_Node **first, MD_Node **last, MD_Node *new_sibling); MD_FUNCTION void MD_PushChild(MD_Node *parent, MD_Node *new_child); MD_FUNCTION void MD_PushTag(MD_Node *node, MD_Node *tag); @@ -711,6 +708,7 @@ MD_FUNCTION MD_Node *MD_PushReference(MD_Node *list, MD_Node *target); MD_FUNCTION MD_Node * MD_NodeFromString(MD_Node *first, MD_Node *last, MD_String8 string); MD_FUNCTION MD_Node * MD_NodeFromIndex(MD_Node *first, MD_Node *last, int n); MD_FUNCTION int MD_IndexFromNode(MD_Node *node); +MD_FUNCTION MD_Node * MD_RootFromNode(MD_Node *node); MD_FUNCTION MD_Node * MD_NextNodeSibling(MD_Node *last, MD_String8 string); MD_FUNCTION MD_Node * MD_ChildFromString(MD_Node *node, MD_String8 child_string); MD_FUNCTION MD_Node * MD_TagFromString(MD_Node *node, MD_String8 tag_string); diff --git a/source/md_impl.c b/source/md_impl.c index c4eb774..809ca9d 100644 --- a/source/md_impl.c +++ b/source/md_impl.c @@ -27,26 +27,24 @@ static MD_Node _md_nil_node = 0xdeadffffffffffull, // string_hash MD_ZERO_STRUCT, // comment_before MD_ZERO_STRUCT, // comment_after - {(MD_u8*)"`NIL DD NODE`", 13}, // filename - 0, // file_contents 0, // at &_md_nil_node, // ref_target }; //~ Memory Operations -MD_PRIVATE_FUNCTION_IMPL void +MD_FUNCTION_IMPL void MD_MemoryZero(void *memory, MD_u64 size) { memset(memory, 0, size); } -MD_PRIVATE_FUNCTION_IMPL void +MD_FUNCTION_IMPL void MD_MemoryCopy(void *dest, void *src, MD_u64 size) { memcpy(dest, src, size); } -MD_PRIVATE_FUNCTION_IMPL void * +MD_FUNCTION_IMPL void * MD_AllocZero(MD_u64 size) { #if !defined(MD_IMPL_Alloc) @@ -1067,7 +1065,7 @@ MD_PRIVATE_FUNCTION_IMPL MD_Node * _MD_MakeNode_Ctx(MD_ParseCtx *ctx, MD_NodeKind kind, MD_String8 string, MD_String8 outer, MD_u8 *at) { - return MD_MakeNode(kind, string, outer, ctx->filename, ctx->file_contents.str, at); + return MD_MakeNode(kind, string, outer, at); } MD_PRIVATE_FUNCTION_IMPL void _MD_ParseTagList(MD_ParseCtx *ctx, MD_Node **first_out, MD_Node **last_out); @@ -1996,11 +1994,8 @@ MD_FUNCTION_IMPL MD_ParseResult MD_ParseWholeString(MD_String8 filename, MD_String8 contents) { MD_ParseResult result = MD_ZERO_STRUCT; - // TODO(allen): we want to make the string for this actually just - // be the filename in the root/file idea. - MD_String8 root_string = MD_PushStringF("`DD Parsed From \"%.*s\"`", MD_StringExpand(filename)); - MD_Node *root = MD_MakeNode(MD_NodeKind_File, root_string, root_string, - filename, contents.str, contents.str); + MD_String8 root_string = filename; + MD_Node *root = MD_MakeNode(MD_NodeKind_File, root_string, root_string, contents.str); if(contents.size > 0) { // NOTE(allen): setup parse context @@ -2062,23 +2057,26 @@ MD_ParseWholeFile(MD_String8 filename) //~ Location Conversions -MD_PRIVATE_FUNCTION_IMPL MD_CodeLoc -MD_CodeLocFromFileOffset(MD_String8 filename, MD_u8 *base, MD_u8 *at) +MD_FUNCTION_IMPL MD_CodeLoc +MD_CodeLocFromFileBaseOff(MD_String8 filename, MD_u8 *base, MD_u8 *at) { MD_CodeLoc loc; loc.filename = filename; loc.line = 1; loc.column = 1; - for(MD_u64 i = 0; base+i < at && base[i]; i += 1) + if(base != 0) { - if(base[i] == '\n') + for(MD_u64 i = 0; base+i < at && base[i]; i += 1) { - loc.line += 1; - loc.column = 1; - } - else - { - loc.column += 1; + if(base[i] == '\n') + { + loc.line += 1; + loc.column = 1; + } + else + { + loc.column += 1; + } } } return loc; @@ -2087,7 +2085,8 @@ MD_CodeLocFromFileOffset(MD_String8 filename, MD_u8 *base, MD_u8 *at) MD_FUNCTION_IMPL MD_CodeLoc MD_CodeLocFromNode(MD_Node *node) { - MD_CodeLoc loc = MD_CodeLocFromFileOffset(node->filename, node->file_contents, node->at); + MD_Node *root = MD_RootFromNode(node); + MD_CodeLoc loc = MD_CodeLocFromFileBaseOff(root->string, root->at, node->at); return loc; } @@ -2104,8 +2103,7 @@ MD_NilNode(void) { return &_md_nil_node; } MD_FUNCTION_IMPL MD_Node * MD_MakeNode(MD_NodeKind kind, MD_String8 string, - MD_String8 whole_string, MD_String8 filename, - MD_u8 *file_contents, MD_u8 *at) + MD_String8 whole_string, MD_u8 *at) { MD_Node *node = MD_PushArray(MD_Node, 1); node->kind = kind; @@ -2114,8 +2112,6 @@ MD_MakeNode(MD_NodeKind kind, MD_String8 string, node->next = node->prev = node->parent = node->first_child = node->last_child = node->first_tag = node->last_tag = node->ref_target = MD_NilNode(); - node->filename = filename; - node->file_contents = file_contents; node->at = at; return node; } @@ -2161,8 +2157,7 @@ MD_PushTag(MD_Node *node, MD_Node *tag) MD_FUNCTION_IMPL MD_Node* MD_PushReference(MD_Node *list, MD_Node *target) { - MD_Node *n = MD_MakeNode(MD_NodeKind_Reference, target->string, target->whole_string, - target->filename, target->file_contents, target->at); + MD_Node *n = MD_MakeNode(MD_NodeKind_Reference, target->string, target->whole_string, target->at); n->ref_target = target; MD_PushChild(list, n); return(n); @@ -2215,6 +2210,17 @@ MD_IndexFromNode(MD_Node *node) return idx; } +MD_FUNCTION MD_Node * +MD_RootFromNode(MD_Node *node) +{ + MD_Node *parent = node; + for(MD_Node *p = parent; !MD_NodeIsNil(p); p = p->parent) + { + parent = p; + } + return parent; +} + MD_FUNCTION_IMPL MD_Node * MD_NextNodeSibling(MD_Node *last, MD_String8 string) { diff --git a/tests/grammar.c b/tests/grammar.c index de5a5ba..b312d63 100644 --- a/tests/grammar.c +++ b/tests/grammar.c @@ -66,7 +66,7 @@ static MD_Node * NewChildLabel(MD_Node *parent, MD_String8 label) { MD_Node *result = 0; - result = MD_MakeNode(MD_NodeKind_Label, label, label, MD_S8Lit(""), 0, 0); + result = MD_MakeNode(MD_NodeKind_Label, label, label, 0); if(parent) { MD_PushChild(parent, result); diff --git a/tests/sanity_tests.c b/tests/sanity_tests.c index 1a3e0b2..64536a5 100644 --- a/tests/sanity_tests.c +++ b/tests/sanity_tests.c @@ -58,7 +58,7 @@ EndTest(void) static MD_Node * MakeTestNode(MD_NodeKind kind, MD_String8 string) { - return MD_MakeNode(kind, string, string, MD_S8Lit("`TEST_NODE`"), 0, 0); + return MD_MakeNode(kind, string, string, 0); } static MD_C_Expr * From d1fe089804274393532e2c8256f1b8e6c12298b3 Mon Sep 17 00:00:00 2001 From: ryanfleury Date: Thu, 24 Jun 2021 22:46:56 -0600 Subject: [PATCH 03/12] bugfix; build dummy root node for error marker nodes, to ensure that they have an accurate way to tell their location --- source/md_impl.c | 2 ++ tests/sanity_tests.c | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/source/md_impl.c b/source/md_impl.c index 809ca9d..a6de8c1 100644 --- a/source/md_impl.c +++ b/source/md_impl.c @@ -1234,9 +1234,11 @@ MD_PushNodeErrorF(MD_ParseCtx *ctx, MD_Node *node, MD_MessageKind kind, char *fm MD_FUNCTION void MD_PushTokenError(MD_ParseCtx *ctx, MD_Token token, MD_MessageKind kind, MD_String8 str){ + MD_Node *stub_file = MD_MakeNode(MD_NodeKind_ErrorMarker, ctx->file_contents, ctx->file_contents, ctx->file_contents.str); MD_Node *stub = _MD_MakeNode_Ctx(ctx, MD_NodeKind_ErrorMarker, token.string, token.outer_string, token.outer_string.str); MD_PushNodeError(ctx, stub, kind, str); + MD_PushChild(stub_file, stub); } MD_FUNCTION void diff --git a/tests/sanity_tests.c b/tests/sanity_tests.c index 64536a5..c800638 100644 --- a/tests/sanity_tests.c +++ b/tests/sanity_tests.c @@ -31,6 +31,13 @@ TestResult(MD_b32 result) test_ctx.number_of_tests += 1; test_ctx.number_passed += !!result; printf(result ? "." : "X"); + +#if 0 + if(result == 0) + { + __debugbreak(); + } +#endif } static void From 4b231f0724ca3b53b5d5428c17eac67e99426bae Mon Sep 17 00:00:00 2001 From: ryanfleury Date: Thu, 24 Jun 2021 22:48:24 -0600 Subject: [PATCH 04/12] kill ctx node building helper --- source/md_impl.c | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/source/md_impl.c b/source/md_impl.c index a6de8c1..cefed76 100644 --- a/source/md_impl.c +++ b/source/md_impl.c @@ -1056,18 +1056,6 @@ MD_MapOverwrite(MD_Map *map, MD_MapKey key, void *val){ //~ Parsing -// TODO(allen): This helper only helps because `ctx` bundles two elements together -// that the "low level" MD_MakeNode treats as seperate. However they aren't very -// useful as seperate concepts. If we get the "handle of file" concept down to -// a single root node pointer, then this can be eliminated and users can get -// this effect by composing 'ctx->file_root' with 'MD_MakeNode'. -MD_PRIVATE_FUNCTION_IMPL MD_Node * -_MD_MakeNode_Ctx(MD_ParseCtx *ctx, MD_NodeKind kind, - MD_String8 string, MD_String8 outer, MD_u8 *at) -{ - return MD_MakeNode(kind, string, outer, at); -} - MD_PRIVATE_FUNCTION_IMPL void _MD_ParseTagList(MD_ParseCtx *ctx, MD_Node **first_out, MD_Node **last_out); MD_PRIVATE_FUNCTION_IMPL MD_NodeFlags @@ -1122,8 +1110,7 @@ _MD_ParseTagList(MD_ParseCtx *ctx, MD_Node **first_out, MD_Node **last_out) MD_Token name = MD_ZERO_STRUCT; if(MD_Parse_RequireKind(ctx, MD_TokenKind_Identifier, &name)) { - MD_Node *tag = _MD_MakeNode_Ctx(ctx, MD_NodeKind_Tag, - name.string, name.outer_string, name.outer_string.str); + MD_Node *tag = MD_MakeNode(MD_NodeKind_Tag, name.string, name.outer_string, name.outer_string.str); MD_Token token = MD_Parse_PeekSkipSome(ctx, 0); if(MD_StringMatch(token.string, MD_S8Lit("("), 0)) { @@ -1235,8 +1222,7 @@ MD_PushNodeErrorF(MD_ParseCtx *ctx, MD_Node *node, MD_MessageKind kind, char *fm MD_FUNCTION void MD_PushTokenError(MD_ParseCtx *ctx, MD_Token token, MD_MessageKind kind, MD_String8 str){ MD_Node *stub_file = MD_MakeNode(MD_NodeKind_ErrorMarker, ctx->file_contents, ctx->file_contents, ctx->file_contents.str); - MD_Node *stub = _MD_MakeNode_Ctx(ctx, MD_NodeKind_ErrorMarker, - token.string, token.outer_string, token.outer_string.str); + MD_Node *stub = MD_MakeNode(MD_NodeKind_ErrorMarker, token.string, token.outer_string, token.outer_string.str); MD_PushNodeError(ctx, stub, kind, str); MD_PushChild(stub_file, stub); } @@ -1835,9 +1821,7 @@ MD_ParseOneNodeFromCtx(MD_ParseCtx *ctx) MD_StringMatch(next_token.string, MD_S8Lit("{"), 0) || MD_StringMatch(next_token.string, MD_S8Lit("["), 0))) { - result.node = _MD_MakeNode_Ctx(ctx, MD_NodeKind_Label, - MD_S8Lit(""), MD_S8Lit(""), - next_token.outer_string.str); + result.node = MD_MakeNode(MD_NodeKind_Label, MD_S8Lit(""), MD_S8Lit(""), next_token.outer_string.str); MD_Parse_Set(ctx, result.node, MD_ParseSetFlag_Paren | @@ -1853,8 +1837,7 @@ MD_ParseOneNodeFromCtx(MD_ParseCtx *ctx) MD_Parse_RequireKind(ctx, MD_TokenKind_CharLiteral, &token) || MD_Parse_RequireKind(ctx, MD_TokenKind_Symbol, &token)) { - result.node = _MD_MakeNode_Ctx(ctx, MD_NodeKind_Label, - token.string, token.outer_string, token.outer_string.str); + result.node = MD_MakeNode(MD_NodeKind_Label, token.string, token.outer_string, token.outer_string.str); result.node->flags |= _MD_NodeFlagsFromTokenKind(token.kind); if(token.kind == MD_TokenKind_CharLiteral || token.kind == MD_TokenKind_StringLiteral) From a533b75ce4d142c68d7a194e5e7deee2d04c645e Mon Sep 17 00:00:00 2001 From: ryanfleury Date: Thu, 24 Jun 2021 22:49:58 -0600 Subject: [PATCH 05/12] publicify canonical conversion helper --- source/md.h | 2 ++ source/md_impl.c | 10 +++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/source/md.h b/source/md.h index d73c0b2..2f00562 100644 --- a/source/md.h +++ b/source/md.h @@ -664,6 +664,8 @@ MD_FUNCTION MD_MapSlot* MD_MapOverwrite(MD_Map *map, MD_MapKey key, void *val); //~ Parsing +MD_FUNCTION_IMPL MD_NodeFlags MD_NodeFlagsFromTokenKind(MD_TokenKind kind); + MD_FUNCTION MD_b32 MD_TokenKindIsWhitespace(MD_TokenKind kind); MD_FUNCTION MD_b32 MD_TokenKindIsComment(MD_TokenKind kind); MD_FUNCTION MD_b32 MD_TokenKindIsRegular(MD_TokenKind kind); diff --git a/source/md_impl.c b/source/md_impl.c index cefed76..816eb12 100644 --- a/source/md_impl.c +++ b/source/md_impl.c @@ -1056,10 +1056,8 @@ MD_MapOverwrite(MD_Map *map, MD_MapKey key, void *val){ //~ Parsing -MD_PRIVATE_FUNCTION_IMPL void _MD_ParseTagList(MD_ParseCtx *ctx, MD_Node **first_out, MD_Node **last_out); - -MD_PRIVATE_FUNCTION_IMPL MD_NodeFlags -_MD_NodeFlagsFromTokenKind(MD_TokenKind kind) +MD_FUNCTION_IMPL MD_NodeFlags +MD_NodeFlagsFromTokenKind(MD_TokenKind kind) { MD_NodeFlags result = 0; switch (kind){ @@ -1071,6 +1069,8 @@ _MD_NodeFlagsFromTokenKind(MD_TokenKind kind) return(result); } +MD_PRIVATE_FUNCTION_IMPL void _MD_ParseTagList(MD_ParseCtx *ctx, MD_Node **first_out, MD_Node **last_out); + MD_PRIVATE_FUNCTION_IMPL MD_b32 _MD_StringLiteralIsBalanced(MD_Token token) { @@ -1838,7 +1838,7 @@ MD_ParseOneNodeFromCtx(MD_ParseCtx *ctx) MD_Parse_RequireKind(ctx, MD_TokenKind_Symbol, &token)) { result.node = MD_MakeNode(MD_NodeKind_Label, token.string, token.outer_string, token.outer_string.str); - result.node->flags |= _MD_NodeFlagsFromTokenKind(token.kind); + result.node->flags |= MD_NodeFlagsFromTokenKind(token.kind); if(token.kind == MD_TokenKind_CharLiteral || token.kind == MD_TokenKind_StringLiteral) { From 4b8d2c3adec6ddce5658fd8315a8347073185657 Mon Sep 17 00:00:00 2001 From: ryanfleury Date: Thu, 24 Jun 2021 22:52:47 -0600 Subject: [PATCH 06/12] oops; also add zero-allocator macro --- source/md.h | 7 ++++++- source/md_impl.c | 4 +--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/source/md.h b/source/md.h index 2f00562..eaca30c 100644 --- a/source/md.h +++ b/source/md.h @@ -579,6 +579,11 @@ MD_FUNCTION void MD_MemoryCopy(void *dst, void *src, MD_u64 size); MD_FUNCTION void* MD_AllocZero(MD_u64 size); #define MD_PushArray(T,c) (T*)MD_AllocZero(sizeof(T)*(c)) +// NOTE(rjf): Right now, both calls just automatically zero their memory, +// but I'm explicitly splitting this out to ensure that we don't accidentally +// assume that we have zeroed memory incorrectly in the future (when our +// allocation approach changes). +#define MD_PushArrayZero(T,c) (T*)MD_AllocZero(sizeof(T)*(c)) //~ Characters MD_FUNCTION MD_b32 MD_CharIsAlpha(MD_u8 c); @@ -664,7 +669,7 @@ MD_FUNCTION MD_MapSlot* MD_MapOverwrite(MD_Map *map, MD_MapKey key, void *val); //~ Parsing -MD_FUNCTION_IMPL MD_NodeFlags MD_NodeFlagsFromTokenKind(MD_TokenKind kind); +MD_FUNCTION MD_NodeFlags MD_NodeFlagsFromTokenKind(MD_TokenKind kind); MD_FUNCTION MD_b32 MD_TokenKindIsWhitespace(MD_TokenKind kind); MD_FUNCTION MD_b32 MD_TokenKindIsComment(MD_TokenKind kind); diff --git a/source/md_impl.c b/source/md_impl.c index 816eb12..c4b5f6a 100644 --- a/source/md_impl.c +++ b/source/md_impl.c @@ -942,9 +942,7 @@ MD_MapMakeBucketCount(MD_u64 bucket_count){ // make most sense with a parameter MD_Map result = {0}; result.bucket_count = bucket_count; - // TODO(allen): push array zero - result.buckets = MD_PushArray(MD_MapBucket, bucket_count); - memset(result.buckets, 0, sizeof(*result.buckets)*bucket_count); + result.buckets = MD_PushArrayZero(MD_MapBucket, bucket_count); return(result); } From 647dd40a0f8318427b1988f3a5ae457acdfe5b79 Mon Sep 17 00:00:00 2001 From: ryanfleury Date: Thu, 24 Jun 2021 22:57:06 -0600 Subject: [PATCH 07/12] kill char literal --- source/md.h | 5 ----- source/md_c_helpers.c | 1 - source/md_impl.c | 11 +---------- tests/grammar.c | 12 ++++++------ tests/sanity_tests.c | 6 +++--- 5 files changed, 10 insertions(+), 25 deletions(-) diff --git a/source/md.h b/source/md.h index eaca30c..588e404 100644 --- a/source/md.h +++ b/source/md.h @@ -331,7 +331,6 @@ enum MD_NodeFlag_Numeric = (1<<10), MD_NodeFlag_Identifier = (1<<11), MD_NodeFlag_StringLiteral = (1<<12), - MD_NodeFlag_CharLiteral = (1<<13), }; #define MD_NodeFlag_AfterFromBefore(f) ((f) << 2) @@ -432,10 +431,6 @@ typedef enum MD_TokenKind // many lines. MD_TokenKind_StringLiteral, - // A group of arbitrary characters, grouped together by a ' character at the beginning, - // and a ' character at the end. - MD_TokenKind_CharLiteral, - // A group of symbolic characters, where symbolic characters means any of the following: // ~!@#$%^&*()-+=[{]}:;<>,./?|\ // diff --git a/source/md_c_helpers.c b/source/md_c_helpers.c index f7fcb29..14e0ed2 100644 --- a/source/md_c_helpers.c +++ b/source/md_c_helpers.c @@ -235,7 +235,6 @@ _MD_NodeParse_ConsumeLiteral(_MD_NodeParseCtx *ctx, MD_Node **out) { MD_b32 result = 0; if(ctx->at->flags & MD_NodeFlag_StringLiteral || - ctx->at->flags & MD_NodeFlag_CharLiteral || ctx->at->flags & MD_NodeFlag_Numeric) { result = 1; diff --git a/source/md_impl.c b/source/md_impl.c index c4b5f6a..a159007 100644 --- a/source/md_impl.c +++ b/source/md_impl.c @@ -662,7 +662,6 @@ MD_StringListFromNodeFlags(MD_NodeFlags flags) "Numeric", "Identifier", "StringLiteral", - "CharLiteral", }; MD_String8List list = MD_ZERO_STRUCT; @@ -1062,7 +1061,6 @@ MD_NodeFlagsFromTokenKind(MD_TokenKind kind) case MD_TokenKind_Identifier: result = MD_NodeFlag_Identifier; break; case MD_TokenKind_NumericLiteral: result = MD_NodeFlag_Numeric; break; case MD_TokenKind_StringLiteral: result = MD_NodeFlag_StringLiteral; break; - case MD_TokenKind_CharLiteral: result = MD_NodeFlag_CharLiteral; break; } return(result); } @@ -1450,12 +1448,6 @@ MD_Parse_LexNext(MD_ParseCtx *ctx) // set token kind token.kind = MD_TokenKind_StringLiteral; - // TODO(allen): I don't see any place where this actually proves useful. - // I think it'd tidy things up to drop it. we already use this as a string - // in a lot of usages of metadesk. - if (d == '\'' && !is_triplet){ - token.kind = MD_TokenKind_CharLiteral; - } }break; // NOTE(allen): Identifiers, Numbers, Operators @@ -1832,13 +1824,12 @@ MD_ParseOneNodeFromCtx(MD_ParseCtx *ctx) else if(MD_Parse_RequireKind(ctx, MD_TokenKind_Identifier, &token) || MD_Parse_RequireKind(ctx, MD_TokenKind_NumericLiteral, &token) || MD_Parse_RequireKind(ctx, MD_TokenKind_StringLiteral, &token) || - MD_Parse_RequireKind(ctx, MD_TokenKind_CharLiteral, &token) || MD_Parse_RequireKind(ctx, MD_TokenKind_Symbol, &token)) { result.node = MD_MakeNode(MD_NodeKind_Label, token.string, token.outer_string, token.outer_string.str); result.node->flags |= MD_NodeFlagsFromTokenKind(token.kind); - if(token.kind == MD_TokenKind_CharLiteral || token.kind == MD_TokenKind_StringLiteral) + if(token.kind == MD_TokenKind_StringLiteral) { if(!_MD_StringLiteralIsBalanced(token)) { diff --git a/tests/grammar.c b/tests/grammar.c index b312d63..d9da4ea 100644 --- a/tests/grammar.c +++ b/tests/grammar.c @@ -86,7 +86,7 @@ static MD_Node * NewChild(MD_Node *parent) #define SET_DEPTH(depth_map, node, depth) MD_MapOverwrite(depth_map, MD_MapKeyPtr(node), (void *)(depth)) static void PrintRule(MD_Map *depth_map, MD_Node *rule) { - MD_b32 is_literal_char = rule->flags & MD_NodeFlag_CharLiteral; + MD_b32 is_literal_char = rule->flags & MD_NodeFlag_StringLiteral; MD_b32 optional = MD_NodeHasTag(rule, MD_S8Lit(OPTIONAL_TAG)); @@ -221,7 +221,7 @@ static void ExpandRule(MD_Node *rule, MD_String8List *out_strings, MD_Node *cur_ } else { - if(rule_element->flags & MD_NodeFlag_CharLiteral) // NOTE(mal): Terminal production + if(rule_element->flags & MD_NodeFlag_StringLiteral) // NOTE(mal): Terminal production { char c = 0; if(rule_element->string.size == 2 && rule_element->string.str[0] == '\\') @@ -308,7 +308,7 @@ static MD_Node * FindNonTerminalProduction(MD_Node *node, MD_Map *visited) { if(MD_NodeIsNil(node->first_child)) { - if(node->flags & MD_NodeFlag_CharLiteral) + if(node->flags & MD_NodeFlag_StringLiteral) { } else @@ -425,7 +425,7 @@ static void ComputeElementDepth(MD_Map *depth_map, MD_Node *re) } else { - if(re->flags & MD_NodeFlag_CharLiteral) // NOTE(mal): Terminal production + if(re->flags & MD_NodeFlag_StringLiteral) // NOTE(mal): Terminal production { result = 1; } @@ -561,7 +561,7 @@ int main(int argument_count, char **arguments) else { if(MD_StringMatch(rule_element->string, MD_S8Lit("|"), 0) && - !(rule_element->flags & MD_NodeFlag_CharLiteral)) + !(rule_element->flags & MD_NodeFlag_StringLiteral)) { rule = NewChild(production); } @@ -658,7 +658,7 @@ int main(int argument_count, char **arguments) { MD_u64 depth = 0; MD_Assert(MD_NodeIsNil(rule_element->first_child)); - if(!(rule_element->flags & MD_NodeFlag_CharLiteral)) + if(!(rule_element->flags & MD_NodeFlag_StringLiteral)) { MD_Node * production = MD_MapLookup(globals.production_table, MD_MapKeyStr(rule_element->string))->val; depth = GET_DEPTH(depth_map, production); diff --git a/tests/sanity_tests.c b/tests/sanity_tests.c index c800638..ee8c178 100644 --- a/tests/sanity_tests.c +++ b/tests/sanity_tests.c @@ -287,7 +287,7 @@ int main(void) TestResult(MD_ParseOneNode(MD_S8Lit(""), MD_S8Lit("\"foo\"")).node->flags & MD_NodeFlag_StringLiteral); TestResult(MD_ParseOneNode(MD_S8Lit(""), MD_S8Lit("'foo'")).node->flags & - MD_NodeFlag_CharLiteral); + MD_NodeFlag_StringLiteral); } Test("Expression Evaluation") @@ -395,11 +395,11 @@ int main(void) { TestResult(MD_StringMatch(MD_StringFromNodeKind(MD_NodeKind_Label), MD_S8Lit("Label"), 0)); TestResult(MD_StringMatch(MD_StringFromNodeKind(MD_NodeKind_Label), MD_S8Lit("Label"), 0)); - MD_String8List list = MD_StringListFromNodeFlags(MD_NodeFlag_CharLiteral | MD_NodeFlag_ParenLeft | MD_NodeFlag_BeforeSemicolon); + MD_String8List list = MD_StringListFromNodeFlags(MD_NodeFlag_StringLiteral | MD_NodeFlag_ParenLeft | MD_NodeFlag_BeforeSemicolon); MD_b32 match = 1; for(MD_String8Node *node = list.first; node; node = node->next) { - if(!MD_StringMatch(node->string, MD_S8Lit("CharLiteral"), 0) && + if(!MD_StringMatch(node->string, MD_S8Lit("StringLiteral"), 0) && !MD_StringMatch(node->string, MD_S8Lit("ParenLeft"), 0) && !MD_StringMatch(node->string, MD_S8Lit("BeforeSemicolon"), 0)) { From e1f5953598e9ea32ddfa3e1c78fb68e51fea251e Mon Sep 17 00:00:00 2001 From: ryanfleury Date: Thu, 24 Jun 2021 22:58:27 -0600 Subject: [PATCH 08/12] reorder node flags, so that after-from-before is always the same --- source/md.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/md.h b/source/md.h index 588e404..a010b9a 100644 --- a/source/md.h +++ b/source/md.h @@ -323,9 +323,9 @@ enum MD_NodeFlag_BraceRight = (1<<5), MD_NodeFlag_BeforeSemicolon = (1<<6), - MD_NodeFlag_BeforeComma = (1<<7), + MD_NodeFlag_AfterSemicolon = (1<<7), - MD_NodeFlag_AfterSemicolon = (1<<8), + MD_NodeFlag_BeforeComma = (1<<8), MD_NodeFlag_AfterComma = (1<<9), MD_NodeFlag_Numeric = (1<<10), @@ -333,7 +333,7 @@ enum MD_NodeFlag_StringLiteral = (1<<12), }; -#define MD_NodeFlag_AfterFromBefore(f) ((f) << 2) +#define MD_NodeFlag_AfterFromBefore(f) ((f) << 1) typedef struct MD_Node MD_Node; struct MD_Node From f7e40e224fa9fc9783be58e65d5aac91527f1fe7 Mon Sep 17 00:00:00 2001 From: ryanfleury Date: Thu, 24 Jun 2021 23:00:38 -0600 Subject: [PATCH 09/12] fix up before/after flag helper macro to be more robust to future changes --- source/md.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/source/md.h b/source/md.h index a010b9a..2a0ef9a 100644 --- a/source/md.h +++ b/source/md.h @@ -313,8 +313,14 @@ typedef enum MD_NodeKind MD_NodeKind; typedef MD_u32 MD_NodeFlags; +#define MD_NodeFlag_AfterFromBefore(f) ((f) << 1) enum { + // NOTE(rjf): Because of MD_NodeFlag_AfterFromBefore, it is *required* that + // every single pair of "Before*" or "After*" flags be in the correct order + // which is that the Before* flag comes first, and the After* flag comes + // immediately after (After* being the more significant bit). + MD_NodeFlag_ParenLeft = (1<<0), MD_NodeFlag_ParenRight = (1<<1), MD_NodeFlag_BracketLeft = (1<<2), @@ -333,8 +339,6 @@ enum MD_NodeFlag_StringLiteral = (1<<12), }; -#define MD_NodeFlag_AfterFromBefore(f) ((f) << 1) - typedef struct MD_Node MD_Node; struct MD_Node { From 4ec674600dc93c702dc9a07a5161a6b7bdcde9a0 Mon Sep 17 00:00:00 2001 From: ryanfleury Date: Thu, 24 Jun 2021 23:30:01 -0600 Subject: [PATCH 10/12] split up string literal token kinds to encode which syntax they used; translate these to node flags --- source/md.h | 68 ++++++++++++++--------------- source/md_impl.c | 111 ++++++++++++++++++++++++++++++++++------------- 2 files changed, 113 insertions(+), 66 deletions(-) diff --git a/source/md.h b/source/md.h index 2a0ef9a..2206ee6 100644 --- a/source/md.h +++ b/source/md.h @@ -298,6 +298,8 @@ MD_WordStyle; typedef enum MD_NodeKind { + // NOTE(rjf): Must be kept in sync with MD_StringFromNodeKind. + MD_NodeKind_Nil, MD_NodeKind_File, MD_NodeKind_List, @@ -312,31 +314,40 @@ typedef enum MD_NodeKind } MD_NodeKind; -typedef MD_u32 MD_NodeFlags; +typedef MD_u64 MD_NodeFlags; #define MD_NodeFlag_AfterFromBefore(f) ((f) << 1) enum { + // NOTE(rjf): Must be kept in sync with MD_StringListFromNodeFlags. + // NOTE(rjf): Because of MD_NodeFlag_AfterFromBefore, it is *required* that // every single pair of "Before*" or "After*" flags be in the correct order // which is that the Before* flag comes first, and the After* flag comes // immediately after (After* being the more significant bit). - MD_NodeFlag_ParenLeft = (1<<0), - MD_NodeFlag_ParenRight = (1<<1), - MD_NodeFlag_BracketLeft = (1<<2), - MD_NodeFlag_BracketRight = (1<<3), - MD_NodeFlag_BraceLeft = (1<<4), - MD_NodeFlag_BraceRight = (1<<5), + MD_NodeFlag_ParenLeft = (1<<0), + MD_NodeFlag_ParenRight = (1<<1), + MD_NodeFlag_BracketLeft = (1<<2), + MD_NodeFlag_BracketRight = (1<<3), + MD_NodeFlag_BraceLeft = (1<<4), + MD_NodeFlag_BraceRight = (1<<5), - MD_NodeFlag_BeforeSemicolon = (1<<6), - MD_NodeFlag_AfterSemicolon = (1<<7), + MD_NodeFlag_BeforeSemicolon = (1<<6), + MD_NodeFlag_AfterSemicolon = (1<<7), - MD_NodeFlag_BeforeComma = (1<<8), - MD_NodeFlag_AfterComma = (1<<9), + MD_NodeFlag_BeforeComma = (1<<8), + MD_NodeFlag_AfterComma = (1<<9), - MD_NodeFlag_Numeric = (1<<10), - MD_NodeFlag_Identifier = (1<<11), - MD_NodeFlag_StringLiteral = (1<<12), + MD_NodeFlag_StringSingleQuote = (1<<10), + MD_NodeFlag_StringDoubleQuote = (1<<13), + MD_NodeFlag_StringTick = (1<<15), + MD_NodeFlag_StringTripletSingleQuote= (1<<16), + MD_NodeFlag_StringTripletDoubleQuote= (1<<18), + MD_NodeFlag_StringTripletTick = (1<<20), + + MD_NodeFlag_Numeric = (1<<22), + MD_NodeFlag_Identifier = (1<<23), + MD_NodeFlag_StringLiteral = (1<<24), }; typedef struct MD_Node MD_Node; @@ -420,30 +431,15 @@ typedef enum MD_TokenKind MD_TokenKind_Nil, MD_TokenKind_RegularMin, - - // A group of characters that begins with an underscore or alphabetic character, - // and consists of numbers, alphabetic characters, or underscores after that. MD_TokenKind_Identifier, - - // A group of characters beginning with a numeric character or a '-', and then - // consisting of only numbers, alphabetic characters, or '.'s after that. MD_TokenKind_NumericLiteral, - - // A group of arbitrary characters, grouped together by a " character, OR by a - // """ symbol at the beginning and end of the group. String literals beginning with - // " are to only be specified on a single line, but """ strings can exist across - // many lines. - MD_TokenKind_StringLiteral, - - // A group of symbolic characters, where symbolic characters means any of the following: - // ~!@#$%^&*()-+=[{]}:;<>,./?|\ - // - // Groups of multiple characters are only allowed in specific circumstances. Most of these - // are only 1 character long, but some groups are allowed: - // - // "<<", ">>", "<=", ">=", "+=", "-=", "*=", "/=", "::", ":=", "==", "&=", "|=", "->" + MD_TokenKind_StringLiteralSingleQuote, + MD_TokenKind_StringLiteralSingleQuoteTriplet, + MD_TokenKind_StringLiteralDoubleQuote, + MD_TokenKind_StringLiteralDoubleQuoteTriplet, + MD_TokenKind_StringLiteralTick, + MD_TokenKind_StringLiteralTickTriplet, MD_TokenKind_Symbol, - MD_TokenKind_RegularMax, MD_TokenKind_Comment, @@ -453,8 +449,8 @@ typedef enum MD_TokenKind MD_TokenKind_Newline, MD_TokenKind_WhitespaceMax, - MD_TokenKind_BadCharacter, // Character outside currently supported encodings + MD_TokenKind_BadCharacter, MD_TokenKind_COUNT, } diff --git a/source/md_impl.c b/source/md_impl.c index a159007..d2f0a0f 100644 --- a/source/md_impl.c +++ b/source/md_impl.c @@ -4,13 +4,8 @@ #define MD_PRIVATE_FUNCTION_IMPL MD_FUNCTION_IMPL #define MD_UNTERMINATED_TOKEN_LEN_CAP 20 -//~ +//~ Nil Node Definition -// NOTE(allen): Review @rjf; Building in C++ -// While very latest version of C++ have designated initializers -// I would like to be able to build on more simple versions, so I -// ditched the designated initializers in favor of the extra work -// of maintaining order based initializers. static MD_Node _md_nil_node = { &_md_nil_node, // next @@ -32,6 +27,7 @@ static MD_Node _md_nil_node = }; //~ Memory Operations + MD_FUNCTION_IMPL void MD_MemoryZero(void *memory, MD_u64 size) { @@ -654,11 +650,18 @@ MD_StringListFromNodeFlags(MD_NodeFlags flags) "BraceRight", "BeforeSemicolon", - "BeforeComma", - "AfterSemicolon", + + "BeforeComma", "AfterComma", + "StringSingleQuote", + "StringDoubleQuote", + "StringTick", + "StringTripletSingleQuote", + "StringTripletDoubleQuote", + "StringTripletTick", + "Numeric", "Identifier", "StringLiteral", @@ -1058,9 +1061,18 @@ MD_NodeFlagsFromTokenKind(MD_TokenKind kind) { MD_NodeFlags result = 0; switch (kind){ - case MD_TokenKind_Identifier: result = MD_NodeFlag_Identifier; break; - case MD_TokenKind_NumericLiteral: result = MD_NodeFlag_Numeric; break; - case MD_TokenKind_StringLiteral: result = MD_NodeFlag_StringLiteral; break; + case MD_TokenKind_Identifier: result = MD_NodeFlag_Identifier; break; + case MD_TokenKind_NumericLiteral: result = MD_NodeFlag_Numeric; break; + case MD_TokenKind_StringLiteralSingleQuote: result |= MD_NodeFlag_StringSingleQuote; goto string_lit; + case MD_TokenKind_StringLiteralDoubleQuote: result |= MD_NodeFlag_StringDoubleQuote; goto string_lit; + case MD_TokenKind_StringLiteralTick: result |= MD_NodeFlag_StringTick; goto string_lit; + case MD_TokenKind_StringLiteralSingleQuoteTriplet: result |= MD_NodeFlag_StringTripletSingleQuote; goto string_lit; + case MD_TokenKind_StringLiteralDoubleQuoteTriplet: result |= MD_NodeFlag_StringTripletDoubleQuote; goto string_lit; + case MD_TokenKind_StringLiteralTickTriplet: result |= MD_NodeFlag_StringTripletTick; goto string_lit; + string_lit:; + { + result |= MD_NodeFlag_StringLiteral; + }break; } return(result); } @@ -1357,11 +1369,6 @@ MD_Parse_LexNext(MD_ParseCtx *ctx) // NOTE(allen): Strings case '"': case '\'': - - // NOTE(rjf): "Bundle-of-tokens" strings (`stuff` or ```stuff```) - // In practice no different than a regular string, but provides an - // alternate syntax which will allow tools like 4coder to treat the - // contents as regular tokens. case '`': { // TODO(allen): proposal: @@ -1447,7 +1454,27 @@ MD_Parse_LexNext(MD_ParseCtx *ctx) } // set token kind - token.kind = MD_TokenKind_StringLiteral; + if(is_triplet) + { + switch(d) + { + case '\'': token.kind = MD_TokenKind_StringLiteralSingleQuoteTriplet; break; + case '"': token.kind = MD_TokenKind_StringLiteralDoubleQuoteTriplet; break; + case '`': token.kind = MD_TokenKind_StringLiteralTickTriplet; break; + default: break; + } + } + else + { + switch(d) + { + case '\'': token.kind = MD_TokenKind_StringLiteralSingleQuote; break; + case '"': token.kind = MD_TokenKind_StringLiteralDoubleQuote; break; + case '`': token.kind = MD_TokenKind_StringLiteralTick; break; + default: break; + } + } + }break; // NOTE(allen): Identifiers, Numbers, Operators @@ -1821,33 +1848,57 @@ MD_ParseOneNodeFromCtx(MD_ParseCtx *ctx) } // NOTE(rjf): Labels - else if(MD_Parse_RequireKind(ctx, MD_TokenKind_Identifier, &token) || - MD_Parse_RequireKind(ctx, MD_TokenKind_NumericLiteral, &token) || - MD_Parse_RequireKind(ctx, MD_TokenKind_StringLiteral, &token) || - MD_Parse_RequireKind(ctx, MD_TokenKind_Symbol, &token)) + else if(next_token.kind == MD_TokenKind_Identifier || + next_token.kind == MD_TokenKind_NumericLiteral || + next_token.kind == MD_TokenKind_StringLiteralTick || + next_token.kind == MD_TokenKind_StringLiteralSingleQuote || + next_token.kind == MD_TokenKind_StringLiteralDoubleQuote || + next_token.kind == MD_TokenKind_StringLiteralTickTriplet || + next_token.kind == MD_TokenKind_StringLiteralSingleQuoteTriplet || + next_token.kind == MD_TokenKind_StringLiteralDoubleQuoteTriplet || + next_token.kind == MD_TokenKind_Symbol ) { - result.node = MD_MakeNode(MD_NodeKind_Label, token.string, token.outer_string, token.outer_string.str); - result.node->flags |= MD_NodeFlagsFromTokenKind(token.kind); + MD_Parse_Bump(ctx, next_token); + result.node = MD_MakeNode(MD_NodeKind_Label, next_token.string, next_token.outer_string, next_token.outer_string.str); + result.node->flags |= MD_NodeFlagsFromTokenKind(next_token.kind); - if(token.kind == MD_TokenKind_StringLiteral) + // TODO(rjf): Before we were just able to check one kind. I think preserving + // which kind of string literal was used is very important, for the same reason + // that preserving which symbols were used to delimit a set is important. + // But, having to manage this "group of kinds" is a little bit annoying. + // Maybe we should define the set of legal kinds for certain syntactic + // contexts somewhere unified, so that the parser is never duplicating this? + // + // It's also possible that it never matters and we only ever use this group + // in one place, but I just got that "we're duplicating stuff" allergy that + // I usually get. + // + // If that turned out to be a good idea, maybe we could do something like + // MD_TokenKindIsLegalLabelHead, MD_TokenKindNeedsBalancing?? I don't know. + if(next_token.kind == MD_TokenKind_StringLiteralTick || + next_token.kind == MD_TokenKind_StringLiteralSingleQuote || + next_token.kind == MD_TokenKind_StringLiteralDoubleQuote || + next_token.kind == MD_TokenKind_StringLiteralTickTriplet || + next_token.kind == MD_TokenKind_StringLiteralSingleQuoteTriplet || + next_token.kind == MD_TokenKind_StringLiteralDoubleQuoteTriplet) { - if(!_MD_StringLiteralIsBalanced(token)) + if(!_MD_StringLiteralIsBalanced(next_token)) { - MD_String8 capped = MD_StringPrefix(token.outer_string, MD_UNTERMINATED_TOKEN_LEN_CAP); + MD_String8 capped = MD_StringPrefix(next_token.outer_string, MD_UNTERMINATED_TOKEN_LEN_CAP); MD_PushNodeErrorF(ctx, result.node, MD_MessageKind_CatastrophicError, "Unterminated text literal \"%.*s\"", MD_StringExpand(capped)); } } - else if(token.kind == MD_TokenKind_Symbol && token.string.size == 1 && MD_CharIsReservedSymbol(token.string.str[0])) + else if(next_token.kind == MD_TokenKind_Symbol && next_token.string.size == 1 && MD_CharIsReservedSymbol(next_token.string.str[0])) { - MD_u8 c = token.string.str[0]; + MD_u8 c = next_token.string.str[0]; if(c == '}' || c == ']' || c == ')') { - MD_PushTokenErrorF(ctx, token, MD_MessageKind_CatastrophicError, "Unbalanced \"%c\"", c); + MD_PushTokenErrorF(ctx, next_token, MD_MessageKind_CatastrophicError, "Unbalanced \"%c\"", c); } else { - MD_PushTokenErrorF(ctx, token, MD_MessageKind_Error, "Unexpected reserved symbol \"%c\"", + MD_PushTokenErrorF(ctx, next_token, MD_MessageKind_Error, "Unexpected reserved symbol \"%c\"", c); } } From 792f0a6aa8c65ae62a443c5934c074cefa38cfc4 Mon Sep 17 00:00:00 2001 From: ryanfleury Date: Thu, 24 Jun 2021 23:41:12 -0600 Subject: [PATCH 11/12] cleanup notes on parser details --- source/md_impl.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/source/md_impl.c b/source/md_impl.c index d2f0a0f..bac2ffc 100644 --- a/source/md_impl.c +++ b/source/md_impl.c @@ -932,6 +932,7 @@ MD_FUNCTION_IMPL MD_u64 MD_HashPointer(void *p) { MD_u64 h = (MD_u64)p; + // TODO(rjf): Do we want our own equivalent of UINT64_C? h = (h ^ (h >> 30)) * UINT64_C(0xbf58476d1ce4e5b9); h = (h ^ (h >> 27)) * UINT64_C(0x94d049bb133111eb); h = h ^ (h >> 31); @@ -1080,7 +1081,7 @@ MD_NodeFlagsFromTokenKind(MD_TokenKind kind) MD_PRIVATE_FUNCTION_IMPL void _MD_ParseTagList(MD_ParseCtx *ctx, MD_Node **first_out, MD_Node **last_out); MD_PRIVATE_FUNCTION_IMPL MD_b32 -_MD_StringLiteralIsBalanced(MD_Token token) +_MD_TokenBoundariesAreBalanced(MD_Token token) { MD_u64 front_len = token.string.str - token.outer_string.str; MD_u64 back_len = (token.outer_string.str + token.outer_string.size) - (token.string.str + token.string.size); @@ -1116,14 +1117,26 @@ _MD_ParseTagList(MD_ParseCtx *ctx, MD_Node **first_out, MD_Node **last_out) MD_Parse_Bump(ctx, next_token); MD_Token name = MD_ZERO_STRUCT; + + // TODO(rjf): Do we actually care to prohibit people from using + // something other than identifiers as their tag names? If so, + // why? If we can't come up with a good answer for it, then I + // think it makes sense to just allow anything that would've + // been a legal label string here too. + if(MD_Parse_RequireKind(ctx, MD_TokenKind_Identifier, &name)) { MD_Node *tag = MD_MakeNode(MD_NodeKind_Tag, name.string, name.outer_string, name.outer_string.str); + + // TODO(rjf): Don't we care if this is a MD_TokenKind_Symbol? + // for the sake of consistency with regular sets, I think it + // makes sense to disallow @foo"("), for example... MD_Token token = MD_Parse_PeekSkipSome(ctx, 0); if(MD_StringMatch(token.string, MD_S8Lit("("), 0)) { MD_Parse_Set(ctx, tag, MD_ParseSetFlag_Paren); } + MD_PushSibling(&first, &last, tag); } else @@ -1149,7 +1162,7 @@ _MD_ParseTagList(MD_ParseCtx *ctx, MD_Node **first_out, MD_Node **last_out) MD_FUNCTION_IMPL MD_b32 MD_TokenKindIsWhitespace(MD_TokenKind kind) { - return kind > MD_TokenKind_WhitespaceMin && kind < MD_TokenKind_WhitespaceMax; + return MD_TokenKind_WhitespaceMin < kind && kind < MD_TokenKind_WhitespaceMax; } MD_FUNCTION_IMPL MD_b32 @@ -1161,7 +1174,7 @@ MD_TokenKindIsComment(MD_TokenKind kind) MD_FUNCTION_IMPL MD_b32 MD_TokenKindIsRegular(MD_TokenKind kind) { - return(kind > MD_TokenKind_RegularMin && kind < MD_TokenKind_RegularMax); + return(MD_TokenKind_RegularMin < kind && kind < MD_TokenKind_RegularMax); } MD_FUNCTION void @@ -1882,7 +1895,7 @@ MD_ParseOneNodeFromCtx(MD_ParseCtx *ctx) next_token.kind == MD_TokenKind_StringLiteralSingleQuoteTriplet || next_token.kind == MD_TokenKind_StringLiteralDoubleQuoteTriplet) { - if(!_MD_StringLiteralIsBalanced(next_token)) + if(!_MD_TokenBoundariesAreBalanced(next_token)) { MD_String8 capped = MD_StringPrefix(next_token.outer_string, MD_UNTERMINATED_TOKEN_LEN_CAP); MD_PushNodeErrorF(ctx, result.node, MD_MessageKind_CatastrophicError, From f2991a89bbccf37009b30dba80e10be77a03719a Mon Sep 17 00:00:00 2001 From: ryanfleury Date: Thu, 24 Jun 2021 23:44:40 -0600 Subject: [PATCH 12/12] todo --- source/md.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/source/md.h b/source/md.h index 2206ee6..df20701 100644 --- a/source/md.h +++ b/source/md.h @@ -13,13 +13,6 @@ // [ ] Helpers for parsing NodeFlags, figuring out which nodes in a set are // separated by a semicolon, something like MD_SeekNodeWithFlags(node) -> node ? // [ ] Escaping characters from strings -// [x] Get rid of MD_JoinStringListWithSeparator, just have a separator argument on -// MD_JoinStringList. -// [x] MD_StringMap_Next, for iterating matching slots in an MD_Map, that all -// share the same key (important in the case of hash collisions) -// [x] Helper for making a reference for a node, e.g. MD_ReferenceFromNode -// [x] Organization decision for C generator helpers: splitting from md.h? file name? folder? -// [ ] Collapse down map types // [ ] Fill in more String -> Integer helpers // [ ] Memory Management Strategy // [ ] Gather map of current memory management situation