From 1372db561031390cbf3bdad40e1e1a62ee697279 Mon Sep 17 00:00:00 2001 From: Allen Webster Date: Sat, 2 Oct 2021 20:04:00 -0700 Subject: [PATCH] [expr parser] a few misc bits of tidying - poking around for next steps - notes on issues --- expression_parsing_notes.txt | 37 +++++++++ source/md.c | 145 +++++++++++++++++++---------------- source/md.h | 3 +- tests/expression_tests.c | 8 +- 4 files changed, 125 insertions(+), 68 deletions(-) create mode 100644 expression_parsing_notes.txt diff --git a/expression_parsing_notes.txt b/expression_parsing_notes.txt new file mode 100644 index 0000000..74938cc --- /dev/null +++ b/expression_parsing_notes.txt @@ -0,0 +1,37 @@ +It looks like there was some confusion around the [] and {} handling. + +1. I don't think these should be allowed as prefix operators at all. + Here for example I find it weird and ambiguous `[1, 100] * n` + Is this two prefix operators on `n` or is `*` a binary operator? + I want to read it as a binary operator, which makes `[1, 100]` a leaf + not a prefix. Yet the test sets up "[]" as a prefix and if I remove + the "[]" prefix operator the test fails + 'Example 12 : "[1, 100] * n". Unexpected Expr parsing error: "Unexpected set."' + This is a good test but I think it should work *all the time*. Any set + with "[]" "{}" "[)" or "(]" should just always be allowed. + + I would like to: + a. Always accept the kinds of sets I just listed as leafs when they appear + in a location expecting a leaf + b. Emit an error when the user tries to create a binary or prefix operator with + "[]" "()" or any other set type; allowing "[]" and "()" should be a special + case of postfix operators only + +2. When a set type is allowed as a postfix operator, let's do the same thing no + matter what kind of set it is. Right now the "[]" postfix has it's children + parsed but the "()" does not. I think we should just treat all sets-as-postfix + operators the way we're doing "()". Then it's just a matter of checking if a + node is a particular set type that matches an existing postfix set. + +3. We should emit errors whenever an operator string does not parse to a single + metadesk token - with the exception of the special cases for sets "()" and + "[]" Also they should be restricted to the "label" making tokens: + identifiers, numbers, string literals and symbols. We could make this easier + by allowing even fewer kinds. At the limit it could be just symbols, but + that cuts out the clever 'sizeof' from the test... Maybe symbols and + identifers only? If that's not too difficult to pull off let's go with that. + If an issue comes up with this one we can discuss more because it's not + exactly clear to me what's best. + + + diff --git a/source/md.c b/source/md.c index cb3979c..d922df5 100644 --- a/source/md.c +++ b/source/md.c @@ -3361,6 +3361,8 @@ MD_ExprBakeOperatorTableFromList(MD_Arena *arena, MD_ExprOprList *list) { MD_ExprOprTable result = MD_ZERO_STRUCT; + // TODO(allen): @upgrade_potential(minor) + for(MD_ExprOpr *op = list->first; op != 0; op = op->next) @@ -3368,8 +3370,6 @@ MD_ExprBakeOperatorTableFromList(MD_Arena *arena, MD_ExprOprList *list) MD_ExprOprKind op_kind = op->kind; MD_String8 op_s = op->string; - // TODO(allen): @upgrade_potential(minor) - // error checking MD_String8 error_str = MD_ZERO_STRUCT; if(op_kind != MD_ExprOprKind_Prefix && op_kind != MD_ExprOprKind_Postfix && @@ -3439,6 +3439,8 @@ MD_ExprBakeOperatorTableFromList(MD_Arena *arena, MD_ExprOprList *list) MD_FUNCTION MD_ExprOpr* MD_ExprOprFromKindString(MD_ExprOprTable *table, MD_ExprOprKind kind, MD_String8 s) { + // TODO(allen): @upgrade_potential + // NOTE(mal): Look for operator on one or all (kind == MD_ExprOprKind_Null) tables MD_ExprOpr *result = 0; for(MD_ExprOprKind cur_kind = (MD_ExprOprKind)(MD_ExprOprKind_Null + 1); @@ -3464,39 +3466,6 @@ MD_ExprOprFromKindString(MD_ExprOprTable *table, MD_ExprOprKind kind, MD_String8 return result; } -MD_FUNCTION MD_Expr* -MD_Expr_Alloc(MD_Arena *arena, MD_ExprOpr *op, MD_Node *op_node, - MD_Expr *left, MD_Expr *right) -{ - MD_Expr *result = MD_PushArrayZero(arena, MD_Expr, 1); - result->is_op = 1; - result->op_id = op->op_id; - result->op_ptr = op->op_ptr; - result->md_node = op_node; - result->left = left; - result->right = right; - result->left->parent = result; - if(result->right) - { // TODO(mal): Introduce Nil expr node? - result->right->parent = result; - } - return result; -} - -MD_FUNCTION MD_ExprParseCtx -MD_ExprParse_MakeContext(MD_ExprOprTable *op_table) -{ - MD_ExprParseCtx result = MD_ZERO_STRUCT; - result.op_table = op_table; - - result.accel.bracket_set_op = MD_ExprOprFromKindString(op_table, MD_ExprOprKind_Prefix, MD_S8Lit("[]")); - result.accel.brace_set_op = MD_ExprOprFromKindString(op_table, MD_ExprOprKind_Prefix, MD_S8Lit("{}")); - result.accel.call_op = MD_ExprOprFromKindString(op_table, MD_ExprOprKind_Postfix, MD_S8Lit("()")); - result.accel.subscript_op = MD_ExprOprFromKindString(op_table, MD_ExprOprKind_Binary, MD_S8Lit("[]")); - - return(result); -} - MD_FUNCTION MD_ExprParseResult MD_ExprParse(MD_Arena *arena, MD_ExprOprTable *op_table, MD_Node *first, MD_Node *opl) { @@ -3513,6 +3482,52 @@ MD_ExprParse(MD_Arena *arena, MD_ExprOprTable *op_table, MD_Node *first, MD_Node return(result); } +MD_FUNCTION MD_Expr* +MD_Expr_NewLeaf(MD_Arena *arena, MD_Node *node) +{ + MD_Expr *result = MD_PushArrayZero(arena, MD_Expr, 1); + result->md_node = node; + return(result); +} + +MD_FUNCTION MD_Expr* +MD_Expr_NewOp(MD_Arena *arena, MD_ExprOpr *op, MD_Node *op_node, MD_Expr *l, MD_Expr *r) +{ + MD_Expr *result = MD_PushArrayZero(arena, MD_Expr, 1); + result->is_op = 1; + result->op_id = op->op_id; + result->op_ptr = op->op_ptr; + result->md_node = op_node; + result->parent = 0; + result->left = l; + result->right = r; + if (l != 0) + { + MD_Assert(l->parent == 0); + l->parent = result; + } + if(r != 0) + { + MD_Assert(r->parent == 0); + r->parent = result; + } + return(result); +} + +MD_FUNCTION MD_ExprParseCtx +MD_ExprParse_MakeContext(MD_ExprOprTable *op_table) +{ + MD_ExprParseCtx result = MD_ZERO_STRUCT; + result.op_table = op_table; + + result.accel.bracket_set_op = MD_ExprOprFromKindString(op_table, MD_ExprOprKind_Prefix, MD_S8Lit("[]")); + result.accel.brace_set_op = MD_ExprOprFromKindString(op_table, MD_ExprOprKind_Prefix, MD_S8Lit("{}")); + result.accel.call_op = MD_ExprOprFromKindString(op_table, MD_ExprOprKind_Postfix, MD_S8Lit("()")); + result.accel.subscript_op = MD_ExprOprFromKindString(op_table, MD_ExprOprKind_Binary, MD_S8Lit("[]")); + + return(result); +} + MD_FUNCTION MD_Expr* MD_ExprParse_TopLevel(MD_Arena *arena, MD_ExprParseCtx *ctx, MD_Node *first, MD_Node *opl) { @@ -3589,6 +3604,8 @@ MD_ExprParse_Atom(MD_Arena *arena, MD_ExprParseCtx *ctx, MD_Node **iter, *iter = MD_NodeNextWithLimit(*iter, opl); result = MD_ExprParse_TopLevel(arena, ctx, node->first_child, MD_NilNode()); } + // TODO(allen): this part seems a bit odd. I think any (delimited) set + // should get this treatment, without making these operators to enable them. else if(((node->flags & MD_NodeFlag_HasBracketLeft) && (node->flags & MD_NodeFlag_HasBracketRight) && ctx->accel.bracket_set_op) || ((node->flags & MD_NodeFlag_HasBraceLeft) && @@ -3597,8 +3614,7 @@ MD_ExprParse_Atom(MD_Arena *arena, MD_ExprParseCtx *ctx, MD_Node **iter, { *iter = MD_NodeNextWithLimit(*iter, opl); // NOTE(mal): Unparsed leaf sets ({ ... }, [ ... ]) - result = MD_PushArrayZero(arena, MD_Expr, 1); - result->md_node = node; + result = MD_Expr_NewLeaf(arena, node); } else if(MD_ExprParse_OprConsume(ctx, iter, opl, MD_ExprOprKind_Prefix, 1, &op)) { @@ -3607,7 +3623,7 @@ MD_ExprParse_Atom(MD_Arena *arena, MD_ExprParseCtx *ctx, MD_Node **iter, MD_ExprParse_MinPrecedence(arena, ctx, iter, first, opl, min_precedence); if(ctx->errors.max_message_kind == MD_MessageKind_Null) { - result = MD_Expr_Alloc(arena, op, node, sub_expr, 0); + result = MD_Expr_NewOp(arena, op, node, sub_expr, 0); } } else if(MD_ExprParse_OprConsume(ctx, iter, opl, MD_ExprOprKind_Null, 1, &op)) @@ -3627,8 +3643,7 @@ MD_ExprParse_Atom(MD_Arena *arena, MD_ExprParseCtx *ctx, MD_Node **iter, } else{ // NOTE(mal): leaf *iter = MD_NodeNextWithLimit(*iter, opl); - result = MD_PushArrayZero(arena, MD_Expr, 1); - result->md_node = node; + result = MD_Expr_NewLeaf(arena, node); } return(result); @@ -3652,48 +3667,50 @@ MD_ExprParse_MinPrecedence(MD_Arena *arena, MD_ExprParseCtx *ctx, MD_Node *node = *iter; MD_ExprOpr *op = 0; - if(subscript_op != 0 && subscript_op->precedence >= min_precedence && - (node->flags & MD_NodeFlag_HasBracketLeft) && - (node->flags & MD_NodeFlag_HasBracketRight)) - { - *iter = MD_NodeNextWithLimit(*iter, opl); - - // NOTE(mal): Array subscript - MD_Expr* sub_expr = MD_ExprParse_TopLevel(arena, ctx, node->first_child, MD_NilNode()); - if(ctx->errors.max_message_kind == MD_MessageKind_Null) - { - result = MD_Expr_Alloc(arena, subscript_op, node, result, sub_expr); - } - else{ - break; - } - } - else if(MD_ExprParse_OprConsume(ctx, iter, opl, MD_ExprOprKind_Binary, - min_precedence, &op) || - MD_ExprParse_OprConsume(ctx, iter, opl, MD_ExprOprKind_BinaryRightAssociative, - min_precedence, &op)) + if(MD_ExprParse_OprConsume(ctx, iter, opl, MD_ExprOprKind_Binary, + min_precedence, &op) || + MD_ExprParse_OprConsume(ctx, iter, opl, MD_ExprOprKind_BinaryRightAssociative, + min_precedence, &op)) { MD_u32 next_min_precedence = op->precedence + (op->kind == MD_ExprOprKind_Binary); MD_Expr *sub_expr = MD_ExprParse_MinPrecedence(arena, ctx, iter, first, opl, next_min_precedence); if(ctx->errors.max_message_kind == MD_MessageKind_Null) { - result = MD_Expr_Alloc(arena, op, node, result, sub_expr); + result = MD_Expr_NewOp(arena, op, node, result, sub_expr); } else{ break; } } + + else if(subscript_op != 0 && subscript_op->precedence >= min_precedence && + (node->flags & MD_NodeFlag_HasBracketLeft) && + (node->flags & MD_NodeFlag_HasBracketRight)) + { + *iter = MD_NodeNextWithLimit(*iter, opl); + // NOTE(mal): Array subscript + MD_Expr* sub_expr = MD_ExprParse_TopLevel(arena, ctx, node->first_child, MD_NilNode()); + if(ctx->errors.max_message_kind == MD_MessageKind_Null) + { + result = MD_Expr_NewOp(arena, subscript_op, node, result, sub_expr); + } + else{ + break; + } + } + else if(ctx->accel.call_op && ctx->accel.call_op->precedence >= min_precedence && - node->flags & MD_NodeFlag_HasParenLeft && node->flags & MD_NodeFlag_HasParenRight) + (node->flags & MD_NodeFlag_HasParenLeft) && + (node->flags & MD_NodeFlag_HasParenRight)) { // NOTE: call *iter = MD_NodeNextWithLimit(*iter, opl); - result = MD_Expr_Alloc(arena, ctx->accel.call_op, node, result, 0); + result = MD_Expr_NewOp(arena, ctx->accel.call_op, node, result, 0); } else if(MD_ExprParse_OprConsume(ctx, iter, opl, MD_ExprOprKind_Postfix, min_precedence, &op)) { - result = MD_Expr_Alloc(arena, op, node, result, 0); + result = MD_Expr_NewOp(arena, op, node, result, 0); } else { diff --git a/source/md.h b/source/md.h index a417d27..e9e3fd9 100644 --- a/source/md.h +++ b/source/md.h @@ -1122,7 +1122,8 @@ MD_FUNCTION MD_ExprOpr* MD_ExprOprFromKindString(MD_ExprOprTable *table, MD_FUNCTION MD_ExprParseResult MD_ExprParse(MD_Arena *arena, MD_ExprOprTable *op_table, MD_Node *first, MD_Node *one_past_last); -MD_FUNCTION MD_Expr* MD_Expr_Alloc(MD_Arena *arena, MD_ExprOpr *op, MD_Node *op_node, +MD_FUNCTION MD_Expr* MD_Expr_NewLeaf(MD_Arena *arena, MD_Node *node); +MD_FUNCTION MD_Expr* MD_Expr_NewOp(MD_Arena *arena, MD_ExprOpr *op, MD_Node *op_node, MD_Expr *left, MD_Expr *right); MD_FUNCTION MD_ExprParseCtx MD_ExprParse_MakeContext(MD_ExprOprTable *table); diff --git a/tests/expression_tests.c b/tests/expression_tests.c index 63ca87a..6fe1d75 100644 --- a/tests/expression_tests.c +++ b/tests/expression_tests.c @@ -72,9 +72,11 @@ X(AssignLeftShift, "<<=", BinaryRightAssociative, 4) \ X(AssignRightShift, ">>=", BinaryRightAssociative, 4) \ X(AssignBitwiseAnd, "&=", BinaryRightAssociative, 4) \ X(AssignBitwiseXor, "^=", BinaryRightAssociative, 4) \ -X(AssignBitwiseOr, "|=", BinaryRightAssociative, 4) \ -X(BracketSet, "[]", Prefix, 3) \ -X(BraceSet, "{}", Prefix, 3) +X(AssignBitwiseOr, "|=", BinaryRightAssociative, 4) + +// TODO(allen): I don't think we want to do this +// X(BracketSet, "[]", Prefix, 3) +// X(BraceSet, "{}", Prefix, 3) // X(Cast "()", Prefix, 17) // X(Comma, ",", Binary, 3)