[expr parser] a few misc bits of tidying - poking around for next steps - notes on issues

This commit is contained in:
Allen Webster
2021-10-02 20:04:00 -07:00
parent f635b2d066
commit 1372db5610
4 changed files with 125 additions and 68 deletions
+37
View File
@@ -0,0 +1,37 @@
It looks like there was some confusion around the [] and {} handling.
1. I don't think these should be allowed as prefix operators at all.
Here for example I find it weird and ambiguous `[1, 100] * n`
Is this two prefix operators on `n` or is `*` a binary operator?
I want to read it as a binary operator, which makes `[1, 100]` a leaf
not a prefix. Yet the test sets up "[]" as a prefix and if I remove
the "[]" prefix operator the test fails
'Example 12 : "[1, 100] * n". Unexpected Expr parsing error: "Unexpected set."'
This is a good test but I think it should work *all the time*. Any set
with "[]" "{}" "[)" or "(]" should just always be allowed.
I would like to:
a. Always accept the kinds of sets I just listed as leafs when they appear
in a location expecting a leaf
b. Emit an error when the user tries to create a binary or prefix operator with
"[]" "()" or any other set type; allowing "[]" and "()" should be a special
case of postfix operators only
2. When a set type is allowed as a postfix operator, let's do the same thing no
matter what kind of set it is. Right now the "[]" postfix has it's children
parsed but the "()" does not. I think we should just treat all sets-as-postfix
operators the way we're doing "()". Then it's just a matter of checking if a
node is a particular set type that matches an existing postfix set.
3. We should emit errors whenever an operator string does not parse to a single
metadesk token - with the exception of the special cases for sets "()" and
"[]" Also they should be restricted to the "label" making tokens:
identifiers, numbers, string literals and symbols. We could make this easier
by allowing even fewer kinds. At the limit it could be just symbols, but
that cuts out the clever 'sizeof' from the test... Maybe symbols and
identifers only? If that's not too difficult to pull off let's go with that.
If an issue comes up with this one we can discuss more because it's not
exactly clear to me what's best.
+81 -64
View File
@@ -3361,6 +3361,8 @@ MD_ExprBakeOperatorTableFromList(MD_Arena *arena, MD_ExprOprList *list)
{
MD_ExprOprTable result = MD_ZERO_STRUCT;
// TODO(allen): @upgrade_potential(minor)
for(MD_ExprOpr *op = list->first;
op != 0;
op = op->next)
@@ -3368,8 +3370,6 @@ MD_ExprBakeOperatorTableFromList(MD_Arena *arena, MD_ExprOprList *list)
MD_ExprOprKind op_kind = op->kind;
MD_String8 op_s = op->string;
// TODO(allen): @upgrade_potential(minor)
// error checking
MD_String8 error_str = MD_ZERO_STRUCT;
if(op_kind != MD_ExprOprKind_Prefix && op_kind != MD_ExprOprKind_Postfix &&
@@ -3439,6 +3439,8 @@ MD_ExprBakeOperatorTableFromList(MD_Arena *arena, MD_ExprOprList *list)
MD_FUNCTION MD_ExprOpr*
MD_ExprOprFromKindString(MD_ExprOprTable *table, MD_ExprOprKind kind, MD_String8 s)
{
// TODO(allen): @upgrade_potential
// NOTE(mal): Look for operator on one or all (kind == MD_ExprOprKind_Null) tables
MD_ExprOpr *result = 0;
for(MD_ExprOprKind cur_kind = (MD_ExprOprKind)(MD_ExprOprKind_Null + 1);
@@ -3464,39 +3466,6 @@ MD_ExprOprFromKindString(MD_ExprOprTable *table, MD_ExprOprKind kind, MD_String8
return result;
}
MD_FUNCTION MD_Expr*
MD_Expr_Alloc(MD_Arena *arena, MD_ExprOpr *op, MD_Node *op_node,
MD_Expr *left, MD_Expr *right)
{
MD_Expr *result = MD_PushArrayZero(arena, MD_Expr, 1);
result->is_op = 1;
result->op_id = op->op_id;
result->op_ptr = op->op_ptr;
result->md_node = op_node;
result->left = left;
result->right = right;
result->left->parent = result;
if(result->right)
{ // TODO(mal): Introduce Nil expr node?
result->right->parent = result;
}
return result;
}
MD_FUNCTION MD_ExprParseCtx
MD_ExprParse_MakeContext(MD_ExprOprTable *op_table)
{
MD_ExprParseCtx result = MD_ZERO_STRUCT;
result.op_table = op_table;
result.accel.bracket_set_op = MD_ExprOprFromKindString(op_table, MD_ExprOprKind_Prefix, MD_S8Lit("[]"));
result.accel.brace_set_op = MD_ExprOprFromKindString(op_table, MD_ExprOprKind_Prefix, MD_S8Lit("{}"));
result.accel.call_op = MD_ExprOprFromKindString(op_table, MD_ExprOprKind_Postfix, MD_S8Lit("()"));
result.accel.subscript_op = MD_ExprOprFromKindString(op_table, MD_ExprOprKind_Binary, MD_S8Lit("[]"));
return(result);
}
MD_FUNCTION MD_ExprParseResult
MD_ExprParse(MD_Arena *arena, MD_ExprOprTable *op_table, MD_Node *first, MD_Node *opl)
{
@@ -3513,6 +3482,52 @@ MD_ExprParse(MD_Arena *arena, MD_ExprOprTable *op_table, MD_Node *first, MD_Node
return(result);
}
MD_FUNCTION MD_Expr*
MD_Expr_NewLeaf(MD_Arena *arena, MD_Node *node)
{
MD_Expr *result = MD_PushArrayZero(arena, MD_Expr, 1);
result->md_node = node;
return(result);
}
MD_FUNCTION MD_Expr*
MD_Expr_NewOp(MD_Arena *arena, MD_ExprOpr *op, MD_Node *op_node, MD_Expr *l, MD_Expr *r)
{
MD_Expr *result = MD_PushArrayZero(arena, MD_Expr, 1);
result->is_op = 1;
result->op_id = op->op_id;
result->op_ptr = op->op_ptr;
result->md_node = op_node;
result->parent = 0;
result->left = l;
result->right = r;
if (l != 0)
{
MD_Assert(l->parent == 0);
l->parent = result;
}
if(r != 0)
{
MD_Assert(r->parent == 0);
r->parent = result;
}
return(result);
}
MD_FUNCTION MD_ExprParseCtx
MD_ExprParse_MakeContext(MD_ExprOprTable *op_table)
{
MD_ExprParseCtx result = MD_ZERO_STRUCT;
result.op_table = op_table;
result.accel.bracket_set_op = MD_ExprOprFromKindString(op_table, MD_ExprOprKind_Prefix, MD_S8Lit("[]"));
result.accel.brace_set_op = MD_ExprOprFromKindString(op_table, MD_ExprOprKind_Prefix, MD_S8Lit("{}"));
result.accel.call_op = MD_ExprOprFromKindString(op_table, MD_ExprOprKind_Postfix, MD_S8Lit("()"));
result.accel.subscript_op = MD_ExprOprFromKindString(op_table, MD_ExprOprKind_Binary, MD_S8Lit("[]"));
return(result);
}
MD_FUNCTION MD_Expr*
MD_ExprParse_TopLevel(MD_Arena *arena, MD_ExprParseCtx *ctx, MD_Node *first, MD_Node *opl)
{
@@ -3589,6 +3604,8 @@ MD_ExprParse_Atom(MD_Arena *arena, MD_ExprParseCtx *ctx, MD_Node **iter,
*iter = MD_NodeNextWithLimit(*iter, opl);
result = MD_ExprParse_TopLevel(arena, ctx, node->first_child, MD_NilNode());
}
// TODO(allen): this part seems a bit odd. I think any (delimited) set
// should get this treatment, without making these operators to enable them.
else if(((node->flags & MD_NodeFlag_HasBracketLeft) && (node->flags & MD_NodeFlag_HasBracketRight) &&
ctx->accel.bracket_set_op) ||
((node->flags & MD_NodeFlag_HasBraceLeft) &&
@@ -3597,8 +3614,7 @@ MD_ExprParse_Atom(MD_Arena *arena, MD_ExprParseCtx *ctx, MD_Node **iter,
{
*iter = MD_NodeNextWithLimit(*iter, opl);
// NOTE(mal): Unparsed leaf sets ({ ... }, [ ... ])
result = MD_PushArrayZero(arena, MD_Expr, 1);
result->md_node = node;
result = MD_Expr_NewLeaf(arena, node);
}
else if(MD_ExprParse_OprConsume(ctx, iter, opl, MD_ExprOprKind_Prefix, 1, &op))
{
@@ -3607,7 +3623,7 @@ MD_ExprParse_Atom(MD_Arena *arena, MD_ExprParseCtx *ctx, MD_Node **iter,
MD_ExprParse_MinPrecedence(arena, ctx, iter, first, opl, min_precedence);
if(ctx->errors.max_message_kind == MD_MessageKind_Null)
{
result = MD_Expr_Alloc(arena, op, node, sub_expr, 0);
result = MD_Expr_NewOp(arena, op, node, sub_expr, 0);
}
}
else if(MD_ExprParse_OprConsume(ctx, iter, opl, MD_ExprOprKind_Null, 1, &op))
@@ -3627,8 +3643,7 @@ MD_ExprParse_Atom(MD_Arena *arena, MD_ExprParseCtx *ctx, MD_Node **iter,
}
else{ // NOTE(mal): leaf
*iter = MD_NodeNextWithLimit(*iter, opl);
result = MD_PushArrayZero(arena, MD_Expr, 1);
result->md_node = node;
result = MD_Expr_NewLeaf(arena, node);
}
return(result);
@@ -3652,48 +3667,50 @@ MD_ExprParse_MinPrecedence(MD_Arena *arena, MD_ExprParseCtx *ctx,
MD_Node *node = *iter;
MD_ExprOpr *op = 0;
if(subscript_op != 0 && subscript_op->precedence >= min_precedence &&
(node->flags & MD_NodeFlag_HasBracketLeft) &&
(node->flags & MD_NodeFlag_HasBracketRight))
{
*iter = MD_NodeNextWithLimit(*iter, opl);
// NOTE(mal): Array subscript
MD_Expr* sub_expr = MD_ExprParse_TopLevel(arena, ctx, node->first_child, MD_NilNode());
if(ctx->errors.max_message_kind == MD_MessageKind_Null)
{
result = MD_Expr_Alloc(arena, subscript_op, node, result, sub_expr);
}
else{
break;
}
}
else if(MD_ExprParse_OprConsume(ctx, iter, opl, MD_ExprOprKind_Binary,
min_precedence, &op) ||
MD_ExprParse_OprConsume(ctx, iter, opl, MD_ExprOprKind_BinaryRightAssociative,
min_precedence, &op))
if(MD_ExprParse_OprConsume(ctx, iter, opl, MD_ExprOprKind_Binary,
min_precedence, &op) ||
MD_ExprParse_OprConsume(ctx, iter, opl, MD_ExprOprKind_BinaryRightAssociative,
min_precedence, &op))
{
MD_u32 next_min_precedence = op->precedence + (op->kind == MD_ExprOprKind_Binary);
MD_Expr *sub_expr =
MD_ExprParse_MinPrecedence(arena, ctx, iter, first, opl, next_min_precedence);
if(ctx->errors.max_message_kind == MD_MessageKind_Null)
{
result = MD_Expr_Alloc(arena, op, node, result, sub_expr);
result = MD_Expr_NewOp(arena, op, node, result, sub_expr);
}
else{
break;
}
}
else if(subscript_op != 0 && subscript_op->precedence >= min_precedence &&
(node->flags & MD_NodeFlag_HasBracketLeft) &&
(node->flags & MD_NodeFlag_HasBracketRight))
{
*iter = MD_NodeNextWithLimit(*iter, opl);
// NOTE(mal): Array subscript
MD_Expr* sub_expr = MD_ExprParse_TopLevel(arena, ctx, node->first_child, MD_NilNode());
if(ctx->errors.max_message_kind == MD_MessageKind_Null)
{
result = MD_Expr_NewOp(arena, subscript_op, node, result, sub_expr);
}
else{
break;
}
}
else if(ctx->accel.call_op && ctx->accel.call_op->precedence >= min_precedence &&
node->flags & MD_NodeFlag_HasParenLeft && node->flags & MD_NodeFlag_HasParenRight)
(node->flags & MD_NodeFlag_HasParenLeft) &&
(node->flags & MD_NodeFlag_HasParenRight))
{ // NOTE: call
*iter = MD_NodeNextWithLimit(*iter, opl);
result = MD_Expr_Alloc(arena, ctx->accel.call_op, node, result, 0);
result = MD_Expr_NewOp(arena, ctx->accel.call_op, node, result, 0);
}
else if(MD_ExprParse_OprConsume(ctx, iter, opl, MD_ExprOprKind_Postfix,
min_precedence, &op))
{
result = MD_Expr_Alloc(arena, op, node, result, 0);
result = MD_Expr_NewOp(arena, op, node, result, 0);
}
else
{
+2 -1
View File
@@ -1122,7 +1122,8 @@ MD_FUNCTION MD_ExprOpr* MD_ExprOprFromKindString(MD_ExprOprTable *table,
MD_FUNCTION MD_ExprParseResult MD_ExprParse(MD_Arena *arena, MD_ExprOprTable *op_table,
MD_Node *first, MD_Node *one_past_last);
MD_FUNCTION MD_Expr* MD_Expr_Alloc(MD_Arena *arena, MD_ExprOpr *op, MD_Node *op_node,
MD_FUNCTION MD_Expr* MD_Expr_NewLeaf(MD_Arena *arena, MD_Node *node);
MD_FUNCTION MD_Expr* MD_Expr_NewOp(MD_Arena *arena, MD_ExprOpr *op, MD_Node *op_node,
MD_Expr *left, MD_Expr *right);
MD_FUNCTION MD_ExprParseCtx MD_ExprParse_MakeContext(MD_ExprOprTable *table);
+5 -3
View File
@@ -72,9 +72,11 @@ X(AssignLeftShift, "<<=", BinaryRightAssociative, 4) \
X(AssignRightShift, ">>=", BinaryRightAssociative, 4) \
X(AssignBitwiseAnd, "&=", BinaryRightAssociative, 4) \
X(AssignBitwiseXor, "^=", BinaryRightAssociative, 4) \
X(AssignBitwiseOr, "|=", BinaryRightAssociative, 4) \
X(BracketSet, "[]", Prefix, 3) \
X(BraceSet, "{}", Prefix, 3)
X(AssignBitwiseOr, "|=", BinaryRightAssociative, 4)
// TODO(allen): I don't think we want to do this
// X(BracketSet, "[]", Prefix, 3)
// X(BraceSet, "{}", Prefix, 3)
// X(Cast "()", Prefix, 17)
// X(Comma, ",", Binary, 3)