From 493f4239d52f4dca544bf502e9a0824f004615d9 Mon Sep 17 00:00:00 2001 From: Miguel Lechon Date: Fri, 5 Mar 2021 09:51:26 +0100 Subject: [PATCH] [grammar test] Cleaner grammar. --- tests/grammar.md | 243 ++++++++++++++++++++--------------------------- 1 file changed, 101 insertions(+), 142 deletions(-) diff --git a/tests/grammar.md b/tests/grammar.md index eb74beb..03dcb4a 100644 --- a/tests/grammar.md +++ b/tests/grammar.md @@ -9,148 +9,20 @@ * and miscellaneous semantics (@fill, @markup) */ -//// Arbitrarily deep tree, possibly empty -// file : [@child set] -// set : '{' [@child set] '}' | set [separator @sibling set] -// separator : ' ' | '\n' | ',' | ';' - -//// Labeled leaves -// file : [@child set] -// set : @fill 'A' | '{' [@child set] '}' | set [separator @sibling set] -// separator : ' ' | '\n' | ',' | ';' - -//// Labeled internal nodes -// file : [@child set] -// set : @fill 'A' | [@fill 'A' ':'] scoped_set | set [separator @sibling set] -// separator : ' ' | '\n' | ',' | ';' -// scoped_set : '{' [@child set] '}' - -//// Unscoped tests (feels like there should be an easier way to express this) -// file : general_set -// general_set : { [@child set] | [@child uns_set] } -// set : @fill 'A' | set [sep1 @sibling set] | scoped_set | uns_set sep2 @sibling set -// uns_set : @fill 'A' [uns_set_tail] -// uns_set_tail : ':' @child uns_set | ' ' @sibling uns_set | ':' scoped_set | ' ' @sibling scoped_set -// scoped_set : '{' general_set '}' -// sep1 : ' ' | '\n' | ',' | ';' -// sep2 : '\n'| ',' | ';' - -//// Tags -// file : general_set -// general_set : { [@child set] | [@child uns_set] } -// set : {[tag_list] untagged_set} -// uns_set : {[tag_list] untagged_uns_set} -// tag_list : '@' @tag tag ' ' [tag_list] -// tag : @fill 'T'['(' general_set ')'] -// untagged_set : @fill 'A' | set [sep1 @sibling set] | scoped_set | uns_set sep2 @sibling set -// untagged_uns_set: @fill 'A' [uns_set_tail] -// uns_set_tail : ':' @child uns_set | ' ' @sibling uns_set | ':' scoped_set | ' ' @sibling scoped_set -// scoped_set : '{' general_set '}' -// sep1 : ' ' | '\n' | ',' | ';' -// sep2 : '\n'| ',' | ';' - -//// Alternative scope markers -// file : general_set -// general_set : { [@child set] | [@child uns_set] } -// set : {[tag_list] untagged_set} -// uns_set : {[tag_list] untagged_uns_set} -// tag_list : '@' @tag tag ' ' [tag_list] -// tag : @fill 'T'['(' general_set ')'] -// untagged_set : @fill 'A' | set [sep1 @sibling set] | scoped_set | uns_set sep2 @sibling set -// untagged_uns_set: @fill 'A' [uns_set_tail] -// uns_set_tail : ':' @child uns_set | ' ' @sibling uns_set | ':' scoped_set | ' ' @sibling scoped_set -// scoped_set : '{' general_set '}' | alt_scope_beg general_set alt_scope_end -// alt_scope_beg : '(' | '[' -// alt_scope_end : ')' | ']' -// sep1 : ' ' | '\n' | ',' | ';' -// sep2 : '\n'| ',' | ';' - -//// Identifiers -// file : general_set -// general_set : { [@child set] | [@child uns_set] } -// set : {[tag_list] untagged_set} -// uns_set : {[tag_list] untagged_uns_set} -// tag_list : '@' @tag tag ' ' [tag_list] -// tag : @fill id['(' general_set ')'] -// untagged_set : @fill 'A' | set [sep1 @sibling set] | scoped_set | uns_set sep2 @sibling set -// untagged_uns_set: @fill 'A' [uns_set_tail] -// uns_set_tail : ':' @child uns_set | ' ' @sibling uns_set | ':' scoped_set | ' ' @sibling scoped_set -// scoped_set : '{' general_set '}' | alt_scope_beg general_set alt_scope_end -// alt_scope_beg : '(' | '[' -// alt_scope_end : ')' | ']' -// sep1 : ' ' | '\n' | ',' | ';' -// sep2 : '\n'| ',' | ';' -// id : alpha [alphanumeric] | '_' [alphanumeric] -// alphanumeric : alpha [alphanumeric] | digit [alphanumeric] | '_' [alphanumeric] -// alpha : lowercase | uppercase -// lowercase : 'a'|'b'|'c'|'d'|'e'|'f'|'g'|'h'|'i'|'j'|'k'|'l'|'m'|'n'|'o'|'p'|'q'|'r'|'s'|'t'|'u'|'v'|'w'|'z'|'y'|'z' -// uppercase : 'A'|'B'|'C'|'D'|'E'|'F'|'G'|'H'|'I'|'J'|'K'|'L'|'M'|'N'|'O'|'P'|'Q'|'R'|'S'|'T'|'U'|'V'|'W'|'Z'|'Y'|'Z' -// digit : '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9' - -//// General labels -//file : general_set -//general_set : { [@child set] | [@child uns_set] } -//set : {[tag_list] untagged_set} -//uns_set : {[tag_list] untagged_uns_set} -//tag_list : '@' @tag tag ' ' [tag_list] -//tag : @fill id['(' general_set ')'] -//untagged_set : @fill label | untagged_set [sep1 @sibling set] | scoped_set | untagged_uns_set sep2 @sibling set -//untagged_uns_set: @fill label [uns_set_tail] -//uns_set_tail : ':' @child uns_set | ' ' @sibling uns_set | ':' scoped_set | ' ' @sibling scoped_set -//scoped_set : '{' general_set '}' | alt_scope_beg general_set alt_scope_end -//alt_scope_beg : '(' | '[' -//alt_scope_end : ')' | ']' -//sep1 : ' ' | '\n' | ',' | ';' -//sep2 : '\n'| ',' | ';' -//id : alpha [alphanumeric] | '_' [alphanumeric] -//alphanumeric : alpha [alphanumeric] | digit [alphanumeric] | '_' [alphanumeric] -//alpha : lowercase | uppercase -//lowercase : 'a'|'b'|'c'|'d'|'e'|'f'|'g'|'h'|'i'|'j'|'k'|'l'|'m'|'n'|'o'|'p'|'q'|'r'|'s'|'t'|'u'|'v'|'w'|'z'|'y'|'z' -//uppercase : 'A'|'B'|'C'|'D'|'E'|'F'|'G'|'H'|'I'|'J'|'K'|'L'|'M'|'N'|'O'|'P'|'Q'|'R'|'S'|'T'|'U'|'V'|'W'|'Z'|'Y'|'Z' -//digit : '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9' -//label : id | integer_literal | char_literal | string_literal | symbol_label -//integer_literal : { ['-'] natural_literal } -//natural_literal : digit [natural_literal] -//char_literal : @markup '\'' [char_literal_items] @markup '\'' -//char_literal_items : char_literal_item [char_literal_items] -//char_literal_item : ascii_no_backslash_no_quotes | '"' | '\\' ascii -//ascii : ascii_no_backslash_no_quotes | '\'' | '"' | '`' | '\\' -//ascii_no_backslash_no_quotes : digit | alpha | symbol_no_backslash_no_quotes | ' ' -//symbol_no_backslash_no_quotes : symbol_no_backslash_no_quotes_1 | symbol_no_backslash_no_quotes_2 -//symbol_no_backslash_no_quotes_1 : '!'|'#'|'$'|'%'|'&'|'('|')'|'*'|'+'|','|'-'|'.'|'/'|':' -//symbol_no_backslash_no_quotes_2 : ';'|'<'|'='|'>'|'?'|'@'|'['|']'|'^'|'_'|'{'|'|'|'}'|'~' -//string_literal : @markup '"' [string_literal_items] @markup '"' | @markup '`' [string_literal_items] @markup '`' -//string_literal_items : string_literal_item [string_literal_items] -//string_literal_item : ascii_no_backslash_no_quotes | '\'' | '\\' ascii -//symbol_label : '~'|'!'|'%'|'^'|'&'|'*'|'+'|'-'|'/'|'|'|'<'|'>'|'$'|'='|'.'|'?'|'$' - -//// Comments before -file : general_set -general_set : { [@child set] | [@child uns_set] } -set : {[tag_list] ['\n' @pre_comment pre_comment] untagged_set} -pre_comment : '/' '/' [' '] [@fill c_code_content] '\n' | '/' '*' [@fill c_code_content] '*' '/' -c_code_content : 'c''o''m''m''e''n''t' // TODO: Arbitrary strings, including C-style comments, as long as /* */ pairs are balanced -cpp_code_content: 'c''o''m''m''e''n''t' // TODO: Arbitrary strings that don't start with space -uns_set : {[tag_list] untagged_uns_set} -tag_list : '@' @tag tag ' ' [tag_list] -tag : @fill id['(' general_set ')'] -untagged_set : @fill label | untagged_set [sep1 @sibling set] | scoped_set | untagged_uns_set sep2 @sibling set -untagged_uns_set: @fill label [uns_set_tail] -uns_set_tail : ':' @child uns_set | ' ' @sibling uns_set | ':' scoped_set | ' ' @sibling scoped_set -scoped_set : '{' general_set '}' | alt_scope_beg general_set alt_scope_end -alt_scope_beg : '(' | '[' -alt_scope_end : ')' | ']' -sep1 : ' ' | '\n' | ',' | ';' -sep2 : '\n'| ',' | ';' -id : alpha [alphanumeric] | '_' [alphanumeric] -alphanumeric : alpha [alphanumeric] | digit [alphanumeric] | '_' [alphanumeric] -alpha : lowercase | uppercase -lowercase : 'a'|'b'|'c'|'d'|'e'|'f'|'g'|'h'|'i'|'j'|'k'|'l'|'m'|'n'|'o'|'p'|'q'|'r'|'s'|'t'|'u'|'v'|'w'|'z'|'y'|'z' -uppercase : 'A'|'B'|'C'|'D'|'E'|'F'|'G'|'H'|'I'|'J'|'K'|'L'|'M'|'N'|'O'|'P'|'Q'|'R'|'S'|'T'|'U'|'V'|'W'|'Z'|'Y'|'Z' -digit : '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9' -label : id | integer_literal | char_literal | string_literal | symbol_label -integer_literal : { ['-'] natural_literal } -natural_literal : digit [natural_literal] +//// A bunch of common definitions +separator : ' ' | unscoped_separator +unscoped_separator : '\n'| ',' | ';' +alt_scope_beg : '(' | '[' +alt_scope_end : ')' | ']' +id : alpha [alphanumeric] | '_' [alphanumeric] +alphanumeric : alpha [alphanumeric] | digit [alphanumeric] | '_' [alphanumeric] +alpha : lowercase | uppercase +lowercase : 'a'|'b'|'c'|'d'|'e'|'f'|'g'|'h'|'i'|'j'|'k'|'l'|'m'|'n'|'o'|'p'|'q'|'r'|'s'|'t'|'u'|'v'|'w'|'z'|'y'|'z' +uppercase : 'A'|'B'|'C'|'D'|'E'|'F'|'G'|'H'|'I'|'J'|'K'|'L'|'M'|'N'|'O'|'P'|'Q'|'R'|'S'|'T'|'U'|'V'|'W'|'Z'|'Y'|'Z' +digit : '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9' +label : id | integer_literal | char_literal | string_literal | symbol_label +integer_literal : { ['-'] natural_literal } +natural_literal : digit [natural_literal] char_literal : @markup '\'' [char_literal_items] @markup '\'' char_literal_items : char_literal_item [char_literal_items] char_literal_item : ascii_no_backslash_no_quotes | '"' | '\\' ascii @@ -163,3 +35,90 @@ string_literal : @markup '"' [string_literal_items] @markup '"' string_literal_items : string_literal_item [string_literal_items] string_literal_item : ascii_no_backslash_no_quotes | '\'' | '\\' ascii symbol_label : '~'|'!'|'%'|'^'|'&'|'*'|'+'|'-'|'/'|'|'|'<'|'>'|'$'|'='|'.'|'?'|'$' + +//// Arbitrarily deep tree, possibly empty +// file : [@child set_list] +// set_list : '{' [@child set_list] '}' [separator @sibling set_list] + +//// Labeled leaves +// file : [@child set_list] +// set_list : set [separator @sibling set_list] +// set : @fill 'A' | '{' [@child set_list] '}' + +//// Labeled internal nodes +// file : [@child set_list] +// set_list : set [separator @sibling set_list] +// set : @fill 'A' | [@fill 'A' ':'] '{' [@child set_list] '}' + +//// Unscoped sets +// file : [@child set_list] +// set_list : scoped_set [separator @sibling set_list] | unscoped_set [unscoped_separator @sibling set_list] +// scoped_set : '{' [@child set_list] '}' +// unscoped_set : @fill 'A' [unscoped_set_tail] +// unscoped_set_tail : ':' @child unscoped_set | ' ' @sibling unscoped_set | ':' scoped_set |' ' @sibling scoped_set + +//// Tags +// file : [@child set_list] +// set_list : scoped_set [separator @sibling set_list] | unscoped_set [unscoped_separator @sibling set_list] +// scoped_set : {[tag_list] '{' [@child set_list] '}'} +// unscoped_set : {[tag_list] @fill 'A' [unscoped_set_tail]} +// tag_list : '@' @tag tag ' ' [tag_list] +// tag : @fill 'T'['(' [@child set_list] ')'] +// // unscoped_set_tail : ':' @child unscoped_set | ' ' @sibling unscoped_set | ':' @child scoped_set | ' ' @sibling scoped_set +// unscoped_set_tail : {':' @child unscoped_set | ' ' @sibling unscoped_set | set_tail_hack | ' ' @sibling scoped_set} +// // NOTE(mal): Ideally the set_tail_hack should be captured by the simpler +// // ':' @child scoped_set +// // but there's a quirk in the grammar. +// // In "A:{}", A has no children but in "A:@T{}", it has one tagged children, +// // which means that *the presence of tags* introduces the child +// set_tail_hack : ':' [@child tag_list] '{' '}' | ':' @child [tag_list] '{' @child set_list '}' + +//// Alternative scope markers +// file : [@child set_list] +// set_list : scoped_set [separator @sibling set_list] | unscoped_set [unscoped_separator @sibling set_list] +// scoped_set : {[tag_list] untagged_scoped_set} +// untagged_scoped_set : '{' [@child set_list] '}' | alt_scope_beg [@child set_list] alt_scope_end +// unscoped_set : {[tag_list] @fill 'A' [unscoped_set_tail]} +// tag_list : '@' @tag tag ' ' [tag_list] +// tag : @fill 'T'['(' [@child set_list] ')'] +// unscoped_set_tail : {':' @child unscoped_set | ' ' @sibling unscoped_set | set_tail_hack | ' ' @sibling scoped_set} +// set_tail_hack : {':' [@child tag_list] '{' '}' | +// ':' @child [tag_list] '{' @child set_list '}' | +// ':' [@child tag_list] alt_scope_beg alt_scope_end | +// ':' @child [tag_list] alt_scope_beg @child set_list alt_scope_end +// } + +//// General tags and labels +file : [@child set_list] +set_list : scoped_set [separator @sibling set_list] | unscoped_set [unscoped_separator @sibling set_list] +scoped_set : {[tag_list] untagged_scoped_set} +untagged_scoped_set : '{' [@child set_list] '}' | alt_scope_beg [@child set_list] alt_scope_end +unscoped_set : {[tag_list] @fill label [unscoped_set_tail]} +tag_list : '@' @tag tag ' ' [tag_list] +tag : @fill id['(' [@child set_list] ')'] +unscoped_set_tail : {':' @child unscoped_set | ' ' @sibling unscoped_set | set_tail_hack | ' ' @sibling scoped_set} +set_tail_hack : {':' [@child tag_list] '{' '}' | + ':' @child [tag_list] '{' @child set_list '}' | + ':' [@child tag_list] alt_scope_beg alt_scope_end | + ':' @child [tag_list] alt_scope_beg @child set_list alt_scope_end + } + +//// Comments before +//// TODO: This needs some work to make sure that "a /* comment_before_b */ b" can't be generated +//// but that "a /* comment_after_a */ b" can +// file : [@child set_list] +// set_list : scoped_set [separator @sibling set_list] | unscoped_set [unscoped_separator @sibling set_list] +// scoped_set : {[tag_list] [@pre_comment pre_comment] untagged_scoped_set} +// pre_comment : '/' '/' [' '] [@fill c_code_content] '\n' | '/' '*' [@fill cpp_code_content] '*' '/' +// c_code_content : 'c''o''m''m''e''n''t' // TODO: Arbitrary strings, including C-style comments, as long as /* */ pairs are balanced +// cpp_code_content : 'c''o''m''m''e''n''t' // TODO: Arbitrary strings that don't start with space +// untagged_scoped_set : '{' [@child set_list] '}' | alt_scope_beg [@child set_list] alt_scope_end +// unscoped_set : {[tag_list] @fill label [unscoped_set_tail]} +// tag_list : '@' @tag tag ' ' [tag_list] +// tag : @fill id['(' [@child set_list] ')'] +// unscoped_set_tail : {':' @child unscoped_set | ' ' @sibling unscoped_set | set_tail_hack | ' ' @sibling scoped_set} +// set_tail_hack : {':' [@child tag_list] '{' '}' | +// ':' @child [tag_list] '{' @child set_list '}' | +// ':' [@child tag_list] alt_scope_beg alt_scope_end | +// ':' @child [tag_list] alt_scope_beg @child set_list alt_scope_end +// }